Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp1724
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt10
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp518
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h549
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp2610
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h561
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp706
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h178
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfLabel.cpp35
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfLabel.h56
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfPrinter.cpp235
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfPrinter.h153
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfWriter.cpp129
-rw-r--r--lib/CodeGen/AsmPrinter/Makefile15
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp160
-rw-r--r--lib/CodeGen/BranchFolding.cpp1204
-rw-r--r--lib/CodeGen/CMakeLists.txt62
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp358
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp161
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp397
-rw-r--r--lib/CodeGen/ELFWriter.cpp575
-rw-r--r--lib/CodeGen/ELFWriter.h230
-rw-r--r--lib/CodeGen/GCMetadata.cpp212
-rw-r--r--lib/CodeGen/GCMetadataPrinter.cpp30
-rw-r--r--lib/CodeGen/GCStrategy.cpp392
-rw-r--r--lib/CodeGen/IfConversion.cpp1229
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp892
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp289
-rw-r--r--lib/CodeGen/LatencyPriorityQueue.cpp114
-rw-r--r--lib/CodeGen/LiveInterval.cpp853
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp2298
-rw-r--r--lib/CodeGen/LiveStackAnalysis.cpp66
-rw-r--r--lib/CodeGen/LiveVariables.cpp695
-rw-r--r--lib/CodeGen/LowerSubregs.cpp292
-rw-r--r--lib/CodeGen/MachOWriter.cpp976
-rw-r--r--lib/CodeGen/MachOWriter.h629
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp372
-rw-r--r--lib/CodeGen/MachineDominators.cpp53
-rw-r--r--lib/CodeGen/MachineFunction.cpp598
-rw-r--r--lib/CodeGen/MachineInstr.cpp1105
-rw-r--r--lib/CodeGen/MachineLICM.cpp406
-rw-r--r--lib/CodeGen/MachineLoopInfo.cpp40
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp368
-rw-r--r--lib/CodeGen/MachinePassRegistry.cpp41
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp125
-rw-r--r--lib/CodeGen/MachineSink.cpp257
-rw-r--r--lib/CodeGen/MachineVerifier.cpp690
-rw-r--r--lib/CodeGen/Makefile22
-rw-r--r--lib/CodeGen/OcamlGC.cpp38
-rw-r--r--lib/CodeGen/PBQP.cpp1395
-rw-r--r--lib/CodeGen/PBQP.h284
-rw-r--r--lib/CodeGen/PHIElimination.cpp431
-rw-r--r--lib/CodeGen/Passes.cpp54
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp941
-rw-r--r--lib/CodeGen/PreAllocSplitting.cpp1485
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp679
-rw-r--r--lib/CodeGen/PrologEpilogInserter.h167
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp92
-rw-r--r--lib/CodeGen/README.txt208
-rw-r--r--lib/CodeGen/RegAllocBigBlock.cpp892
-rw-r--r--lib/CodeGen/RegAllocLinearScan.cpp1535
-rw-r--r--lib/CodeGen/RegAllocLocal.cpp1068
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp871
-rw-r--r--lib/CodeGen/RegAllocSimple.cpp257
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp41
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp480
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp572
-rw-r--r--lib/CodeGen/ScheduleDAGEmit.cpp71
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp468
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.h184
-rw-r--r--lib/CodeGen/ScheduleDAGPrinter.cpp97
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt22
-rw-r--r--lib/CodeGen/SelectionDAG/CallingConvLower.cpp148
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp6203
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp1033
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp3091
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp1388
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp2382
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp1074
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h736
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp453
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp335
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp2151
-rw-r--r--lib/CodeGen/SelectionDAG/Makefile15
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp635
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp268
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp1533
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp294
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h179
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp668
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp5743
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp6052
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.h558
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp1347
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp416
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp2592
-rw-r--r--lib/CodeGen/ShadowStackGC.cpp439
-rw-r--r--lib/CodeGen/ShrinkWrapping.cpp1141
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.cpp2827
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.h313
-rw-r--r--lib/CodeGen/Spiller.cpp229
-rw-r--r--lib/CodeGen/Spiller.h37
-rw-r--r--lib/CodeGen/StackProtector.cpp224
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp733
-rw-r--r--lib/CodeGen/StrongPHIElimination.cpp1053
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp194
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp997
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp199
-rw-r--r--lib/CodeGen/VirtRegMap.cpp269
-rw-r--r--lib/CodeGen/VirtRegMap.h495
-rw-r--r--lib/CodeGen/VirtRegRewriter.cpp2225
-rw-r--r--lib/CodeGen/VirtRegRewriter.h56
112 files changed, 86727 insertions, 0 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 0000000..45462da
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,1724 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cerrno>
+using namespace llvm;
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+ cl::init(cl::BOU_UNSET));
+
+char AsmPrinter::ID = 0;
+AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL, bool VDef)
+ : MachineFunctionPass(&ID), FunctionNumber(0), OptLevel(OL), O(o),
+ TM(tm), TAI(T), TRI(tm.getRegisterInfo()),
+ IsInTextSection(false)
+{
+ switch (AsmVerbose) {
+ case cl::BOU_UNSET: VerboseAsm = VDef; break;
+ case cl::BOU_TRUE: VerboseAsm = true; break;
+ case cl::BOU_FALSE: VerboseAsm = false; break;
+ }
+}
+
+AsmPrinter::~AsmPrinter() {
+ for (gcp_iterator I = GCMetadataPrinters.begin(),
+ E = GCMetadataPrinters.end(); I != E; ++I)
+ delete I->second;
+}
+
+/// SwitchToTextSection - Switch to the specified text section of the executable
+/// if we are not already in it!
+///
+void AsmPrinter::SwitchToTextSection(const char *NewSection,
+ const GlobalValue *GV) {
+ std::string NS;
+ if (GV && GV->hasSection())
+ NS = TAI->getSwitchToSectionDirective() + GV->getSection();
+ else
+ NS = NewSection;
+
+ // If we're already in this section, we're done.
+ if (CurrentSection == NS) return;
+
+ // Close the current section, if applicable.
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+ O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
+
+ CurrentSection = NS;
+
+ if (!CurrentSection.empty())
+ O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n';
+
+ IsInTextSection = true;
+}
+
+/// SwitchToDataSection - Switch to the specified data section of the executable
+/// if we are not already in it!
+///
+void AsmPrinter::SwitchToDataSection(const char *NewSection,
+ const GlobalValue *GV) {
+ std::string NS;
+ if (GV && GV->hasSection())
+ NS = TAI->getSwitchToSectionDirective() + GV->getSection();
+ else
+ NS = NewSection;
+
+ // If we're already in this section, we're done.
+ if (CurrentSection == NS) return;
+
+ // Close the current section, if applicable.
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+ O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
+
+ CurrentSection = NS;
+
+ if (!CurrentSection.empty())
+ O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n';
+
+ IsInTextSection = false;
+}
+
+/// SwitchToSection - Switch to the specified section of the executable if we
+/// are not already in it!
+void AsmPrinter::SwitchToSection(const Section* NS) {
+ const std::string& NewSection = NS->getName();
+
+ // If we're already in this section, we're done.
+ if (CurrentSection == NewSection) return;
+
+ // Close the current section, if applicable.
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+ O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n';
+
+ // FIXME: Make CurrentSection a Section* in the future
+ CurrentSection = NewSection;
+ CurrentSection_ = NS;
+
+ if (!CurrentSection.empty()) {
+ // If section is named we need to switch into it via special '.section'
+ // directive and also append funky flags. Otherwise - section name is just
+ // some magic assembler directive.
+ if (NS->isNamed())
+ O << TAI->getSwitchToSectionDirective()
+ << CurrentSection
+ << TAI->getSectionFlags(NS->getFlags());
+ else
+ O << CurrentSection;
+ O << TAI->getDataSectionStartSuffix() << '\n';
+ }
+
+ IsInTextSection = (NS->getFlags() & SectionFlags::Code);
+}
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix());
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+
+ if (TAI->hasSingleParameterDotFile()) {
+ /* Very minimal debug info. It is ignored if we emit actual
+       debug info. If we don't, this at least helps the user find where
+ a function came from. */
+ O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n";
+ }
+
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(O, *this, *TAI);
+
+ if (!M.getModuleInlineAsm().empty())
+ O << TAI->getCommentString() << " Start of file scope inline assembly\n"
+ << M.getModuleInlineAsm()
+ << '\n' << TAI->getCommentString()
+ << " End of file scope inline assembly\n";
+
+ SwitchToDataSection(""); // Reset back to no section.
+
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ if (MMI) MMI->AnalyzeModule(M);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ return false;
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ if (TAI->getWeakRefDirective()) {
+ if (!ExtWeakSymbols.empty())
+ SwitchToDataSection("");
+
+ for (std::set<const GlobalValue*>::iterator i = ExtWeakSymbols.begin(),
+ e = ExtWeakSymbols.end(); i != e; ++i)
+ O << TAI->getWeakRefDirective() << Mang->getValueName(*i) << '\n';
+ }
+
+ if (TAI->getSetDirective()) {
+ if (!M.alias_empty())
+ SwitchToSection(TAI->getTextSection());
+
+ O << '\n';
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I!=E; ++I) {
+ std::string Name = Mang->getValueName(I);
+ std::string Target;
+
+ const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
+ Target = Mang->getValueName(GV);
+
+ if (I->hasExternalLinkage() || !TAI->getWeakRefDirective())
+ O << "\t.globl\t" << Name << '\n';
+ else if (I->hasWeakLinkage())
+ O << TAI->getWeakRefDirective() << Name << '\n';
+ else if (!I->hasLocalLinkage())
+ assert(0 && "Invalid alias linkage");
+
+ printVisibility(Name, I->getVisibility());
+
+ O << TAI->getSetDirective() << ' ' << Name << ", " << Target << '\n';
+ }
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ MP->finishAssembly(O, *this, *TAI);
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function* InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (TAI->getNonexecutableStackDirective())
+ O << TAI->getNonexecutableStackDirective() << '\n';
+
+ delete Mang; Mang = 0;
+ return false;
+}
+
+const std::string &
+AsmPrinter::getCurrentFunctionEHName(const MachineFunction *MF,
+ std::string &Name) const {
+ assert(MF && "No machine function?");
+ Name = MF->getFunction()->getName();
+ if (Name.empty())
+ Name = Mang->getValueName(MF->getFunction());
+ Name = Mang->makeNameProper(TAI->getEHGlobalPrefix() +
+ Name + ".eh", TAI->getGlobalPrefix());
+ return Name;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ // What's my mangled name?
+ CurrentFnName = Mang->getValueName(MF.getFunction());
+ IncrementFunctionNumber();
+}
+
+namespace {
+  // SectionCPs - Keep track of the alignment and constant pool entries
+  // per Section.
+ struct SectionCPs {
+ const Section *S;
+ unsigned Alignment;
+ SmallVector<unsigned, 4> CPEs;
+    SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {}
+ };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // Calculate sections for constant pool entries. We collect entries to go into
+  // the same section together to reduce the number of section switches.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+ const Section* S = TAI->SelectSectionForMachineConst(CPE.getType());
+    // The number of sections is small; just do a linear search from the
+ // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ SwitchToSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+ unsigned Offset = 0;
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+ EmitZeros(NewOffset - Offset);
+
+ const Type *Ty = CPE.getType();
+ Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << CPI << ":\t\t\t\t\t";
+ if (VerboseAsm) {
+ O << TAI->getCommentString() << ' ';
+ WriteTypeSymbolic(O, CPE.getType(), 0);
+ }
+ O << '\n';
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CPE.Val.ConstVal);
+ }
+ }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
+ MachineFunction &MF) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ TargetLowering *LoweringInfo = TM.getTargetLowering();
+
+ const char* JumpTableDataSection = TAI->getJumpTableDataSection();
+ const Function *F = MF.getFunction();
+ unsigned SectionFlags = TAI->SectionFlagsForGlobal(F);
+ if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) ||
+ !JumpTableDataSection ||
+ SectionFlags & SectionFlags::Linkonce) {
+ // In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+    // We should also do this if the section name is NULL or the function is
+    // declared in a discardable section.
+ SwitchToSection(TAI->SectionForGlobal(F));
+ } else {
+ SwitchToDataSection(JumpTableDataSection);
+ }
+
+ EmitAlignment(Log2_32(MJTI->getAlignment()));
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[i].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For PIC codegen, if possible we want to use the SetDirective to reduce
+ // the number of relocations the assembler will generate for the jump table.
+ // Set directives are all printed before the jump table itself.
+ SmallPtrSet<MachineBasicBlock*, 16> EmittedSets;
+ if (TAI->getSetDirective() && IsPic)
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ if (EmittedSets.insert(JTBBs[ii]))
+ printPICJumpTableSetLabel(i, JTBBs[ii]);
+
+    // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
+ O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << i << ":\n";
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ printPICJumpTableEntry(MJTI, JTBBs[ii], i);
+ O << '\n';
+ }
+ }
+}
+
+void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const {
+ bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+
+ // Use JumpTableDirective otherwise honor the entry size from the jump table
+ // info.
+ const char *JTEntryDirective = TAI->getJumpTableDirective();
+ bool HadJTEntryDirective = JTEntryDirective != NULL;
+ if (!HadJTEntryDirective) {
+ JTEntryDirective = MJTI->getEntrySize() == 4 ?
+ TAI->getData32bitsDirective() : TAI->getData64bitsDirective();
+ }
+
+ O << JTEntryDirective << ' ';
+
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ // If we're emitting non-PIC code, then emit the entries as direct
+ // references to the target basic blocks.
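+  // For example, assuming a private global prefix of "L": a PIC set-directive
+  // entry for function 1, jump table 0, block 3 prints as "L1_0_set_3", while
+  // the non-PIC form prints the block label "LBB1_3" directly.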
+ if (IsPic) {
+ if (TAI->getSetDirective()) {
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << uid << "_set_" << MBB->getNumber();
+ } else {
+ printBasicBlockLabel(MBB, false, false, false);
+      // If the arch uses custom jump table directives, don't calculate
+      // relative to the jump table label.
+ if (!HadJTEntryDirective)
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << uid;
+ }
+ } else {
+ printBasicBlockLabel(MBB, false, false, false);
+ }
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+ if (TAI->getUsedDirective() != 0) // No need to emit this at all.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ // Ignore debug and non-emitted data.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ if (GV->getName() == "llvm.global_ctors") {
+ SwitchToDataSection(TAI->getStaticCtorsSection());
+ EmitAlignment(Align, 0);
+ EmitXXStructorList(GV->getInitializer());
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ SwitchToDataSection(TAI->getStaticDtorsSection());
+ EmitAlignment(Align, 0);
+ EmitXXStructorList(GV->getInitializer());
+ return true;
+ }
+
+ return false;
+}
+
+/// findGlobalValue - if CV is an expression equivalent to a single
+/// global value, return that value.
+const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return GV;
+ else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Opcode = CE->getOpcode();
+ switch (Opcode) {
+ case Instruction::GetElementPtr: {
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ if (TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], idxVec.size()))
+ return 0;
+ return findGlobalValue(ptrVal);
+ }
+ case Instruction::BitCast:
+ return findGlobalValue(CE->getOperand(0));
+ default:
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which emitUsedDirectiveFor
+/// is true, as being used with this directive.
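+/// On Darwin, for example, this typically emits a ".no_dead_strip" line for
+/// each listed global.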
+
+void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+ const char *Directive = TAI->getUsedDirective();
+
+ // Should be an array of 'sbyte*'.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV = findGlobalValue(InitList->getOperand(i));
+ if (TAI->emitUsedDirectiveFor(GV, Mang)) {
+ O << Directive;
+ EmitConstantValueOnly(InitList->getOperand(i));
+ O << '\n';
+ }
+ }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(Constant *List) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
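+  // For example (with a hypothetical constructor @init):
+  //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+  //                        [{ i32, void ()* } { i32 65535, void ()* @init }]
+  // emits a single pointer-sized entry referencing @init.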
+ if (!isa<ConstantArray>(List)) return;
+ ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1));
+ }
+}
+
+/// getGlobalLinkName - Returns the asm/link name of the specified
+/// global variable. Should be overridden by each target asm printer to
+/// generate the appropriate value.
+const std::string &AsmPrinter::getGlobalLinkName(const GlobalVariable *GV,
+ std::string &LinkName) const {
+ if (isa<Function>(GV)) {
+ LinkName += TAI->getFunctionAddrPrefix();
+ LinkName += Mang->getValueName(GV);
+ LinkName += TAI->getFunctionAddrSuffix();
+ } else {
+ LinkName += TAI->getGlobalVarAddrPrefix();
+ LinkName += Mang->getValueName(GV);
+ LinkName += TAI->getGlobalVarAddrSuffix();
+ }
+
+ return LinkName;
+}
+
+/// EmitExternalGlobal - Emit the external reference to a global variable.
+/// Should be overridden if an indirect reference should be used.
+void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
+ std::string GLN;
+ O << getGlobalLinkName(GV, GLN);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+/// LEB 128 number encoding.
+
+/// PrintULEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing an unsigned leb128 value.
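+/// For example, the value 300 encodes as the two bytes 0xAC and 0x02.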
+void AsmPrinter::PrintULEB128(unsigned Value) const {
+ char Buffer[20];
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ if (Value) Byte |= 0x80;
+ O << "0x" << utohex_buffer(Byte, Buffer+20);
+ if (Value) O << ", ";
+ } while (Value);
+}
+
+/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing a signed leb128 value.
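+/// For example, the value -2 encodes as the single byte 0x7E.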
+void AsmPrinter::PrintSLEB128(int Value) const {
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+ char Buffer[20];
+
+ do {
+ unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ if (IsMore) Byte |= 0x80;
+ O << "0x" << utohex_buffer(Byte, Buffer+20);
+ if (IsMore) O << ", ";
+ } while (IsMore);
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// PrintHex - Print a value as a hexadecimal value.
+///
+void AsmPrinter::PrintHex(int Value) const {
+ char Buffer[20];
+ O << "0x" << utohex_buffer(static_cast<unsigned>(Value), Buffer+20);
+}
+
+/// EOL - Print a newline character to asm stream. If a comment is present
+/// then it will be printed first. Comments should not contain '\n'.
+void AsmPrinter::EOL() const {
+ O << '\n';
+}
+
+void AsmPrinter::EOL(const std::string &Comment) const {
+ if (VerboseAsm && !Comment.empty()) {
+ O << '\t'
+ << TAI->getCommentString()
+ << ' '
+ << Comment;
+ }
+ O << '\n';
+}
+
+void AsmPrinter::EOL(const char* Comment) const {
+ if (VerboseAsm && *Comment) {
+ O << '\t'
+ << TAI->getCommentString()
+ << ' '
+ << Comment;
+ }
+ O << '\n';
+}
+
+/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an
+/// unsigned leb128 value.
+void AsmPrinter::EmitULEB128Bytes(unsigned Value) const {
+ if (TAI->hasLEB128()) {
+ O << "\t.uleb128\t"
+ << Value;
+ } else {
+ O << TAI->getData8bitsDirective();
+ PrintULEB128(Value);
+ }
+}
+
+/// EmitSLEB128Bytes - print an assembler byte data directive to compose a
+/// signed leb128 value.
+void AsmPrinter::EmitSLEB128Bytes(int Value) const {
+ if (TAI->hasLEB128()) {
+ O << "\t.sleb128\t"
+ << Value;
+ } else {
+ O << TAI->getData8bitsDirective();
+ PrintSLEB128(Value);
+ }
+}
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ O << TAI->getData8bitsDirective();
+ PrintHex(Value & 0xFF);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ O << TAI->getData16bitsDirective();
+ PrintHex(Value & 0xFFFF);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ O << TAI->getData32bitsDirective();
+ PrintHex(Value);
+}
+
+/// EmitInt64 - Emit a long long directive and value.
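+/// On targets without a 64-bit data directive the value is split into two
+/// 32-bit words, most significant word first on big-endian targets.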
+///
+void AsmPrinter::EmitInt64(uint64_t Value) const {
+ if (TAI->getData64bitsDirective()) {
+ O << TAI->getData64bitsDirective();
+ PrintHex(Value);
+ } else {
+ if (TM.getTargetData()->isBigEndian()) {
+ EmitInt32(unsigned(Value >> 32)); O << '\n';
+ EmitInt32(unsigned(Value));
+ } else {
+ EmitInt32(unsigned(Value)); O << '\n';
+ EmitInt32(unsigned(Value >> 32));
+ }
+ }
+}
+
+/// toOctal - Convert the low order bits of X into an octal digit.
+///
+static inline char toOctal(int X) {
+ return (X&7)+'0';
+}
+
+/// printStringChar - Print a char, escaped if necessary.
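+/// Characters with no named escape are emitted as three-digit octal escapes,
+/// e.g. the byte 0x01 becomes "\001".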
+///
+static void printStringChar(raw_ostream &O, unsigned char C) {
+ if (C == '"') {
+ O << "\\\"";
+ } else if (C == '\\') {
+ O << "\\\\";
+ } else if (isprint((unsigned char)C)) {
+ O << C;
+ } else {
+ switch(C) {
+ case '\b': O << "\\b"; break;
+ case '\f': O << "\\f"; break;
+ case '\n': O << "\\n"; break;
+ case '\r': O << "\\r"; break;
+ case '\t': O << "\\t"; break;
+ default:
+ O << '\\';
+ O << toOctal(C >> 6);
+ O << toOctal(C >> 3);
+ O << toOctal(C >> 0);
+ break;
+ }
+ }
+}
+
+/// EmitString - Emit a string with quotes and a null terminator.
+/// Special characters are emitted properly.
+/// \literal (e.g. '\t') \endliteral
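+/// For example, a target with an .asciz directive emits "hi" as '.asciz "hi"';
+/// without one, the terminator is written explicitly as '.ascii "hi\0"'.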
+void AsmPrinter::EmitString(const std::string &String) const {
+ EmitString(String.c_str(), String.size());
+}
+
+void AsmPrinter::EmitString(const char *String, unsigned Size) const {
+ const char* AscizDirective = TAI->getAscizDirective();
+ if (AscizDirective)
+ O << AscizDirective;
+ else
+ O << TAI->getAsciiDirective();
+ O << '\"';
+ for (unsigned i = 0; i < Size; ++i)
+ printStringChar(O, String[i]);
+ if (AscizDirective)
+ O << '\"';
+ else
+ O << "\\0\"";
+}
+
+
+/// EmitFile - Emit a .file directive.
+void AsmPrinter::EmitFile(unsigned Number, const std::string &Name) const {
+ O << "\t.file\t" << Number << " \"";
+ for (unsigned i = 0, N = Name.size(); i < N; ++i)
+ printStringChar(O, Name[i]);
+ O << '\"';
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will unconditionally override the
+// alignment request. However, if ForcedAlignBits is specified, this value
+// has final say: the ultimate alignment will be the max of ForcedAlignBits
+// and the alignment computed with NumBits and the global.
+//
+// The algorithm is:
+// Align = NumBits;
+// if (GV && GV->hasalignment) Align = GV->getalignment();
+// Align = std::max(Align, ForcedAlignBits);
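+//
+// For example, EmitAlignment(3) typically emits ".align 3" (an 8-byte
+// boundary), or ".align 8" on targets whose directive takes a byte count.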
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
+ unsigned ForcedAlignBits,
+ bool UseFillExpr) const {
+ if (GV && GV->getAlignment())
+ NumBits = Log2_32(GV->getAlignment());
+ NumBits = std::max(NumBits, ForcedAlignBits);
+
+ if (NumBits == 0) return; // No need to emit alignment.
+ if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits;
+ O << TAI->getAlignDirective() << NumBits;
+
+ unsigned FillValue = TAI->getTextAlignFillValue();
+ UseFillExpr &= IsInTextSection && FillValue;
+ if (UseFillExpr) {
+ O << ',';
+ PrintHex(FillValue);
+ }
+ O << '\n';
+}
+
+
+/// EmitZeros - Emit a block of zeros.
+///
+void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const {
+ if (NumZeros) {
+ if (TAI->getZeroDirective()) {
+ O << TAI->getZeroDirective() << NumZeros;
+ if (TAI->getZeroDirectiveSuffix())
+ O << TAI->getZeroDirectiveSuffix();
+ O << '\n';
+ } else {
+ for (; NumZeros; --NumZeros)
+ O << TAI->getData8bitsDirective(AddrSpace) << "0\n";
+ }
+ }
+}
+
+// Print out the specified constant, without a storage class. Only the
+// constants valid in constant expressions can occur here.
+void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ O << '0';
+ else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ O << CI->getZExtValue();
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ // This is a constant address for a global variable or function. Use the
+ // name of the variable or function as the address value, possibly
+ // decorating it with GlobalVarAddrPrefix/Suffix or
+ // FunctionAddrPrefix/Suffix (these all default to "" )
+ if (isa<Function>(GV)) {
+ O << TAI->getFunctionAddrPrefix()
+ << Mang->getValueName(GV)
+ << TAI->getFunctionAddrSuffix();
+ } else {
+ O << TAI->getGlobalVarAddrPrefix()
+ << Mang->getValueName(GV)
+ << TAI->getGlobalVarAddrSuffix();
+ }
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Opcode = CE->getOpcode();
+ switch (Opcode) {
+ case Instruction::GetElementPtr: {
+ // generate a symbolic expression for the byte address
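+      // For example, a GEP adding a byte offset of 8 to a global @x prints
+      // as "(x) + 8", using the mangled name of x.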
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+ idxVec.size())) {
+ // Truncate/sext the offset to the pointer size.
+ if (TD->getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD->getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
+ }
+
+ if (Offset)
+ O << '(';
+ EmitConstantValueOnly(ptrVal);
+ if (Offset > 0)
+ O << ") + " << Offset;
+ else if (Offset < 0)
+ O << ") - " << -Offset;
+ } else {
+ EmitConstantValueOnly(ptrVal);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ assert(0 && "FIXME: Don't yet support this kind of constant cast expr");
+ break;
+ case Instruction::BitCast:
+ return EmitConstantValueOnly(CE->getOperand(0));
+
+ case Instruction::IntToPtr: {
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/);
+ return EmitConstantValueOnly(Op);
+ }
+
+
+ case Instruction::PtrToInt: {
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot greater or equal to the size of the pointer.
+ if (TD->getTypeAllocSize(Ty) >= TD->getTypeAllocSize(Op->getType()))
+ return EmitConstantValueOnly(Op);
+
+ O << "((";
+ EmitConstantValueOnly(Op);
+ APInt ptrMask = APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Ty));
+
+ SmallString<40> S;
+ ptrMask.toStringUnsigned(S);
+ O << ") & " << S.c_str() << ')';
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ O << '(';
+ EmitConstantValueOnly(CE->getOperand(0));
+ O << ')';
+ switch (Opcode) {
+ case Instruction::Add:
+ O << " + ";
+ break;
+ case Instruction::Sub:
+ O << " - ";
+ break;
+ case Instruction::And:
+ O << " & ";
+ break;
+ case Instruction::Or:
+ O << " | ";
+ break;
+ case Instruction::Xor:
+ O << " ^ ";
+ break;
+ default:
+ break;
+ }
+ O << '(';
+ EmitConstantValueOnly(CE->getOperand(1));
+ O << ')';
+ break;
+ default:
+ assert(0 && "Unsupported operator!");
+ }
+ } else {
+ assert(0 && "Unknown constant value!");
+ }
+}
+
+/// printAsCString - Print the specified array as a C compatible string, only if
+/// the predicate isString is true.
+///
+static void printAsCString(raw_ostream &O, const ConstantArray *CVA,
+ unsigned LastElt) {
+ assert(CVA->isString() && "Array is not string compatible!");
+
+ O << '\"';
+ for (unsigned i = 0; i != LastElt; ++i) {
+ unsigned char C =
+ (unsigned char)cast<ConstantInt>(CVA->getOperand(i))->getZExtValue();
+ printStringChar(O, C);
+ }
+ O << '\"';
+}
+
+/// EmitString - Emit a zero-byte-terminated string constant.
+///
+void AsmPrinter::EmitString(const ConstantArray *CVA) const {
+ unsigned NumElts = CVA->getNumOperands();
+ if (TAI->getAscizDirective() && NumElts &&
+ cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) {
+ O << TAI->getAscizDirective();
+ printAsCString(O, CVA, NumElts-1);
+ } else {
+ O << TAI->getAsciiDirective();
+ printAsCString(O, CVA, NumElts);
+ }
+ O << '\n';
+}
+
+void AsmPrinter::EmitGlobalConstantArray(const ConstantArray *CVA,
+ unsigned AddrSpace) {
+ if (CVA->isString()) {
+ EmitString(CVA);
+ } else { // Not a string. Print the values in successive locations
+ for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstant(CVA->getOperand(i), AddrSpace);
+ }
+}
+
+void AsmPrinter::EmitGlobalConstantVector(const ConstantVector *CP) {
+ const VectorType *PTy = CP->getType();
+
+ for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+ EmitGlobalConstant(CP->getOperand(I));
+}
+
+void AsmPrinter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+ unsigned AddrSpace) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(CVS->getType());
+ const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+ uint64_t sizeSoFar = 0;
+ for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+ const Constant* field = CVS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+ uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+ - cvsLayout->getElementOffset(i)) - fieldSize;
+ sizeSoFar += fieldSize + padSize;
+
+ // Now print the actual field value.
+ EmitGlobalConstant(field, AddrSpace);
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ EmitZeros(padSize, AddrSpace);
+ }
+ assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
+
+void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP,
+ unsigned AddrSpace) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision...
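+  // For example, the double 1.0 has bit pattern 0x3FF0000000000000 and is
+  // emitted as the 64-bit integer 4607182418800017408.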
+ const TargetData *TD = TM.getTargetData();
+ if (CFP->getType() == Type::DoubleTy) {
+ double Val = CFP->getValueAPF().convertToDouble(); // for comment only
+ uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ if (TAI->getData64bitsDirective(AddrSpace)) {
+ O << TAI->getData64bitsDirective(AddrSpace) << i;
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " double value: " << Val;
+ O << '\n';
+ } else if (TD->isBigEndian()) {
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " double most significant word " << Val;
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " double least significant word " << Val;
+ O << '\n';
+ } else {
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " double least significant word " << Val;
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " double most significant word " << Val;
+ O << '\n';
+ }
+ return;
+ } else if (CFP->getType() == Type::FloatTy) {
+ float Val = CFP->getValueAPF().convertToFloat(); // for comment only
+ O << TAI->getData32bitsDirective(AddrSpace)
+ << CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " float " << Val;
+ O << '\n';
+ return;
+ } else if (CFP->getType() == Type::X86_FP80Ty) {
+    // All long double variants are printed as hex.
+    // The local APInt copy keeps the raw data alive while it is printed.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ // Convert to double so we can print the approximate val as a comment.
+ APFloat DoubleVal = CFP->getValueAPF();
+ bool ignored;
+ DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ if (TD->isBigEndian()) {
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double most significant halfword of ~"
+ << DoubleVal.convertToDouble();
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double least significant halfword";
+ O << '\n';
+ } else {
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double least significant halfword of ~"
+ << DoubleVal.convertToDouble();
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next halfword";
+ O << '\n';
+ O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double most significant halfword";
+ O << '\n';
+ }
+ EmitZeros(TD->getTypeAllocSize(Type::X86_FP80Ty) -
+ TD->getTypeStoreSize(Type::X86_FP80Ty), AddrSpace);
+ return;
+ } else if (CFP->getType() == Type::PPC_FP128Ty) {
+    // All long double variants are printed as hex.
+    // The local APInt copy keeps the raw data alive while it is printed.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ if (TD->isBigEndian()) {
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double most significant word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double least significant word";
+ O << '\n';
+ } else {
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double least significant word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double next word";
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " long double most significant word";
+ O << '\n';
+ }
+ return;
+ } else assert(0 && "Floating point constant type not handled");
+}
+
+void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+ unsigned AddrSpace) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned BitWidth = CI->getBitWidth();
+ assert(isPowerOf2_32(BitWidth) &&
+ "Non-power-of-2-sized integers not handled!");
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
+ const uint64_t *RawData = CI->getValue().getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val;
+ if (TD->isBigEndian())
+ Val = RawData[e - i - 1];
+ else
+ Val = RawData[i];
+
+ if (TAI->getData64bitsDirective(AddrSpace))
+ O << TAI->getData64bitsDirective(AddrSpace) << Val << '\n';
+ else if (TD->isBigEndian()) {
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " Double-word most significant word " << Val;
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " Double-word least significant word " << Val;
+ O << '\n';
+ } else {
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " Double-word least significant word " << Val;
+ O << '\n';
+ O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString()
+ << " Double-word most significant word " << Val;
+ O << '\n';
+ }
+ }
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+ const TargetData *TD = TM.getTargetData();
+ const Type *type = CV->getType();
+ unsigned Size = TD->getTypeAllocSize(type);
+
+ if (CV->isNullValue() || isa<UndefValue>(CV)) {
+ EmitZeros(Size, AddrSpace);
+ return;
+ } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+ EmitGlobalConstantArray(CVA , AddrSpace);
+ return;
+ } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+ EmitGlobalConstantStruct(CVS, AddrSpace);
+ return;
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ EmitGlobalConstantFP(CFP, AddrSpace);
+ return;
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ // Small integers are handled below; large integers are handled here.
+ if (Size > 4) {
+ EmitGlobalConstantLargeInt(CI, AddrSpace);
+ return;
+ }
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ EmitGlobalConstantVector(CP);
+ return;
+ }
+
+ printDataDirective(type, AddrSpace);
+ EmitConstantValueOnly(CV);
+ if (VerboseAsm) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ SmallString<40> S;
+ CI->getValue().toStringUnsigned(S, 16);
+ O << "\t\t\t" << TAI->getCommentString() << " 0x" << S.c_str();
+ }
+ }
+ O << '\n';
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ abort();
+}
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asm strings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
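+/// For example, "${:uid}" expands to a counter that is unique per machine
+/// instruction, which is handy for forming local labels in inline asm.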
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ O << TAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ if (VerboseAsm)
+ O << TAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Assign a unique ID to this machine instruction.
+ static const MachineInstr *LastMI = 0;
+ static const Function *F = 0;
+ static unsigned Counter = 0U-1;
+
+  // Comparing the address of MI isn't sufficient, because MachineInstrs may
+ // be allocated to the same address across functions.
+ const Function *ThisF = MI->getParent()->getParent()->getFunction();
+
+ // If this is a new machine instruction, bump the counter.
+ if (LastMI != MI || F != ThisF) {
+ ++Counter;
+ LastMI = MI;
+ F = ThisF;
+ }
+ O << Counter;
+ } else {
+ cerr << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ exit(1);
+ }
+}
+
+/// processDebugLoc - Processes the debug information of each machine
+/// instruction's DebugLoc.
+void AsmPrinter::processDebugLoc(DebugLoc DL) {
+ if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
+ if (!DL.isUnknown()) {
+ static DebugLocTuple PrevDLT(0, ~0U, ~0U);
+ DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
+
+ if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT)
+ printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
+ DICompileUnit(CurDLT.CompileUnit)));
+
+ PrevDLT = CurDLT;
+ }
+ }
+}
+
+/// printInlineAsm - This method formats and prints the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
+ unsigned NumOperands = MI->getNumOperands();
+
+ // Count the number of register definitions.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != NumOperands-1 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+ // These are useful to see where empty asm's wound up.
+ if (AsmStr[0] == 0) {
+ O << TAI->getInlineAsmStart() << "\n\t" << TAI->getInlineAsmEnd() << '\n';
+ return;
+ }
+
+ O << TAI->getInlineAsmStart() << "\n\t";
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = TAI->getAssemblerDialect();
+
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ O.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ O << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ O << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1) {
+ cerr << "Nested variants found in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ O << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ O << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (StrEnd == 0) {
+ cerr << "Unterminated ${:foo} operand in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+
+ std::string Val(StrStart, StrEnd);
+ PrintSpecial(MI, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ char *IDEnd;
+ errno = 0;
+ long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs.
+ if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) {
+ cerr << "Bad $ operand number in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+        // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0) {
+ cerr << "Bad ${:} expression in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}') {
+ cerr << "Bad ${} expression in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if ((unsigned)Val >= NumOperands-1) {
+ cerr << "Invalid $ operand number in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = 1;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ if (OpNo >= MI->getNumOperands()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0]=='l') // labels are target independent
+ printBasicBlockLabel(MI->getOperand(OpNo).getMBB(),
+ false, false, false);
+ else {
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if ((OpFlags & 7) == 4) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0);
+ }
+ }
+ }
+ if (Error) {
+ cerr << "Invalid operand found in inline asm: '"
+ << AsmStr << "'\n";
+ MI->dump();
+ exit(1);
+ }
+ }
+ break;
+ }
+ }
+ }
+ O << "\n\t" << TAI->getInlineAsmEnd() << '\n';
+}
+
+/// printImplicitDef - This method prints the specified machine instruction
+/// that is an implicit def.
+void AsmPrinter::printImplicitDef(const MachineInstr *MI) const {
+ if (VerboseAsm)
+ O << '\t' << TAI->getCommentString() << " implicit-def: "
+ << TRI->getAsmName(MI->getOperand(0).getReg()) << '\n';
+}
+
+/// printLabel - This method prints a local label used by debug and
+/// exception handling tables.
+void AsmPrinter::printLabel(const MachineInstr *MI) const {
+ printLabel(MI->getOperand(0).getImm());
+}
+
+void AsmPrinter::printLabel(unsigned Id) const {
+ O << TAI->getPrivateGlobalPrefix() << "label" << Id << ":\n";
+}
+
+/// printDeclare - This method prints a local variable declaration used by
+/// debug tables.
+/// FIXME: It doesn't really print anything; rather, it inserts a
+/// DebugVariable entry into the DWARF table.
+void AsmPrinter::printDeclare(const MachineInstr *MI) const {
+ unsigned FI = MI->getOperand(0).getIndex();
+ GlobalValue *GV = MI->getOperand(1).getGlobal();
+ DW->RecordVariable(cast<GlobalVariable>(GV), FI, MI);
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Target doesn't support this yet!
+ return true;
+}
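+
+// As an illustration (a hedged sketch, not part of this patch): a target's
+// AsmPrinter subclass typically overrides PrintAsmOperand to reject unknown
+// modifier letters and otherwise defer to its own operand printer, returning
+// false on success.  The class and helper names below are hypothetical:
+//
+//   bool FooAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+//                                       unsigned AsmVariant,
+//                                       const char *ExtraCode) {
+//     if (ExtraCode && ExtraCode[0])
+//       return true;               // Unknown modifier: report an error.
+//     printOperand(MI, OpNo);      // Target-specific operand printing.
+//     return false;                // Success.
+//   }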
+
+/// printBasicBlockLabel - This method prints the label for the specified
+/// MachineBasicBlock.
+void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB,
+ bool printAlign,
+ bool printColon,
+ bool printComment) const {
+ if (printAlign) {
+ unsigned Align = MBB->getAlignment();
+ if (Align)
+ EmitAlignment(Log2_32(Align));
+ }
+
+ O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << '_'
+ << MBB->getNumber();
+ if (printColon)
+ O << ':';
+ if (printComment && MBB->getBasicBlock())
+ O << '\t' << TAI->getCommentString() << ' '
+ << MBB->getBasicBlock()->getNameStart();
+}
+
+/// printPICJumpTableSetLabel - This method prints a set label for the
+/// specified MachineBasicBlock for a jumptable entry.
+void AsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false, false);
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '\n';
+}
+
+void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << '_' << uid2
+ << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false, false);
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '_' << uid2 << '\n';
+}
+
+/// printDataDirective - This method prints the asm directive for the
+/// specified type.
+void AsmPrinter::printDataDirective(const Type *type, unsigned AddrSpace) {
+ const TargetData *TD = TM.getTargetData();
+ switch (type->getTypeID()) {
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(type)->getBitWidth();
+ if (BitWidth <= 8)
+ O << TAI->getData8bitsDirective(AddrSpace);
+ else if (BitWidth <= 16)
+ O << TAI->getData16bitsDirective(AddrSpace);
+ else if (BitWidth <= 32)
+ O << TAI->getData32bitsDirective(AddrSpace);
+ else if (BitWidth <= 64) {
+ assert(TAI->getData64bitsDirective(AddrSpace) &&
+ "Target cannot handle 64-bit constant exprs!");
+ O << TAI->getData64bitsDirective(AddrSpace);
+ } else {
+ assert(0 && "Target cannot handle given data directive width!");
+ }
+ break;
+ }
+ case Type::PointerTyID:
+ if (TD->getPointerSize() == 8) {
+ assert(TAI->getData64bitsDirective(AddrSpace) &&
+ "Target cannot handle 64-bit pointer exprs!");
+ O << TAI->getData64bitsDirective(AddrSpace);
+ } else if (TD->getPointerSize() == 2) {
+ O << TAI->getData16bitsDirective(AddrSpace);
+ } else if (TD->getPointerSize() == 1) {
+ O << TAI->getData8bitsDirective(AddrSpace);
+ } else {
+ O << TAI->getData32bitsDirective(AddrSpace);
+ }
+ break;
+ case Type::FloatTyID: case Type::DoubleTyID:
+ case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID:
+ assert (0 && "Should have already output floating point constant.");
+ default:
+ assert (0 && "Can't handle printing this type of thing");
+ break;
+ }
+}
+
+void AsmPrinter::printSuffixedName(const char *Name, const char *Suffix,
+ const char *Prefix) {
+ if (Name[0]=='\"')
+ O << '\"';
+ O << TAI->getPrivateGlobalPrefix();
+ if (Prefix) O << Prefix;
+ if (Name[0]=='\"')
+ O << '\"';
+ if (Name[0]=='\"')
+ O << Name[1];
+ else
+ O << Name;
+ O << Suffix;
+ if (Name[0]=='\"')
+ O << '\"';
+}
+
+void AsmPrinter::printSuffixedName(const std::string &Name,
+                                   const char *Suffix) {
+ printSuffixedName(Name.c_str(), Suffix);
+}
+
+void AsmPrinter::printVisibility(const std::string& Name,
+ unsigned Visibility) const {
+ if (Visibility == GlobalValue::HiddenVisibility) {
+ if (const char *Directive = TAI->getHiddenDirective())
+ O << Directive << Name << '\n';
+ } else if (Visibility == GlobalValue::ProtectedVisibility) {
+ if (const char *Directive = TAI->getProtectedDirective())
+ O << Directive << Name << '\n';
+ }
+}
+
+void AsmPrinter::printOffset(int64_t Offset) const {
+ if (Offset > 0)
+ O << '+' << Offset;
+ else if (Offset < 0)
+ O << Offset;
+}
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
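+  // Strategies that do not carry metadata need no printer.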
+ if (!S->usesMetadata())
+ return 0;
+
+ gcp_iterator GCPI = GCMetadataPrinters.find(S);
+ if (GCPI != GCMetadataPrinters.end())
+ return GCPI->second;
+
+ const char *Name = S->getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ GCMetadataPrinter *GMP = I->instantiate();
+ GMP->S = S;
+ GCMetadataPrinters.insert(std::make_pair(S, GMP));
+ return GMP;
+ }
+
+ cerr << "no GCMetadataPrinter registered for GC: " << Name << "\n";
+ abort();
+}
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..066aaab
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_llvm_library(LLVMAsmPrinter
+ AsmPrinter.cpp
+ DIE.cpp
+ DwarfDebug.cpp
+ DwarfException.cpp
+ DwarfLabel.cpp
+ DwarfPrinter.cpp
+ DwarfWriter.cpp
+ OcamlGCPrinter.cpp
+ )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 0000000..dc149cf
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,518 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(const AsmPrinter *Asm) const {
+ // Emit its Dwarf tag type.
+ Asm->EmitULEB128Bytes(Tag);
+ Asm->EOL(dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ Asm->EmitULEB128Bytes(ChildrenFlag);
+ Asm->EOL(dwarf::ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ Asm->EmitULEB128Bytes(AttrData.getAttribute());
+ Asm->EOL(dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ Asm->EmitULEB128Bytes(AttrData.getForm());
+ Asm->EOL(dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(1)");
+ Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(std::ostream &O) {
+ O << "Abbreviation @"
+ << std::hex << (intptr_t)this << std::dec
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(ChildrenFlag)
+ << "\n";
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << "\n";
+ }
+}
+void DIEAbbrev::dump() { print(cerr); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+void DIE::AddSiblingOffset() {
+ DIEInteger *DI = new DIEInteger(0);
+ Values.insert(Values.begin(), DI);
+ Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIE::Profile(FoldingSetNodeID &ID) {
+ Abbrev.Profile(ID);
+
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ ID.AddPointer(Children[i]);
+
+ for (unsigned j = 0, M = Values.size(); j < M; ++j)
+ ID.AddPointer(Values[j]);
+}
+
+#ifndef NDEBUG
+void DIE::print(std::ostream &O, unsigned IncIndent) {
+ static unsigned IndentCount = 0;
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
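+  // DIEBlock passes tag 0 to its DIE base class, so a zero tag marks a block.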
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << "0x" << std::hex << (intptr_t)this << std::dec
+ << ", Offset: " << Offset
+ << ", Size: " << Size
+ << "\n";
+
+ O << Indent
+ << dwarf::TagString(Abbrev.getTag())
+ << " "
+ << dwarf::ChildrenString(Abbrev.getChildrenFlag());
+ } else {
+ O << "Size: " << Size;
+ }
+ O << "\n";
+
+ const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+
+ if (!isBlock)
+ O << dwarf::AttributeString(Data[i].getAttribute());
+ else
+ O << "Blk[" << i << "]";
+
+ O << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(cerr);
+}
+#endif
+
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(cerr);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(Dwarf *D, unsigned Form) const {
+ const AsmPrinter *Asm = D->getAsm();
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Asm->EmitInt8(Integer); break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Asm->EmitInt16(Integer); break;
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Asm->EmitInt32(Integer); break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: Asm->EmitInt64(Integer); break;
+ case dwarf::DW_FORM_udata: Asm->EmitULEB128Bytes(Integer); break;
+ case dwarf::DW_FORM_sdata: Asm->EmitSLEB128Bytes(Integer); break;
+ default: assert(0 && "DIE Value form not supported yet"); break;
+ }
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_udata: return TargetAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return TargetAsmInfo::getSLEB128Size(Integer);
+ default: assert(0 && "DIE Value form not supported yet"); break;
+ }
+ return 0;
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEInteger::Profile(FoldingSetNodeID &ID, unsigned Int) {
+ ID.AddInteger(isInteger);
+ ID.AddInteger(Int);
+}
+void DIEInteger::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Integer);
+}
+
+#ifndef NDEBUG
+void DIEInteger::print(std::ostream &O) {
+ O << "Int: " << (int64_t)Integer
+ << " 0x" << std::hex << Integer << std::dec;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(Dwarf *D, unsigned Form) const {
+ D->getAsm()->EmitString(Str);
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEString::Profile(FoldingSetNodeID &ID, const std::string &Str) {
+ ID.AddInteger(isString);
+ ID.AddString(Str);
+}
+void DIEString::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Str);
+}
+
+#ifndef NDEBUG
+void DIEString::print(std::ostream &O) {
+ O << "Str: \"" << Str << "\"";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDwarfLabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEDwarfLabel::EmitValue(Dwarf *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitReference(Label, false, IsSmall);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEDwarfLabel::Profile(FoldingSetNodeID &ID, const DWLabel &Label) {
+ ID.AddInteger(isLabel);
+ Label.Profile(ID);
+}
+void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Label);
+}
+
+#ifndef NDEBUG
+void DIEDwarfLabel::print(std::ostream &O) {
+ O << "Lbl: ";
+ Label.print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEObjectLabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEObjectLabel::EmitValue(Dwarf *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitReference(Label, false, IsSmall);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEObjectLabel::Profile(FoldingSetNodeID &ID, const std::string &Label) {
+ ID.AddInteger(isAsIsLabel);
+ ID.AddString(Label);
+}
+void DIEObjectLabel::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Label.c_str());
+}
+
+#ifndef NDEBUG
+void DIEObjectLabel::print(std::ostream &O) {
+ O << "Obj: " << Label;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIESectionOffset Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIESectionOffset::EmitValue(Dwarf *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitSectionOffset(Label.getTag(), Section.getTag(),
+ Label.getNumber(), Section.getNumber(),
+ IsSmall, IsEH, UseSet);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIESectionOffset::Profile(FoldingSetNodeID &ID, const DWLabel &Label,
+ const DWLabel &Section) {
+ ID.AddInteger(isSectionOffset);
+ Label.Profile(ID);
+ Section.Profile(ID);
+  // IsEH and UseSet are specific to the Label/Section that we will emit the
+  // offset for, so Label/Section are enough for uniqueness.
+}
+void DIESectionOffset::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, Label, Section);
+}
+
+#ifndef NDEBUG
+void DIESectionOffset::print(std::ostream &O) {
+ O << "Off: ";
+ Label.print(O);
+ O << "-";
+ Section.print(O);
+ O << "-" << IsEH << "-" << UseSet;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(Dwarf *D, unsigned Form) const {
+ bool IsSmall = Form == dwarf::DW_FORM_data4;
+ D->EmitDifference(LabelHi, LabelLo, IsSmall);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ return TD->getPointerSize();
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEDelta::Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+ const DWLabel &LabelLo) {
+ ID.AddInteger(isDelta);
+ LabelHi.Profile(ID);
+ LabelLo.Profile(ID);
+}
+void DIEDelta::Profile(FoldingSetNodeID &ID) {
+ Profile(ID, LabelHi, LabelLo);
+}
+
+#ifndef NDEBUG
+void DIEDelta::print(std::ostream &O) {
+ O << "Del: ";
+ LabelHi.print(O);
+ O << "-";
+ LabelLo.print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(Dwarf *D, unsigned Form) const {
+ D->getAsm()->EmitInt32(Entry->getOffset());
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIEEntry::Profile(FoldingSetNodeID &ID, DIE *Entry) {
+ ID.AddInteger(isEntry);
+ ID.AddPointer(Entry);
+}
+void DIEEntry::Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(isEntry);
+
+ if (Entry)
+ ID.AddPointer(Entry);
+ else
+ ID.AddPointer(this);
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(std::ostream &O) {
+ O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - Calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(const TargetData *TD) {
+ if (!Size) {
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ Size += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(Dwarf *D, unsigned Form) const {
+ const AsmPrinter *Asm = D->getAsm();
+ switch (Form) {
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: Asm->EmitULEB128Bytes(Size); break;
+ default: assert(0 && "Improper form for block"); break;
+ }
+
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ Asm->EOL();
+ Values[i]->EmitValue(D, AbbrevData[i].getForm());
+ }
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + TargetAsmInfo::getULEB128Size(Size);
+ default: assert(0 && "Improper form for block"); break;
+ }
+ return 0;
+}
+
+void DIEBlock::Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(isBlock);
+ DIE::Profile(ID);
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(std::ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 0000000..b14d91c
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,549 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+#include <iosfwd>
+
+namespace llvm {
+ class AsmPrinter;
+ class Dwarf;
+ class TargetData;
+
+ //===--------------------------------------------------------------------===//
+  /// DIEAbbrevData - Dwarf abbreviation data; describes one attribute of a
+  /// Dwarf abbreviation.
+ class VISIBILITY_HIDDEN DIEAbbrevData {
+ /// Attribute - Dwarf attribute code.
+ ///
+ unsigned Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ unsigned Form;
+ public:
+ DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+ // Accessors.
+ unsigned getAttribute() const { return Attribute; }
+ unsigned getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+ /// information object.
+ class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode {
+ /// Tag - Dwarf tag code.
+ ///
+ unsigned Tag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ unsigned ChildrenFlag;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ SmallVector<DIEAbbrevData, 8> Data;
+ public:
+ DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+ virtual ~DIEAbbrev() {}
+
+ // Accessors.
+ unsigned getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ unsigned getChildrenFlag() const { return ChildrenFlag; }
+ const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+ void setTag(unsigned T) { Tag = T; }
+ void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(unsigned Attribute, unsigned Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+ /// Emit - Print the abbreviation using the specified asm printer.
+ ///
+ void Emit(const AsmPrinter *Asm) const;
+
+#ifndef NDEBUG
+ void print(std::ostream *O) {
+ if (O) print(*O);
+ }
+ void print(std::ostream &O);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIE - A structured debug information entry. Has an abbreviation which
+  /// describes its organization.
+ class CompileUnit;
+ class DIEValue;
+
+ class VISIBILITY_HIDDEN DIE : public FoldingSetNode {
+ protected:
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ /// Attributes values.
+ ///
+ SmallVector<DIEValue*, 32> Values;
+
+ /// Abstract compile unit.
+ CompileUnit *AbstractCU;
+ public:
+ explicit DIE(unsigned Tag)
+ : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), Size(0) {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ SmallVector<DIEValue*, 32> &getValues() { return Values; }
+ CompileUnit *getAbstractCompileUnit() const { return AbstractCU; }
+
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+ void setAbstractCompileUnit(CompileUnit *CU) { AbstractCU = CU; }
+
+ /// AddValue - Add a value and attributes to a DIE.
+ ///
+ void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// SiblingOffset - Return the offset of the debug information entry's
+ /// sibling.
+ unsigned SiblingOffset() const { return Offset + Size; }
+
+ /// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+ ///
+ void AddSiblingOffset();
+
+ /// AddChild - Add a child to the DIE.
+ ///
+ void AddChild(DIE *Child) {
+ Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+ Children.push_back(Child);
+ }
+
+ /// Detach - Detaches objects connected to it after copying.
+ ///
+ void Detach() {
+ Children.clear();
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+    void Profile(FoldingSetNodeID &ID);
+
+#ifndef NDEBUG
+ void print(std::ostream *O, unsigned IncIndent = 0) {
+ if (O) print(*O, IncIndent);
+ }
+ void print(std::ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEValue - A debug information entry value.
+ ///
+ class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode {
+ public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isAsIsLabel,
+ isSectionOffset,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+ protected:
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+ public:
+ explicit DIEValue(unsigned T) : Type(T) {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ virtual void Profile(FoldingSetNodeID &ID) = 0;
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *) { return true; }
+
+#ifndef NDEBUG
+ void print(std::ostream *O) {
+ if (O) print(*O);
+ }
+ virtual void print(std::ostream &O) = 0;
+ void dump();
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEInteger - An integer value DIE.
+ ///
+ class VISIBILITY_HIDDEN DIEInteger : public DIEValue {
+ uint64_t Integer;
+ public:
+ explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ /// BestForm - Choose the best form for integer.
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Int) {
+ if (IsSigned) {
+ if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
+ if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
+ if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ } else {
+ if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
+ if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
+ if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4;
+ }
+ return dwarf::DW_FORM_data8;
+ }
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, unsigned Int);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEInteger *) { return true; }
+ static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEString - A string value DIE.
+ ///
+ class VISIBILITY_HIDDEN DIEString : public DIEValue {
+ const std::string Str;
+ public:
+ explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {}
+
+ /// EmitValue - Emit string value.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of string value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *, unsigned /*Form*/) const {
+ return Str.size() + sizeof(char); // sizeof('\0');
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const std::string &Str);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEString *) { return true; }
+ static bool classof(const DIEValue *S) { return S->getType() == isString; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDwarfLabel - A Dwarf internal label expression DIE.
+ //
+ class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue {
+ const DWLabel Label;
+ public:
+ explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const DWLabel &Label);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDwarfLabel *) { return true; }
+ static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEObjectLabel - A label to an object in code or data.
+ //
+ class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue {
+ const std::string Label;
+ public:
+ explicit DIEObjectLabel(const std::string &L)
+ : DIEValue(isAsIsLabel), Label(L) {}
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const std::string &Label);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEObjectLabel *) { return true; }
+ static bool classof(const DIEValue *L) {
+ return L->getType() == isAsIsLabel;
+ }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIESectionOffset - A section offset DIE.
+ ///
+ class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue {
+ const DWLabel Label;
+ const DWLabel Section;
+ bool IsEH : 1;
+ bool UseSet : 1;
+ public:
+ DIESectionOffset(const DWLabel &Lab, const DWLabel &Sec,
+ bool isEH = false, bool useSet = true)
+ : DIEValue(isSectionOffset), Label(Lab), Section(Sec),
+ IsEH(isEH), UseSet(useSet) {}
+
+ /// EmitValue - Emit section offset.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of section offset value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const DWLabel &Label,
+ const DWLabel &Section);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIESectionOffset *) { return true; }
+ static bool classof(const DIEValue *D) {
+ return D->getType() == isSectionOffset;
+ }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEDelta - A simple label difference DIE.
+ ///
+ class VISIBILITY_HIDDEN DIEDelta : public DIEValue {
+ const DWLabel LabelHi;
+ const DWLabel LabelLo;
+ public:
+ DIEDelta(const DWLabel &Hi, const DWLabel &Lo)
+ : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+ /// EmitValue - Emit delta value.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of delta value in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+ const DWLabel &LabelLo);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDelta *) { return true; }
+ static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+  /// DIEEntry - A pointer to another debug information entry.  An instance of
+  /// this class can also be used as a proxy for a debug information entry not
+  /// yet defined (i.e., types).
+ class VISIBILITY_HIDDEN DIEEntry : public DIEValue {
+ DIE *Entry;
+ public:
+ explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+ DIE *getEntry() const { return Entry; }
+ void setEntry(DIE *E) { Entry = E; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, DIE *Entry);
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEEntry *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// DIEBlock - A block of values. Primarily used for location expressions.
+ //
+ class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE {
+ unsigned Size; // Size in bytes excluding size header.
+ public:
+ DIEBlock()
+ : DIEValue(isBlock), DIE(0), Size(0) {}
+ virtual ~DIEBlock() {}
+
+    /// ComputeSize - Calculate the size of the block.
+ ///
+ unsigned ComputeSize(const TargetData *TD);
+
+ /// BestForm - Choose the best form for data.
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
+ if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+ if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
+ return dwarf::DW_FORM_block;
+ }
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ virtual void Profile(FoldingSetNodeID &ID);
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEBlock *) { return true; }
+ static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O);
+#endif
+ };
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 0000000..25217b0
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,2610 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfDebug.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/System/Path.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include <ostream>
+using namespace llvm;
+
+static TimerGroup &getDwarfTimerGroup() {
+ static TimerGroup DwarfTimerGroup("Dwarf Debugging");
+ return DwarfTimerGroup;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitDiesSetSize = 9; // log2(512)
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+static const unsigned InitValuesSetSize = 9; // log2(512)
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class VISIBILITY_HIDDEN CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ DIE *Die;
+
+  /// GVToDieMap - Tracks the mapping of unit level debug information
+  /// variables to debug information entries.
+ std::map<GlobalVariable *, DIE *> GVToDieMap;
+
+  /// GVToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEEntry proxy.
+ std::map<GlobalVariable *, DIEEntry *> GVToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// DiesSet - Used to uniquely define dies within the compile unit.
+ ///
+ FoldingSet<DIE> DiesSet;
+public:
+ CompileUnit(unsigned I, DIE *D)
+ : ID(I), Die(D), DiesSet(InitDiesSetSize) {}
+ ~CompileUnit() { delete Die; }
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getDie() const { return Die; }
+ StringMap<DIE*> &getGlobals() { return Globals; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !Die->getChildren().empty(); }
+
+ /// AddGlobal - Add a new global entity to the compile unit.
+ ///
+ void AddGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// getDieMapSlotFor - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *&getDieMapSlotFor(GlobalVariable *GV) { return GVToDieMap[GV]; }
+
+ /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for the
+ /// specified debug variable.
+ DIEEntry *&getDIEEntrySlotFor(GlobalVariable *GV) {
+ return GVToDIEEntryMap[GV];
+ }
+
+ /// AddDie - Adds or interns the DIE to the compile unit.
+ ///
+ DIE *AddDie(DIE &Buffer) {
+ FoldingSetNodeID ID;
+ Buffer.Profile(ID);
+ void *Where;
+ DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Die) {
+ Die = new DIE(Buffer);
+ DiesSet.InsertNode(Die, Where);
+ this->Die->AddChild(Die);
+ Buffer.Detach();
+ }
+
+ return Die;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class VISIBILITY_HIDDEN DbgVariable {
+ DIVariable Var; // Variable Descriptor.
+ unsigned FrameIndex; // Variable frame index.
+ bool InlinedFnVar; // Variable for an inlined function.
+public:
+ DbgVariable(DIVariable V, unsigned I, bool IFV)
+ : Var(V), FrameIndex(I), InlinedFnVar(IFV) {}
+
+ // Accessors.
+ DIVariable getVariable() const { return Var; }
+ unsigned getFrameIndex() const { return FrameIndex; }
+ bool isInlinedFnVar() const { return InlinedFnVar; }
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgScope - This class is used to track scope information.
+///
+class DbgConcreteScope;
+class VISIBILITY_HIDDEN DbgScope {
+ DbgScope *Parent; // Parent to this scope.
+ DIDescriptor Desc; // Debug info descriptor for scope.
+ // Either subprogram or block.
+ unsigned StartLabelID; // Label ID of the beginning of scope.
+ unsigned EndLabelID; // Label ID of the end of scope.
+ SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope.
+ SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
+ SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs.
+public:
+ DbgScope(DbgScope *P, DIDescriptor D)
+ : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0) {}
+ virtual ~DbgScope();
+
+ // Accessors.
+ DbgScope *getParent() const { return Parent; }
+ DIDescriptor getDesc() const { return Desc; }
+ unsigned getStartLabelID() const { return StartLabelID; }
+ unsigned getEndLabelID() const { return EndLabelID; }
+ SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+ SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
+ SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; }
+ void setStartLabelID(unsigned S) { StartLabelID = S; }
+ void setEndLabelID(unsigned E) { EndLabelID = E; }
+
+ /// AddScope - Add a scope to the scope.
+ ///
+ void AddScope(DbgScope *S) { Scopes.push_back(S); }
+
+ /// AddVariable - Add a variable to the scope.
+ ///
+ void AddVariable(DbgVariable *V) { Variables.push_back(V); }
+
+ /// AddConcreteInst - Add a concrete instance to the scope.
+ ///
+ void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+#ifndef NDEBUG
+void DbgScope::dump() const {
+ static unsigned IndentLevel = 0;
+ std::string Indent(IndentLevel, ' ');
+
+ cerr << Indent; Desc.dump();
+ cerr << " [" << StartLabelID << ", " << EndLabelID << "]\n";
+
+ IndentLevel += 2;
+
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
+ if (Scopes[i] != this)
+ Scopes[i]->dump();
+
+ IndentLevel -= 2;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+/// DbgConcreteScope - This class is used to track a scope that holds concrete
+/// instance information.
+///
+class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope {
+ CompileUnit *Unit;
+ DIE *Die; // Debug info for this concrete scope.
+public:
+ DbgConcreteScope(DIDescriptor D) : DbgScope(NULL, D) {}
+
+ // Accessors.
+ DIE *getDie() const { return Die; }
+ void setDie(DIE *D) { Die = D; }
+};
+
+DbgScope::~DbgScope() {
+ for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
+ delete Scopes[i];
+ for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+ delete Variables[j];
+ for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k)
+ delete ConcreteInsts[k];
+}
+
+} // end llvm namespace
+
+DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+ : Dwarf(OS, A, T, "dbg"), MainCU(0),
+ AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
+ ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(),
+ SectionSourceLines(), didInitial(false), shouldEmit(false),
+ FunctionDbgScope(0), DebugTimer(0) {
+ if (TimePassesIsEnabled)
+ DebugTimer = new Timer("Dwarf Debug Writer",
+ getDwarfTimerGroup());
+}
+
+DwarfDebug::~DwarfDebug() {
+ for (unsigned j = 0, M = Values.size(); j < M; ++j)
+ delete Values[j];
+
+ for (DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ I = AbstractInstanceRootMap.begin(),
+ E = AbstractInstanceRootMap.end(); I != E;++I)
+ delete I->second;
+
+ delete DebugTimer;
+}
+
+/// AssignAbbrevNumber - Define a unique number for the abbreviation.
+///
+void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations.push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations.size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
+
+/// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *DwarfDebug::CreateDIEEntry(DIE *Entry) {
+ DIEEntry *Value;
+
+ if (Entry) {
+ FoldingSetNodeID ID;
+ DIEEntry::Profile(ID, Entry);
+ void *Where;
+ Value = static_cast<DIEEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where));
+
+ if (Value) return Value;
+
+ Value = new DIEEntry(Entry);
+ ValuesSet.InsertNode(Value, Where);
+ } else {
+ Value = new DIEEntry(Entry);
+ }
+
+ Values.push_back(Value);
+ return Value;
+}
+
+/// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
+///
+void DwarfDebug::SetDIEEntry(DIEEntry *Value, DIE *Entry) {
+ Value->setEntry(Entry);
+
+ // Add to values set if not already there. If it is, we merely have a
+ // duplicate in the values list (no harm.)
+ ValuesSet.GetOrInsertNode(Value);
+}
+
+/// AddUInt - Add an unsigned integer attribute data and value.
+///
+void DwarfDebug::AddUInt(DIE *Die, unsigned Attribute,
+ unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+
+ FoldingSetNodeID ID;
+ DIEInteger::Profile(ID, Integer);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEInteger(Integer);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddSInt - Add a signed integer attribute data and value.
+///
+void DwarfDebug::AddSInt(DIE *Die, unsigned Attribute,
+ unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+
+ FoldingSetNodeID ID;
+ DIEInteger::Profile(ID, (uint64_t)Integer);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEInteger(Integer);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddString - Add a string attribute data and value.
+///
+void DwarfDebug::AddString(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &String) {
+ FoldingSetNodeID ID;
+ DIEString::Profile(ID, String);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEString(String);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddLabel - Add a Dwarf label attribute data and value.
+///
+void DwarfDebug::AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label) {
+ FoldingSetNodeID ID;
+ DIEDwarfLabel::Profile(ID, Label);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEDwarfLabel(Label);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+///
+void DwarfDebug::AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &Label) {
+ FoldingSetNodeID ID;
+ DIEObjectLabel::Profile(ID, Label);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEObjectLabel(Label);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddSectionOffset - Add a section offset label attribute data and value.
+///
+void DwarfDebug::AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label, const DWLabel &Section,
+ bool isEH, bool useSet) {
+ FoldingSetNodeID ID;
+ DIESectionOffset::Profile(ID, Label, Section);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIESectionOffset(Label, Section, isEH, useSet);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddDelta - Add a label delta attribute data and value.
+///
+void DwarfDebug::AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Hi, const DWLabel &Lo) {
+ FoldingSetNodeID ID;
+ DIEDelta::Profile(ID, Hi, Lo);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = new DIEDelta(Hi, Lo);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddBlock - Add block data.
+///
+void DwarfDebug::AddBlock(DIE *Die, unsigned Attribute, unsigned Form,
+ DIEBlock *Block) {
+ Block->ComputeSize(TD);
+ FoldingSetNodeID ID;
+ Block->Profile(ID);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Value) {
+ Value = Block;
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ } else {
+ // Already exists, reuse the previous one.
+ delete Block;
+ Block = cast<DIEBlock>(Value);
+ }
+
+ Die->AddValue(Attribute, Block->BestForm(), Value);
+}
+
+/// AddSourceLine - Add location information to the specified debug
+/// information entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DIVariable *V) {
+ // If there is no compile unit specified, don't add a line #.
+ if (V->getCompileUnit().isNull())
+ return;
+
+ unsigned Line = V->getLineNumber();
+ unsigned FileID = FindCompileUnit(V->getCompileUnit()).getID();
+ assert(FileID && "Invalid file id");
+ AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddSourceLine - Add location information to the specified debug
+/// information entry.
+void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) {
+ // If there is no compile unit specified, don't add a line #.
+ if (G->getCompileUnit().isNull())
+ return;
+
+ unsigned Line = G->getLineNumber();
+ unsigned FileID = FindCompileUnit(G->getCompileUnit()).getID();
+ assert(FileID && "Invalid file id");
+ AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
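+
+/// AddSourceLine - Add location information to the specified debug
+/// information entry.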
+void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
+ // If there is no compile unit specified, don't add a line #.
+ DICompileUnit CU = Ty->getCompileUnit();
+ if (CU.isNull())
+ return;
+
+ unsigned Line = Ty->getLineNumber();
+ unsigned FileID = FindCompileUnit(CU).getID();
+ assert(FileID && "Invalid file id");
+ AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// AddAddress - Add an address attribute to a die based on the location
+/// provided.
+void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+ DIEBlock *Block = new DIEBlock();
+
+ if (Location.isReg()) {
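+    // DW_OP_reg0 through DW_OP_reg31 encode the register number in the
+    // opcode itself; larger numbers need DW_OP_regx with a ULEB128 operand.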
+ if (Reg < 32) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+ } else {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+ } else {
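+    // Register + offset (e.g. a frame-relative location): DW_OP_breg0..31 or
+    // DW_OP_bregx, followed by a signed LEB128 offset.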
+ if (Reg < 32) {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ } else {
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+ }
+
+ AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ }
+
+ AddBlock(Die, Attribute, 0, Block);
+}
+
+/// AddType - Add a new type attribute to the specified entity.
+void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
+ if (Ty.isNull())
+ return;
+
+ // Check for pre-existence.
+ DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getGV());
+
+ // If it exists then use the existing value.
+ if (Slot) {
+ Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+ return;
+ }
+
+ // Set up proxy.
+ Slot = CreateDIEEntry();
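+  // The proxy is resolved via SetDIEEntry below once the concrete type DIE
+  // has been constructed.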
+
+ // Construct type.
+ DIE Buffer(dwarf::DW_TAG_base_type);
+ if (Ty.isBasicType(Ty.getTag()))
+ ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getGV()));
+ else if (Ty.isDerivedType(Ty.getTag()))
+ ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getGV()));
+ else {
+ assert(Ty.isCompositeType(Ty.getTag()) && "Unknown kind of DIType");
+ ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getGV()));
+ }
+
+ // Add debug information entry to entity and appropriate context.
+ DIE *Die = NULL;
+ DIDescriptor Context = Ty.getContext();
+ if (!Context.isNull())
+ Die = DW_Unit->getDieMapSlotFor(Context.getGV());
+
+ if (Die) {
+ DIE *Child = new DIE(Buffer);
+ Die->AddChild(Child);
+ Buffer.Detach();
+ SetDIEEntry(Slot, Child);
+ } else {
+ Die = DW_Unit->AddDie(Buffer);
+ SetDIEEntry(Slot, Die);
+ }
+
+ Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+}
+
+/// ConstructTypeDIE - Construct basic type DIE from DIBasicType.
+void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DIBasicType BTy) {
+ // Get core information.
+ std::string Name;
+ BTy.getName(Name);
+ Buffer.setTag(dwarf::DW_TAG_base_type);
+ AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ uint64_t Size = BTy.getSizeInBits() >> 3;
+ AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// ConstructTypeDIE - Construct derived type DIE from DIDerivedType.
+void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DIDerivedType DTy) {
+ // Get core information.
+ std::string Name;
+ DTy.getName(Name);
+ uint64_t Size = DTy.getSizeInBits() >> 3;
+ unsigned Tag = DTy.getTag();
+
+ // FIXME - Workaround for templates.
+ if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+ Buffer.setTag(Tag);
+
+ // Map to main type, void will not have a type.
+ DIType FromTy = DTy.getTypeDerivedFrom();
+ AddType(DW_Unit, &Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size)
+ AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+ // Add source line info if available and TyDesc is not a forward declaration.
+ if (!DTy.isForwardDecl())
+ AddSourceLine(&Buffer, &DTy);
+}
+
+/// ConstructTypeDIE - Construct type DIE from DICompositeType.
+void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DICompositeType CTy) {
+ // Get core information.
+ std::string Name;
+ CTy.getName(Name);
+
+ uint64_t Size = CTy.getSizeInBits() >> 3;
+ unsigned Tag = CTy.getTag();
+ Buffer.setTag(Tag);
+
+ switch (Tag) {
+ case dwarf::DW_TAG_vector_type:
+ case dwarf::DW_TAG_array_type:
+ ConstructArrayTypeDIE(DW_Unit, Buffer, &CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type: {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *ElemDie = NULL;
+ DIEnumerator Enum(Elements.getElement(i).getGV());
+ ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum);
+ Buffer.AddChild(ElemDie);
+ }
+ }
+ break;
+ case dwarf::DW_TAG_subroutine_type: {
+ // Add return type.
+ DIArray Elements = CTy.getTypeArray();
+ DIDescriptor RTy = Elements.getElement(0);
+ AddType(DW_Unit, &Buffer, DIType(RTy.getGV()));
+
+ // Add prototype flag.
+ AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments.
+ for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIDescriptor Ty = Elements.getElement(i);
+ AddType(DW_Unit, Arg, DIType(Ty.getGV()));
+ Buffer.AddChild(Arg);
+ }
+ }
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Add elements to structure type.
+ DIArray Elements = CTy.getTypeArray();
+
+ // A forward-declared struct type may not have elements available.
+ if (Elements.isNull())
+ break;
+
+ // Add elements to structure type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIE *ElemDie = NULL;
+ if (Element.getTag() == dwarf::DW_TAG_subprogram)
+ ElemDie = CreateSubprogramDIE(DW_Unit,
+ DISubprogram(Element.getGV()));
+ else if (Element.getTag() == dwarf::DW_TAG_variable) // ??
+ ElemDie = CreateGlobalVariableDIE(DW_Unit,
+ DIGlobalVariable(Element.getGV()));
+ else
+ ElemDie = CreateMemberDIE(DW_Unit,
+ DIDerivedType(Element.getGV()));
+ Buffer.AddChild(ElemDie);
+ }
+
+ // FIXME: We'd like an API to register additional attributes for the
+ // frontend to use while synthesizing, and then we'd use that api in clang
+ // instead of this.
+ if (Name == "__block_literal_generic")
+ AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ AddUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ // Add size if non-zero (composite types might be zero-sized.)
+ if (Size)
+ AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ else {
+ // Add zero size if it is not a forward declaration.
+ if (CTy.isForwardDecl())
+ AddUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ else
+ AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ }
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ AddSourceLine(&Buffer, &CTy);
+ }
+}
+
+/// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
+void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+
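+ // When the low and high bounds are equal, the subrange is emitted with no
+ // type or bound attributes.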
+ if (L != H) {
+ AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ if (L)
+ AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ }
+
+ Buffer.AddChild(DW_Subrange);
+}
+
+/// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ AddUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ AddType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Construct an anonymous type for index type.
+ DIE IdxBuffer(dwarf::DW_TAG_base_type);
+ AddUInt(&IdxBuffer, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ AddUInt(&IdxBuffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ DIE *IndexTy = DW_Unit->AddDie(IdxBuffer);
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ ConstructSubrangeDIE(Buffer, DISubrange(Element.getGV()), IndexTy);
+ }
+}
+
+/// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ std::string Name;
+ ETy->getName(Name);
+ AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy->getEnumValue();
+ AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// CreateGlobalVariableDIE - Create new DIE using GV.
+DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+ const DIGlobalVariable &GV) {
+ DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
+ std::string Name;
+ GV.getDisplayName(Name);
+ AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ std::string LinkageName;
+ GV.getLinkageName(LinkageName);
+ if (!LinkageName.empty())
+ AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ LinkageName);
+ AddType(DW_Unit, GVDie, GV.getType());
+ if (!GV.isLocalToUnit())
+ AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ AddSourceLine(GVDie, &GV);
+ return GVDie;
+}
+
+/// CreateMemberDIE - Create new member DIE.
+DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
+ DIE *MemberDie = new DIE(DT.getTag());
+ std::string Name;
+ DT.getName(Name);
+ if (!Name.empty())
+ AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
+
+ AddSourceLine(MemberDie, &DT);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
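+ // Illustrative example (assumed values): a 3-bit field at bit offset 5 in a
+ // 32-bit aligned unit gives HiMark = 32 and FieldOffset = 0; on a
+ // little-endian target DW_AT_bit_offset becomes 32 - (5 + 3) = 24, i.e. the
+ // offset counted from the most significant bit of the storage unit.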
+ AddUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ AddUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t FieldOffset = Offset;
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
+ AddUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+ }
+
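+ // DW_AT_data_member_location is a block expression: DW_OP_plus_uconst
+ // followed by the member's byte offset within its parent.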
+ DIEBlock *Block = new DIEBlock();
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ AddUInt(Block, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+ AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, Block);
+
+ if (DT.isProtected())
+ AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+ dwarf::DW_ACCESS_private);
+
+ return MemberDie;
+}
+
+/// CreateSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
+ const DISubprogram &SP,
+ bool IsConstructor,
+ bool IsInlined) {
+ DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ std::string Name;
+ SP.getName(Name);
+ AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ std::string LinkageName;
+ SP.getLinkageName(LinkageName);
+
+ if (!LinkageName.empty())
+ AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ LinkageName);
+
+ AddSourceLine(SPDie, &SP);
+
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+
+ // Add prototyped tag, if C or ObjC.
+ unsigned Lang = SP.getCompileUnit().getLanguage();
+ if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
+ Lang == dwarf::DW_LANG_ObjC)
+ AddUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+ // Add Return Type.
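+ // For DW_TAG_subroutine_type, element 0 of the type array is the return
+ // type; otherwise the subprogram's type is added directly.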
+ unsigned SPTag = SPTy.getTag();
+ if (!IsConstructor) {
+ if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
+ AddType(DW_Unit, SPDie, SPTy);
+ else
+ AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getGV()));
+ }
+
+ if (!SP.isDefinition()) {
+ AddUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled through RecordVariable.
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ AddType(DW_Unit, Arg, DIType(Args.getElement(i).getGV()));
+ AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
+ SPDie->AddChild(Arg);
+ }
+ }
+
+ if (!SP.isLocalToUnit() && !IsInlined)
+ AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getGV());
+ Slot = SPDie;
+ return SPDie;
+}
+
+/// FindCompileUnit - Get the compile unit for the given descriptor.
+///
+CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const {
+ DenseMap<Value *, CompileUnit *>::const_iterator I =
+ CompileUnitMap.find(Unit.getGV());
+ assert(I != CompileUnitMap.end() && "Missing compile unit.");
+ return *I->second;
+}
+
+/// CreateDbgScopeVariable - Create a new scope variable.
+///
+DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
+ // Get the descriptor.
+ const DIVariable &VD = DV->getVariable();
+
+ // Translate the tag to the proper Dwarf tag. Return-value variables are
+ // dropped for now.
+ unsigned Tag;
+ switch (VD.getTag()) {
+ case dwarf::DW_TAG_return_variable:
+ return NULL;
+ case dwarf::DW_TAG_arg_variable:
+ Tag = dwarf::DW_TAG_formal_parameter;
+ break;
+ case dwarf::DW_TAG_auto_variable: // fall thru
+ default:
+ Tag = dwarf::DW_TAG_variable;
+ break;
+ }
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ std::string Name;
+ VD.getName(Name);
+ AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ // Add source line info if available.
+ AddSourceLine(VariableDie, &VD);
+
+ // Add variable type.
+ AddType(Unit, VariableDie, VD.getType());
+
+ // Add variable address.
+ if (!DV->isInlinedFnVar()) {
+ // Variables for abstract instances of inlined functions don't get a
+ // location.
+ MachineLocation Location;
+ Location.set(RI->getFrameRegister(*MF),
+ RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+ }
+
+ return VariableDie;
+}
+
+/// getOrCreateScope - Returns the scope associated with the given descriptor.
+///
+DbgScope *DwarfDebug::getOrCreateScope(GlobalVariable *V) {
+ DbgScope *&Slot = DbgScopeMap[V];
+ if (Slot) return Slot;
+
+ DbgScope *Parent = NULL;
+ DIBlock Block(V);
+
+ // Don't create a new scope if we already created one for an inlined function.
+ DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ II = AbstractInstanceRootMap.find(V);
+ if (II != AbstractInstanceRootMap.end())
+ return LexicalScopeStack.back();
+
+ if (!Block.isNull()) {
+ DIDescriptor ParentDesc = Block.getContext();
+ Parent =
+ ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getGV());
+ }
+
+ Slot = new DbgScope(Parent, DIDescriptor(V));
+
+ if (Parent)
+ Parent->AddScope(Slot);
+ else
+ // A scope with no parent is the function's top-level scope.
+ FunctionDbgScope = Slot;
+
+ return Slot;
+}
+
+/// ConstructDbgScope - Construct the components of a scope.
+///
+void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope,
+ unsigned ParentStartID,
+ unsigned ParentEndID,
+ DIE *ParentDie, CompileUnit *Unit) {
+ // Add variables to scope.
+ SmallVector<DbgVariable *, 8> &Variables = ParentScope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit);
+ if (VariableDie) ParentDie->AddChild(VariableDie);
+ }
+
+ // Add concrete instances to scope.
+ SmallVector<DbgConcreteScope *, 8> &ConcreteInsts =
+ ParentScope->getConcreteInsts();
+ for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) {
+ DbgConcreteScope *ConcreteInst = ConcreteInsts[i];
+ DIE *Die = ConcreteInst->getDie();
+
+ unsigned StartID = ConcreteInst->getStartLabelID();
+ unsigned EndID = ConcreteInst->getEndLabelID();
+
+ // Add the scope bounds.
+ if (StartID)
+ AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", StartID));
+ else
+ AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+
+ if (EndID)
+ AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", EndID));
+ else
+ AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+
+ ParentDie->AddChild(Die);
+ }
+
+ // Add nested scopes.
+ SmallVector<DbgScope *, 4> &Scopes = ParentScope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+ // Define the Scope debug information entry.
+ DbgScope *Scope = Scopes[j];
+
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0) continue;
+
+ // Ignore scopes that have no variables, nested scopes, or concrete inlined
+ // instances. Inlined scopes keep their concrete instances and so are not
+ // dropped here.
+ if (Scope->getScopes().empty() && Scope->getVariables().empty() &&
+ Scope->getConcreteInsts().empty())
+ continue;
+
+ if (StartID == ParentStartID && EndID == ParentEndID) {
+ // Just add stuff to the parent scope.
+ ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
+ } else {
+ DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block);
+
+ // Add the scope bounds.
+ if (StartID)
+ AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", StartID));
+ else
+ AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+
+ if (EndID)
+ AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", EndID));
+ else
+ AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+
+ // Add the scope's contents.
+ ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit);
+ ParentDie->AddChild(ScopeDie);
+ }
+ }
+}
+
+/// ConstructFunctionDbgScope - Construct the scope for the subprogram.
+///
+void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope,
+ bool AbstractScope) {
+ // Exit if there is no root scope.
+ if (!RootScope) return;
+ DIDescriptor Desc = RootScope->getDesc();
+ if (Desc.isNull())
+ return;
+
+ // Get the subprogram debug information entry.
+ DISubprogram SPD(Desc.getGV());
+
+ // Get the compile unit context.
+ CompileUnit *Unit = MainCU;
+ if (!Unit)
+ Unit = &FindCompileUnit(SPD.getCompileUnit());
+
+ // Get the subprogram die.
+ DIE *SPDie = Unit->getDieMapSlotFor(SPD.getGV());
+ assert(SPDie && "Missing subprogram descriptor");
+
+ if (!AbstractScope) {
+ // Add the function bounds.
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ }
+
+ ConstructDbgScope(RootScope, 0, 0, SPDie, Unit);
+}
+
+/// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
+///
+void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) {
+ const char *FnName = MF->getFunction()->getNameStart();
+ if (MainCU) {
+ StringMap<DIE*> &Globals = MainCU->getGlobals();
+ StringMap<DIE*>::iterator GI = Globals.find(FnName);
+ if (GI != Globals.end()) {
+ DIE *SPDie = GI->second;
+
+ // Add the function bounds.
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ return;
+ }
+ } else {
+ for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) {
+ CompileUnit *Unit = CompileUnits[i];
+ StringMap<DIE*> &Globals = Unit->getGlobals();
+ StringMap<DIE*>::iterator GI = Globals.find(FnName);
+ if (GI != Globals.end()) {
+ DIE *SPDie = GI->second;
+
+ // Add the function bounds.
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ return;
+ }
+ }
+ }
+
+#if 0
+ // FIXME: This is causing an abort because C++ mangled names are compared with
+ // their unmangled counterparts. See PR2885. Don't do this assert.
+ assert(0 && "Couldn't find DIE for machine function!");
+#endif
+}
+
+/// GetOrCreateSourceID - Look up the source id with the given directory and
+/// source file names. If none currently exists, create a new id and insert it
+/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
+/// maps as well.
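+ /// For example (illustrative values only): a first call with ("/tmp", "a.c")
+ /// returns id 1 and repeated calls return the same id, while "b.c" in the
+ /// same directory reuses the directory id but gets a new source id.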
+unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName) {
+ unsigned DId;
+ StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
+ if (DI != DirectoryIdMap.end()) {
+ DId = DI->getValue();
+ } else {
+ DId = DirectoryNames.size() + 1;
+ DirectoryIdMap[DirName] = DId;
+ DirectoryNames.push_back(DirName);
+ }
+
+ unsigned FId;
+ StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName);
+ if (FI != SourceFileIdMap.end()) {
+ FId = FI->getValue();
+ } else {
+ FId = SourceFileNames.size() + 1;
+ SourceFileIdMap[FileName] = FId;
+ SourceFileNames.push_back(FileName);
+ }
+
+ DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI =
+ SourceIdMap.find(std::make_pair(DId, FId));
+ if (SI != SourceIdMap.end())
+ return SI->second;
+
+ unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0.
+ SourceIdMap[std::make_pair(DId, FId)] = SrcId;
+ SourceIds.push_back(std::make_pair(DId, FId));
+
+ return SrcId;
+}
+
+void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) {
+ DICompileUnit DIUnit(GV);
+ std::string Dir, FN, Prod;
+ unsigned ID = GetOrCreateSourceID(DIUnit.getDirectory(Dir),
+ DIUnit.getFilename(FN));
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ DWLabel("section_line", 0), DWLabel("section_line", 0),
+ false);
+ AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+ DIUnit.getProducer(Prod));
+ AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+ DIUnit.getLanguage());
+ AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+
+ if (!Dir.empty())
+ AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (DIUnit.isOptimized())
+ AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+ std::string Flags;
+ DIUnit.getFlags(Flags);
+ if (!Flags.empty())
+ AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+ unsigned RVer = DIUnit.getRunTimeVersion();
+ if (RVer)
+ AddUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+
+ CompileUnit *Unit = new CompileUnit(ID, Die);
+ if (DIUnit.isMain()) {
+ assert(!MainCU && "Multiple main compile units are found!");
+ MainCU = Unit;
+ }
+
+ CompileUnitMap[DIUnit.getGV()] = Unit;
+ CompileUnits.push_back(Unit);
+}
+
+ /// ConstructCompileUnits - Create compile unit DIEs.
+void DwarfDebug::ConstructCompileUnits() {
+ GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.compile_units");
+ if (!Root)
+ return;
+ assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+ "Malformed compile unit descriptor anchor type");
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+ assert(RootC->hasNUsesOrMore(1) &&
+ "Malformed compile unit descriptor anchor type");
+
+ for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI)
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI) {
+ GlobalVariable *GV = cast<GlobalVariable>(*UUI);
+ ConstructCompileUnit(GV);
+ }
+}
+
+bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
+ DIGlobalVariable DI_GV(GV);
+ CompileUnit *DW_Unit = MainCU;
+ if (!DW_Unit)
+ DW_Unit = &FindCompileUnit(DI_GV.getCompileUnit());
+
+ // Check for pre-existence.
+ DIE *&Slot = DW_Unit->getDieMapSlotFor(DI_GV.getGV());
+ if (Slot)
+ return false;
+
+ DIE *VariableDie = CreateGlobalVariableDIE(DW_Unit, DI_GV);
+
+ // Add address.
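+ // The location expression is DW_OP_addr followed by the global's symbol
+ // address, emitted as an object label.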
+ DIEBlock *Block = new DIEBlock();
+ AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ std::string GLN;
+ AddObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->getGlobalLinkName(DI_GV.getGlobal(), GLN));
+ AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
+
+ // Add to map.
+ Slot = VariableDie;
+
+ // Add to context owner.
+ DW_Unit->getDie()->AddChild(VariableDie);
+
+ // Expose as global. FIXME - need to check external flag.
+ std::string Name;
+ DW_Unit->AddGlobal(DI_GV.getName(Name), VariableDie);
+ return true;
+}
+
+/// ConstructGlobalVariableDIEs - Create DIEs for each of the externally visible
+/// global variables. Return true if at least one global DIE is created.
+bool DwarfDebug::ConstructGlobalVariableDIEs() {
+ GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.global_variables");
+ if (!Root)
+ return false;
+
+ assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+ "Malformed global variable descriptor anchor type");
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+ assert(RootC->hasNUsesOrMore(1) &&
+ "Malformed global variable descriptor anchor type");
+
+ bool Result = false;
+ for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI)
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI)
+ Result |= ConstructGlobalVariableDIE(cast<GlobalVariable>(*UUI));
+
+ return Result;
+}
+
+bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
+ DISubprogram SP(GV);
+ CompileUnit *Unit = MainCU;
+ if (!Unit)
+ Unit = &FindCompileUnit(SP.getCompileUnit());
+
+ // Check for pre-existence.
+ DIE *&Slot = Unit->getDieMapSlotFor(GV);
+ if (Slot)
+ return false;
+
+ if (!SP.isDefinition())
+ // This is a method declaration which will be handled while constructing
+ // class type.
+ return false;
+
+ DIE *SubprogramDie = CreateSubprogramDIE(Unit, SP);
+
+ // Add to map.
+ Slot = SubprogramDie;
+
+ // Add to context owner.
+ Unit->getDie()->AddChild(SubprogramDie);
+
+ // Expose as global.
+ std::string Name;
+ Unit->AddGlobal(SP.getName(Name), SubprogramDie);
+ return true;
+}
+
+/// ConstructSubprograms - Create DIEs for each of the externally visible
+/// subprograms. Return true if at least one subprogram DIE is created.
+bool DwarfDebug::ConstructSubprograms() {
+ GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.subprograms");
+ if (!Root)
+ return false;
+
+ assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+ "Malformed subprogram descriptor anchor type");
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+ assert(RootC->hasNUsesOrMore(1) &&
+ "Malformed subprogram descriptor anchor type");
+
+ bool Result = false;
+ for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI)
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI)
+ Result |= ConstructSubprogram(cast<GlobalVariable>(*UUI));
+
+ return Result;
+}
+
+/// SetDebugInfo - Create global DIEs and emit initial debug info sections.
+ /// This is invoked by the target AsmPrinter.
+void DwarfDebug::SetDebugInfo(MachineModuleInfo *mmi) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Create all the compile unit DIEs.
+ ConstructCompileUnits();
+
+ if (CompileUnits.empty()) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return;
+ }
+
+ // Create DIEs for each of the externally visible global variables.
+ bool globalDIEs = ConstructGlobalVariableDIEs();
+
+ // Create DIEs for each of the externally visible subprograms.
+ bool subprogramDIEs = ConstructSubprograms();
+
+ // If there is no debug info available for any global variables or
+ // subprograms, then there is no debug info to emit.
+ if (!globalDIEs && !subprogramDIEs) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return;
+ }
+
+ MMI = mmi;
+ shouldEmit = true;
+ MMI->setDebugInfoAvailability(true);
+
+ // Prime section data.
+ SectionMap.insert(TAI->getTextSection());
+
+ // Print out .file directives to specify files for .loc directives. These are
+ // printed out early so that they precede any .loc directives.
+ if (TAI->hasDotLocAndDotFile()) {
+ for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
+ // Remember source id starts at 1.
+ std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
+ sys::Path FullPath(getSourceDirectoryName(Id.first));
+ bool AppendOk =
+ FullPath.appendComponent(getSourceFileName(Id.second));
+ assert(AppendOk && "Could not append filename to directory!");
+ AppendOk = false;
+ Asm->EmitFile(i, FullPath.toString());
+ Asm->EOL();
+ }
+ }
+
+ // Emit initial sections
+ EmitInitial();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::EndModule() {
+ if (!ShouldEmitDwarfDebug())
+ return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Standard sections final addresses.
+ Asm->SwitchToSection(TAI->getTextSection());
+ EmitLabel("text_end", 0);
+ Asm->SwitchToSection(TAI->getDataSection());
+ EmitLabel("data_end", 0);
+
+ // End text sections.
+ for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+ Asm->SwitchToSection(SectionMap[i]);
+ EmitLabel("section_end", i);
+ }
+
+ // Emit common frame information.
+ EmitCommonDebugFrame();
+
+ // Emit function debug frame information
+ for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
+ E = DebugFrames.end(); I != E; ++I)
+ EmitFunctionDebugFrame(*I);
+
+ // Compute DIE offsets and sizes.
+ SizeAndOffsets();
+
+ // Emit all the DIEs into a debug info section
+ EmitDebugInfo();
+
+ // Emit corresponding abbreviations into an abbrev section.
+ EmitAbbreviations();
+
+ // Emit source line correspondence into a debug line section.
+ EmitDebugLines();
+
+ // Emit info into a debug pubnames section.
+ EmitDebugPubNames();
+
+ // Emit info into a debug str section.
+ EmitDebugStr();
+
+ // Emit info into a debug loc section.
+ EmitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ EmitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ EmitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ EmitDebugMacInfo();
+
+ // Emit inline info.
+ EmitDebugInlineInfo();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// BeginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfDebug::BeginFunction(MachineFunction *MF) {
+ this->MF = MF;
+
+ if (!ShouldEmitDwarfDebug()) return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Begin accumulating function debug information.
+ MMI->BeginFunction(MF);
+
+ // Assumes in correct section after the entry point.
+ EmitLabel("func_begin", ++SubprogramCount);
+
+ // Emit label for the implicitly defined dbg.stoppoint at the start of the
+ // function.
+ DebugLoc FDL = MF->getDefaultDebugLoc();
+ if (!FDL.isUnknown()) {
+ DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
+ unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col,
+ DICompileUnit(DLT.CompileUnit));
+ Asm->printLabel(LabelID);
+ }
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::EndFunction(MachineFunction *MF) {
+ if (!ShouldEmitDwarfDebug()) return;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ // Define end label for subprogram.
+ EmitLabel("func_end", SubprogramCount);
+
+ // Get function line info.
+ if (!Lines.empty()) {
+ // Get section line info.
+ unsigned ID = SectionMap.insert(Asm->CurrentSection_);
+ if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+ std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+ // Append the function info to section info.
+ SectionLineInfos.insert(SectionLineInfos.end(),
+ Lines.begin(), Lines.end());
+ }
+
+ // Construct the DbgScope for abstract instances.
+ for (SmallVector<DbgScope *, 32>::iterator
+ I = AbstractInstanceRootList.begin(),
+ E = AbstractInstanceRootList.end(); I != E; ++I)
+ ConstructFunctionDbgScope(*I);
+
+ // Construct scopes for subprogram.
+ if (FunctionDbgScope)
+ ConstructFunctionDbgScope(FunctionDbgScope);
+ else
+ // FIXME: This is wrong. We are essentially getting past a problem with
+ // debug information not being able to handle unreachable blocks that have
+ // debug information in them. In particular, those unreachable blocks that
+ // have "region end" info in them. That situation results in the "root
+ // scope" not being created. If that's the case, then emit a "default"
+ // scope, i.e., one that encompasses the whole function. This isn't
+ // desirable. And a better way of handling this (and all of the debugging
+ // information) needs to be explored.
+ ConstructDefaultDbgScope(MF);
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+ MMI->getFrameMoves()));
+
+ // Clear debug info
+ if (FunctionDbgScope) {
+ delete FunctionDbgScope;
+ DbgScopeMap.clear();
+ DbgAbstractScopeMap.clear();
+ DbgConcreteScopeMap.clear();
+ InlinedVariableScopes.clear();
+ FunctionDbgScope = NULL;
+ LexicalScopeStack.clear();
+ AbstractInstanceRootList.clear();
+ }
+
+ Lines.clear();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+/// RecordSourceLine - Records location information and associates it with a
+/// label. Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ CompileUnit *Unit = CompileUnitMap[V];
+ assert(Unit && "Unable to find CompileUnit");
+ unsigned ID = MMI->NextLabelID();
+ Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID));
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// RecordSourceLine - Records location information and associates it with a
+/// label. Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col,
+ DICompileUnit CU) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ std::string Dir, Fn;
+ unsigned Src = GetOrCreateSourceID(CU.getDirectory(Dir),
+ CU.getFilename(Fn));
+ unsigned ID = MMI->NextLabelID();
+ Lines.push_back(SrcLineInfo(Line, Col, Src, ID));
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
+/// timed. Look up the source id with the given directory and source file
+/// names. If none currently exists, create a new id and insert it in the
+/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
+/// well.
+unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ unsigned SrcId = GetOrCreateSourceID(DirName, FileName);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return SrcId;
+}
+
+/// RecordRegionStart - Indicate the start of a region.
+unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ DbgScope *Scope = getOrCreateScope(V);
+ unsigned ID = MMI->NextLabelID();
+ if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
+ LexicalScopeStack.push_back(Scope);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ DbgScope *Scope = getOrCreateScope(V);
+ unsigned ID = MMI->NextLabelID();
+ Scope->setEndLabelID(ID);
+ if (LexicalScopeStack.size() != 0)
+ LexicalScopeStack.pop_back();
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+void DwarfDebug::RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
+ const MachineInstr *MI) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ DIDescriptor Desc(GV);
+ DbgScope *Scope = NULL;
+ bool InlinedFnVar = false;
+
+ if (Desc.getTag() == dwarf::DW_TAG_variable) {
+ // GV is a global variable.
+ DIGlobalVariable DG(GV);
+ Scope = getOrCreateScope(DG.getContext().getGV());
+ } else {
+ DenseMap<const MachineInstr *, DbgScope *>::iterator
+ SI = InlinedVariableScopes.find(MI);
+
+ if (SI != InlinedVariableScopes.end()) {
+ // or GV is an inlined local variable.
+ Scope = SI->second;
+ } else {
+ DIVariable DV(GV);
+ GlobalVariable *V = DV.getContext().getGV();
+
+ // FIXME: The code that checks for the inlined local variable is a hack!
+ DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ AI = AbstractInstanceRootMap.find(V);
+
+ if (AI != AbstractInstanceRootMap.end()) {
+ // This method is called each time a DECLARE node is encountered. For an
+ // inlined function, that could happen many times. We don't want to re-add
+ // the variables to that DIE each time; we only want to add them once.
+ // Check to make sure that we haven't added them already.
+ DenseMap<const GlobalVariable *,
+ SmallSet<const GlobalVariable *, 32> >::iterator
+ IP = InlinedParamMap.find(V);
+
+ if (IP != InlinedParamMap.end() && IP->second.count(GV) > 0) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+ return;
+ }
+
+ // or GV is an inlined local variable.
+ Scope = AI->second;
+ InlinedParamMap[V].insert(GV);
+ InlinedFnVar = true;
+ } else {
+ // or GV is a local variable.
+ Scope = getOrCreateScope(V);
+ }
+ }
+ }
+
+ assert(Scope && "Unable to find the variable's scope");
+ DbgVariable *DV = new DbgVariable(DIVariable(GV), FrameIndex, InlinedFnVar);
+ Scope->AddVariable(DV);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+ /// RecordInlinedFnStart - Indicate the start of an inlined subroutine.
+unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
+ unsigned Line, unsigned Col) {
+ unsigned LabelID = MMI->NextLabelID();
+
+ if (!TAI->doesDwarfUsesInlineInfoSection())
+ return LabelID;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ GlobalVariable *GV = SP.getGV();
+ DenseMap<const GlobalVariable *, DbgScope *>::iterator
+ II = AbstractInstanceRootMap.find(GV);
+
+ if (II == AbstractInstanceRootMap.end()) {
+ // Create an abstract instance entry for this inlined function if it doesn't
+ // already exist.
+ DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV));
+
+ // Get the compile unit context.
+ CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
+ DIE *SPDie = Unit->getDieMapSlotFor(GV);
+ if (!SPDie)
+ SPDie = CreateSubprogramDIE(Unit, SP, false, true);
+
+ // Mark as being inlined. This makes this subprogram entry an abstract
+ // instance root.
+ // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only
+ // that it's defined. That probably won't change in the future. However,
+ // this could be more elegant.
+ AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined);
+
+ // Keep track of the abstract scope for this function.
+ DbgAbstractScopeMap[GV] = Scope;
+
+ AbstractInstanceRootMap[GV] = Scope;
+ AbstractInstanceRootList.push_back(Scope);
+ }
+
+ // Create a concrete inlined instance for this inlined function.
+ DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV));
+ DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
+ CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
+ ScopeDie->setAbstractCompileUnit(Unit);
+
+ DIE *Origin = Unit->getDieMapSlotFor(GV);
+ AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, Origin);
+ AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, Unit->getID());
+ AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line);
+ AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col);
+
+ ConcreteScope->setDie(ScopeDie);
+ ConcreteScope->setStartLabelID(LabelID);
+ MMI->RecordUsedDbgLabel(LabelID);
+
+ LexicalScopeStack.back()->AddConcreteInst(ConcreteScope);
+
+ // Keep track of the concrete scope that's inlined into this function.
+ DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator
+ SI = DbgConcreteScopeMap.find(GV);
+
+ if (SI == DbgConcreteScopeMap.end())
+ DbgConcreteScopeMap[GV].push_back(ConcreteScope);
+ else
+ SI->second.push_back(ConcreteScope);
+
+ // Track the start label for this inlined function.
+ DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
+ I = InlineInfo.find(GV);
+
+ if (I == InlineInfo.end())
+ InlineInfo[GV].push_back(LabelID);
+ else
+ I->second.push_back(LabelID);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return LabelID;
+}
+
+ /// RecordInlinedFnEnd - Indicate the end of an inlined subroutine.
+unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
+ if (!TAI->doesDwarfUsesInlineInfoSection())
+ return 0;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ GlobalVariable *GV = SP.getGV();
+ DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator
+ I = DbgConcreteScopeMap.find(GV);
+
+ if (I == DbgConcreteScopeMap.end()) {
+ // FIXME: Can this situation actually happen? And if so, should it?
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return 0;
+ }
+
+ SmallVector<DbgScope *, 8> &Scopes = I->second;
+ assert(!Scopes.empty() && "We should have at least one debug scope!");
+ DbgScope *Scope = Scopes.back(); Scopes.pop_back();
+ unsigned ID = MMI->NextLabelID();
+ MMI->RecordUsedDbgLabel(ID);
+ Scope->setEndLabelID(ID);
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return ID;
+}
+
+/// RecordVariableScope - Record scope for the variable declared by
+/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes
+ /// only for inlined subroutine variables. Other variables' scopes are
+/// determined during RecordVariable().
+void DwarfDebug::RecordVariableScope(DIVariable &DV,
+ const MachineInstr *DeclareMI) {
+ if (TimePassesIsEnabled)
+ DebugTimer->startTimer();
+
+ DISubprogram SP(DV.getContext().getGV());
+
+ if (SP.isNull()) {
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+
+ return;
+ }
+
+ DenseMap<GlobalVariable *, DbgScope *>::iterator
+ I = DbgAbstractScopeMap.find(SP.getGV());
+ if (I != DbgAbstractScopeMap.end())
+ InlinedVariableScopes[DeclareMI] = I->second;
+
+ if (TimePassesIsEnabled)
+ DebugTimer->stopTimer();
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// SizeAndOffsetDie - Compute the size and offset of a DIE.
+///
+unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // If not last sibling and has children then add sibling offset attribute.
+ if (!Last && !Children.empty()) Die->AddSiblingOffset();
+
+ // Record the abbreviation.
+ AssignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of abbreviation code.
+ Offset += TargetAsmInfo::getULEB128Size(AbbrevNumber);
+
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+}
+
+/// SizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::SizeAndOffsets() {
+ // Compute size of compile unit header.
+ static unsigned Offset =
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
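+ // (4 + 2 + 4 + 1 = 11 bytes on typical targets.)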
+
+ // Process base compile unit.
+ if (MainCU) {
+ SizeAndOffsetDie(MainCU->getDie(), Offset, true);
+ CompileUnitOffsets[MainCU] = 0;
+ return;
+ }
+
+ // Process all compile units.
+ unsigned PrevOffset = 0;
+
+ for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) {
+ CompileUnit *Unit = CompileUnits[i];
+ CompileUnitOffsets[Unit] = PrevOffset;
+ PrevOffset += SizeAndOffsetDie(Unit->getDie(), Offset, true)
+ + sizeof(int32_t); // FIXME - extra pad for gdb bug.
+ }
+}
+
+/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+ /// tools to recognize that the object file contains Dwarf information.
+void DwarfDebug::EmitInitial() {
+ // Check to see if we already emitted initial headers.
+ if (didInitial) return;
+ didInitial = true;
+
+ // Dwarf sections base addresses.
+ if (TAI->doesDwarfRequireFrameSection()) {
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+ EmitLabel("section_debug_frame", 0);
+ }
+
+ Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+ EmitLabel("section_info", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+ EmitLabel("section_abbrev", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+ EmitLabel("section_aranges", 0);
+
+ if (TAI->doesSupportMacInfoSection()) {
+ Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ EmitLabel("section_macinfo", 0);
+ }
+
+ Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+ EmitLabel("section_line", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+ EmitLabel("section_loc", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+ EmitLabel("section_pubnames", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+ EmitLabel("section_str", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+ EmitLabel("section_ranges", 0);
+
+ Asm->SwitchToSection(TAI->getTextSection());
+ EmitLabel("text_begin", 0);
+ Asm->SwitchToSection(TAI->getDataSection());
+ EmitLabel("data_begin", 0);
+}
+
+ /// EmitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::EmitDIE(DIE *Die) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ Asm->EOL();
+
+ // Emit the code (index) for the abbreviation.
+ Asm->EmitULEB128Bytes(AbbrevNumber);
+
+ if (Asm->isVerbose())
+ Asm->EOL(std::string("Abbrev [" +
+ utostr(AbbrevNumber) +
+ "] 0x" + utohexstr(Die->getOffset()) +
+ ":0x" + utohexstr(Die->getSize()) + " " +
+ dwarf::TagString(Abbrev->getTag())));
+ else
+ Asm->EOL();
+
+ SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ switch (Attr) {
+ case dwarf::DW_AT_sibling:
+ Asm->EmitInt32(Die->SiblingOffset());
+ break;
+ case dwarf::DW_AT_abstract_origin: {
+ DIEEntry *E = cast<DIEEntry>(Values[i]);
+ DIE *Origin = E->getEntry();
+ unsigned Addr =
+ CompileUnitOffsets[Die->getAbstractCompileUnit()] +
+ Origin->getOffset();
+
+ Asm->EmitInt32(Addr);
+ break;
+ }
+ default:
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(this, Form);
+ break;
+ }
+
+ Asm->EOL(dwarf::AttributeString(Attr));
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j)
+ EmitDIE(Children[j]);
+
+ Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
+ }
+}
+
+/// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+///
+void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
+ DIE *Die = Unit->getDie();
+
+ // Emit the compile units header.
+ EmitLabel("info_begin", Unit->getID());
+
+ // Emit size of content not including length itself
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int32_t); // FIXME - extra pad for gdb bug.
+
+ Asm->EmitInt32(ContentSize); Asm->EOL("Length of Compilation Unit Info");
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF version number");
+ EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
+ Asm->EOL("Offset Into Abbrev. Section");
+ Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
+
+ EmitDIE(Die);
+ // FIXME - extra padding for gdb bug.
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ EmitLabel("info_end", Unit->getID());
+
+ Asm->EOL();
+}
+
+void DwarfDebug::EmitDebugInfo() {
+ // Start debug info section.
+ Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+
+ if (MainCU) {
+ EmitDebugInfoPerCU(MainCU);
+ return;
+ }
+
+ for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i)
+ EmitDebugInfoPerCU(CompileUnits[i]);
+}
+
+/// EmitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::EmitAbbreviations() const {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+
+ EmitLabel("abbrev_begin", 0);
+
+ // For each abbreviation.
+ for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ // Get abbreviation data
+ const DIEAbbrev *Abbrev = Abbreviations[i];
+
+ // Emit the abbreviation's code (base-1 index).
+ Asm->EmitULEB128Bytes(Abbrev->getNumber());
+ Asm->EOL("Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(Asm);
+
+ Asm->EOL();
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(3)");
+
+ EmitLabel("abbrev_end", 0);
+ Asm->EOL();
+ }
+}
+
+/// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
+ // Define last address of section.
+ Asm->EmitInt8(0); Asm->EOL("Extended Op");
+ Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+ EmitReference("section_end", SectionEnd); Asm->EOL("Section end label");
+
+ // Mark end of matrix.
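+ // An end-of-sequence entry is an extended opcode: escape byte 0, a length
+ // of 1, then DW_LNE_end_sequence.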
+ Asm->EmitInt8(0); Asm->EOL("DW_LNE_end_sequence");
+ Asm->EmitULEB128Bytes(1); Asm->EOL();
+ Asm->EmitInt8(1); Asm->EOL();
+}
+
+/// EmitDebugLines - Emit source line information.
+///
+void DwarfDebug::EmitDebugLines() {
+ // If the target is using .loc/.file, the assembler will be emitting the
+ // .debug_line table automatically.
+ if (TAI->hasDotLocAndDotFile())
+ return;
+
+ // Minimum line delta, thus ranging from -10..(255-10).
+ const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1);
+ // Maximum line delta, i.e. the upper end of that range.
+ const int MaxLineDelta = 255 + MinLineDelta;
+
+ // Start the dwarf line section.
+ Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+
+ // Construct the section header.
+ EmitDifference("line_end", 0, "line_begin", 0, true);
+ Asm->EOL("Length of Source Line Info");
+ EmitLabel("line_begin", 0);
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF version number");
+
+ EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true);
+ Asm->EOL("Prolog Length");
+ EmitLabel("line_prolog_begin", 0);
+
+ Asm->EmitInt8(1); Asm->EOL("Minimum Instruction Length");
+
+ Asm->EmitInt8(1); Asm->EOL("Default is_stmt_start flag");
+
+ Asm->EmitInt8(MinLineDelta); Asm->EOL("Line Base Value (Special Opcodes)");
+
+ Asm->EmitInt8(MaxLineDelta); Asm->EOL("Line Range Value (Special Opcodes)");
+
+ Asm->EmitInt8(-MinLineDelta); Asm->EOL("Special Opcode Base");
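+ // An opcode base of 10 (-MinLineDelta) reserves 1-9 for the standard
+ // opcodes listed below; opcodes 10 and above are special opcodes.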
+
+ // Line number standard opcode encodings argument count
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_copy arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_pc arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_line arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_file arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_column arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_negate_stmt arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_set_basic_block arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_const_add_pc arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_fixed_advance_pc arg count");
+
+ // Emit directories.
+ for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) {
+ Asm->EmitString(getSourceDirectoryName(DI));
+ Asm->EOL("Directory");
+ }
+
+ Asm->EmitInt8(0); Asm->EOL("End of directories");
+
+ // Emit files.
+ for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) {
+ // Remember source id starts at 1.
+ std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI);
+ Asm->EmitString(getSourceFileName(Id.second));
+ Asm->EOL("Source");
+ Asm->EmitULEB128Bytes(Id.first);
+ Asm->EOL("Directory #");
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("Mod date");
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("File size");
+ }
+
+ Asm->EmitInt8(0); Asm->EOL("End of files");
+
+ EmitLabel("line_prolog_end", 0);
+
+ // A sequence for each text section.
+ unsigned SecSrcLinesSize = SectionSourceLines.size();
+
+ for (unsigned j = 0; j < SecSrcLinesSize; ++j) {
+ // Isolate current sections line info.
+ const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
+
+ if (Asm->isVerbose()) {
+ const Section* S = SectionMap[j + 1];
+ O << '\t' << TAI->getCommentString() << " Section"
+ << S->getName() << '\n';
+ } else {
+ Asm->EOL();
+ }
+
+ // Dwarf assumes we start with first line of first source file.
+ unsigned Source = 1;
+ unsigned Line = 1;
+
+ // Construct rows of the address, source, line, column matrix.
+ for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
+ const SrcLineInfo &LineInfo = LineInfos[i];
+ unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID());
+ if (!LabelID) continue;
+
+ if (!Asm->isVerbose())
+ Asm->EOL();
+ else {
+ std::pair<unsigned, unsigned> SourceID =
+ getSourceDirectoryAndFileIds(LineInfo.getSourceID());
+ O << '\t' << TAI->getCommentString() << ' '
+ << getSourceDirectoryName(SourceID.first) << ' '
+ << getSourceFileName(SourceID.second)
+ <<" :" << utostr_32(LineInfo.getLine()) << '\n';
+ }
+
+ // Define the line address.
+ Asm->EmitInt8(0); Asm->EOL("Extended Op");
+ Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size");
+ Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+ EmitReference("label", LabelID); Asm->EOL("Location label");
+
+ // If change of source, then switch to the new source.
+ if (Source != LineInfo.getSourceID()) {
+ Source = LineInfo.getSourceID();
+ Asm->EmitInt8(dwarf::DW_LNS_set_file); Asm->EOL("DW_LNS_set_file");
+ Asm->EmitULEB128Bytes(Source); Asm->EOL("New Source");
+ }
+
+ // If change of line.
+ if (Line != LineInfo.getLine()) {
+ // Determine offset.
+ int Offset = LineInfo.getLine() - Line;
+ int Delta = Offset - MinLineDelta;
+
+ // Update line.
+ Line = LineInfo.getLine();
+
+ // If delta is small enough and in range...
+ if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
+ // ... then use fast opcode.
+ Asm->EmitInt8(Delta - MinLineDelta); Asm->EOL("Line Delta");
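+ // For example (illustrative): a line advance of +1 with no address advance
+ // is emitted as (1 - MinLineDelta) - MinLineDelta = 21, a special opcode.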
+ } else {
+ // ... otherwise use long hand.
+ Asm->EmitInt8(dwarf::DW_LNS_advance_line);
+ Asm->EOL("DW_LNS_advance_line");
+ Asm->EmitSLEB128Bytes(Offset); Asm->EOL("Line Offset");
+ Asm->EmitInt8(dwarf::DW_LNS_copy); Asm->EOL("DW_LNS_copy");
+ }
+ } else {
+ // Copy the previous row (different address or source)
+ Asm->EmitInt8(dwarf::DW_LNS_copy); Asm->EOL("DW_LNS_copy");
+ }
+ }
+
+ EmitEndOfLineMatrix(j + 1);
+ }
+
+ if (SecSrcLinesSize == 0)
+ // Because we're emitting a debug_line section, we still need a line
+ // table. The linker and friends expect it to exist. If there's nothing to
+ // put into it, emit an empty table.
+ EmitEndOfLineMatrix(1);
+
+ EmitLabel("line_end", 0);
+ Asm->EOL();
+}
+
+/// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+///
+void DwarfDebug::EmitCommonDebugFrame() {
+ if (!TAI->doesDwarfRequireFrameSection())
+ return;
+
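+ // The CIE data alignment factor is the pointer size, negated when the
+ // stack grows down.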
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
+
+ // Start the dwarf frame section.
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+
+ EmitLabel("debug_frame_common", 0);
+ EmitDifference("debug_frame_common_end", 0,
+ "debug_frame_common_begin", 0, true);
+ Asm->EOL("Length of Common Information Entry");
+
+ EmitLabel("debug_frame_common_begin", 0);
+ Asm->EmitInt32((int)dwarf::DW_CIE_ID);
+ Asm->EOL("CIE Identifier Tag");
+ Asm->EmitInt8(dwarf::DW_CIE_VERSION);
+ Asm->EOL("CIE Version");
+ Asm->EmitString("");
+ Asm->EOL("CIE Augmentation");
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("CIE Code Alignment Factor");
+ Asm->EmitSLEB128Bytes(stackGrowth);
+ Asm->EOL("CIE Data Alignment Factor");
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false));
+ Asm->EOL("CIE RA Column");
+
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+
+ EmitFrameMoves(NULL, 0, Moves, false);
+
+ Asm->EmitAlignment(2, 0, 0, false);
+ EmitLabel("debug_frame_common_end", 0);
+
+ Asm->EOL();
+}
+
+/// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+/// section.
+void
+DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
+ if (!TAI->doesDwarfRequireFrameSection())
+ return;
+
+ // Start the dwarf frame section.
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+
+ EmitDifference("debug_frame_end", DebugFrameInfo.Number,
+ "debug_frame_begin", DebugFrameInfo.Number, true);
+ Asm->EOL("Length of Frame Information Entry");
+
+ EmitLabel("debug_frame_begin", DebugFrameInfo.Number);
+
+ EmitSectionOffset("debug_frame_common", "section_debug_frame",
+ 0, 0, true, false);
+ Asm->EOL("FDE CIE offset");
+
+ EmitReference("func_begin", DebugFrameInfo.Number);
+ Asm->EOL("FDE initial location");
+ EmitDifference("func_end", DebugFrameInfo.Number,
+ "func_begin", DebugFrameInfo.Number);
+ Asm->EOL("FDE address range");
+
+ EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves,
+ false);
+
+ Asm->EmitAlignment(2, 0, 0, false);
+ EmitLabel("debug_frame_end", DebugFrameInfo.Number);
+
+ Asm->EOL();
+}
+
+void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
+ EmitDifference("pubnames_end", Unit->getID(),
+ "pubnames_begin", Unit->getID(), true);
+ Asm->EOL("Length of Public Names Info");
+
+ EmitLabel("pubnames_begin", Unit->getID());
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version");
+
+ EmitSectionOffset("info_begin", "section_info",
+ Unit->getID(), 0, true, false);
+ Asm->EOL("Offset of Compilation Unit Info");
+
+ EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(),
+ true);
+ Asm->EOL("Compilation Unit Length");
+
+ StringMap<DIE*> &Globals = Unit->getGlobals();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE * Entity = GI->second;
+
+ Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset");
+ Asm->EmitString(Name, strlen(Name)); Asm->EOL("External Name");
+ }
+
+ Asm->EmitInt32(0); Asm->EOL("End Mark");
+ EmitLabel("pubnames_end", Unit->getID());
+
+ Asm->EOL();
+}
+
+/// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::EmitDebugPubNames() {
+ // Start the dwarf pubnames section.
+ Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+
+ if (MainCU) {
+ EmitDebugPubNamesPerCU(MainCU);
+ return;
+ }
+
+ for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i)
+ EmitDebugPubNamesPerCU(CompileUnits[i]);
+}
+
+/// EmitDebugStr - Emit visible names into a debug str section.
+///
+void DwarfDebug::EmitDebugStr() {
+ // Check to see if it is worth the effort.
+ if (!StringPool.empty()) {
+ // Start the dwarf str section.
+ Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+
+ // For each string in the string pool.
+ for (unsigned StringID = 1, N = StringPool.size();
+ StringID <= N; ++StringID) {
+ // Emit a label for reference from debug information entries.
+ EmitLabel("string", StringID);
+
+ // Emit the string itself.
+ const std::string &String = StringPool[StringID];
+ Asm->EmitString(String); Asm->EOL();
+ }
+
+ Asm->EOL();
+ }
+}
+
+/// EmitDebugLoc - Emit location lists into a debug loc section.
+///
+void DwarfDebug::EmitDebugLoc() {
+ // Start the dwarf loc section.
+ Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+ Asm->EOL();
+}
+
+/// EmitDebugARanges - Emit address ranges into a debug aranges section.
+///
+void DwarfDebug::EmitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+
+ // FIXME - Mock up
+#if 0
+ CompileUnit *Unit = GetBaseCompileUnit();
+
+ // Don't include size of length
+ Asm->EmitInt32(0x1c); Asm->EOL("Length of Address Ranges Info");
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
+
+ EmitReference("info_begin", Unit->getID());
+ Asm->EOL("Offset of Compilation Unit Info");
+
+ Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Size of Address");
+
+ Asm->EmitInt8(0); Asm->EOL("Size of Segment Descriptor");
+
+ Asm->EmitInt16(0); Asm->EOL("Pad (1)");
+ Asm->EmitInt16(0); Asm->EOL("Pad (2)");
+
+ // Range 1
+ EmitReference("text_begin", 0); Asm->EOL("Address");
+ EmitDifference("text_end", 0, "text_begin", 0, true); Asm->EOL("Length");
+
+ Asm->EmitInt32(0); Asm->EOL("EOM (1)");
+ Asm->EmitInt32(0); Asm->EOL("EOM (2)");
+#endif
+
+ Asm->EOL();
+}
+
+/// EmitDebugRanges - Emit address ranges into a debug ranges section.
+///
+void DwarfDebug::EmitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+ Asm->EOL();
+}
+
+/// EmitDebugMacInfo - Emit macro information into a debug macinfo section.
+///
+void DwarfDebug::EmitDebugMacInfo() {
+ if (TAI->doesSupportMacInfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ Asm->EOL();
+ }
+}
+
+/// EmitDebugInlineInfo - Emit inline info using the following format.
+/// Section Header:
+/// 1. length of section
+/// 2. Dwarf version number
+/// 3. address size.
+///
+/// Entries (one "entry" for each function that was inlined):
+///
+/// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+/// otherwise offset into __debug_str for regular function name.
+/// 2. offset into __debug_str section for regular function name.
+/// 3. an unsigned LEB128 number indicating the number of distinct inlining
+/// instances for the function.
+///
+/// The rest of the entry consists of a {die_offset, low_pc} pair for each
+/// inlined instance; the die_offset points to the inlined_subroutine die in the
+/// __debug_info section, and the low_pc is the starting address for the
+/// inlining instance.
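+///
+/// A sketch of one entry as emitted by the loop below (illustrative only;
+/// note that this version writes the names as inline strings rather than as
+/// __debug_str offsets):
+///   .asciz "<linkage name>"          ; MIPS linkage name
+///   .asciz "<function name>"         ; Function name
+///   .uleb128 <N>                     ; Inline count
+///   ; followed by N { DIE offset, low_pc label } pairs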
+void DwarfDebug::EmitDebugInlineInfo() {
+ if (!TAI->doesDwarfUsesInlineInfoSection())
+ return;
+
+ if (!MainCU)
+ return;
+
+ Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection());
+ Asm->EOL();
+ EmitDifference("debug_inlined_end", 1,
+ "debug_inlined_begin", 1, true);
+ Asm->EOL("Length of Debug Inlined Information Entry");
+
+ EmitLabel("debug_inlined_begin", 1);
+
+ Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
+ Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
+
+ for (DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
+ I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
+ GlobalVariable *GV = I->first;
+ SmallVector<unsigned, 4> &Labels = I->second;
+ DISubprogram SP(GV);
+ std::string Name;
+ std::string LName;
+
+ SP.getLinkageName(LName);
+ SP.getName(Name);
+
+ Asm->EmitString(LName.empty() ? Name : LName);
+ Asm->EOL("MIPS linkage name");
+
+ Asm->EmitString(Name); Asm->EOL("Function name");
+
+ Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count");
+
+ for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(),
+ LE = Labels.end(); LI != LE; ++LI) {
+ DIE *SP = MainCU->getDieMapSlotFor(GV);
+ Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset");
+
+ if (TD->getPointerSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ PrintLabelName("label", *LI); Asm->EOL("low_pc");
+ }
+ }
+
+ EmitLabel("debug_inlined_end", 1);
+ Asm->EOL();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 0000000..9824566
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,561 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+
+namespace llvm {
+
+class CompileUnit;
+class DbgVariable;
+class DbgScope;
+class DbgConcreteScope;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class TargetAsmInfo;
+class Timer;
+
+//===----------------------------------------------------------------------===//
+/// SrcLineInfo - This class is used to record source line correspondence.
+///
+class VISIBILITY_HIDDEN SrcLineInfo {
+ unsigned Line; // Source line number.
+ unsigned Column; // Source column.
+ unsigned SourceID; // Source ID number.
+ unsigned LabelID; // Label in code ID number.
+public:
+ SrcLineInfo(unsigned L, unsigned C, unsigned S, unsigned I)
+ : Line(L), Column(C), SourceID(S), LabelID(I) {}
+
+ // Accessors
+ unsigned getLine() const { return Line; }
+ unsigned getColumn() const { return Column; }
+ unsigned getSourceID() const { return SourceID; }
+ unsigned getLabelID() const { return LabelID; }
+};
+
+class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
+ //===--------------------------------------------------------------------===//
+ // Attributes used to construct specific Dwarf sections.
+ //
+
+ /// CompileUnitMap - A map of global variables representing compile units to
+ /// compile units.
+ DenseMap<Value *, CompileUnit *> CompileUnitMap;
+
+ /// CompileUnits - All the compile units in this module.
+ ///
+ SmallVector<CompileUnit *, 8> CompileUnits;
+
+ /// MainCU - Some platforms prefer one compile unit per .o file. In such
+ /// cases, all DIEs are inserted into MainCU.
+ CompileUnit *MainCU;
+
+ /// AbbreviationsSet - Used to uniquely define abbreviations.
+ ///
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ /// Abbreviations - A list of all the unique abbreviations in use.
+ ///
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ /// DirectoryIdMap - Directory name to directory id map.
+ ///
+ StringMap<unsigned> DirectoryIdMap;
+
+ /// DirectoryNames - A list of directory names.
+ SmallVector<std::string, 8> DirectoryNames;
+
+ /// SourceFileIdMap - Source file name to source file id map.
+ ///
+ StringMap<unsigned> SourceFileIdMap;
+
+ /// SourceFileNames - A list of source file names.
+ SmallVector<std::string, 8> SourceFileNames;
+
+ /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+ /// id mapped to a unique id.
+ DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
+
+ /// SourceIds - Reverse map from source id to directory id + file id pair.
+ ///
+ SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
+
+ /// Lines - List of source line correspondences.
+ std::vector<SrcLineInfo> Lines;
+
+ /// ValuesSet - Used to uniquely define values.
+ ///
+ FoldingSet<DIEValue> ValuesSet;
+
+ /// Values - A list of all the unique values in use.
+ ///
+ std::vector<DIEValue *> Values;
+
+ /// StringPool - A UniqueVector of strings used by indirect references.
+ ///
+ UniqueVector<std::string> StringPool;
+
+ /// SectionMap - Provides a unique id per text section.
+ ///
+ UniqueVector<const Section*> SectionMap;
+
+ /// SectionSourceLines - Tracks line numbers per text section.
+ ///
+ std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
+
+ /// didInitial - Flag to indicate if initial emission has been done.
+ ///
+ bool didInitial;
+
+ /// shouldEmit - Flag to indicate if debug information should be emitted.
+ ///
+ bool shouldEmit;
+
+ /// FunctionDbgScope - Top level scope for the current function.
+ ///
+ DbgScope *FunctionDbgScope;
+
+ /// DbgScopeMap - Tracks the scopes in the current function.
+ DenseMap<GlobalVariable *, DbgScope *> DbgScopeMap;
+
+ /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
+ /// function.
+ DenseMap<GlobalVariable *, DbgScope *> DbgAbstractScopeMap;
+
+ /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
+ /// function.
+ DenseMap<GlobalVariable *,
+ SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
+
+ /// InlineInfo - Keep track of inlined functions and their location. This
+ /// information is used to populate debug_inlined section.
+ DenseMap<GlobalVariable *, SmallVector<unsigned, 4> > InlineInfo;
+
+ /// InlinedVariableScopes - Scopes information for the inlined subroutine
+ /// variables.
+ DenseMap<const MachineInstr *, DbgScope *> InlinedVariableScopes;
+
+ /// AbstractInstanceRootMap - Map of abstract instance roots of inlined
+ /// functions. These are subroutine entries that contain a DW_AT_inline
+ /// attribute.
+ DenseMap<const GlobalVariable *, DbgScope *> AbstractInstanceRootMap;
+
+ /// InlinedParamMap - A map keeping track of which parameters are assigned to
+ /// which abstract instance.
+ DenseMap<const GlobalVariable *,
+ SmallSet<const GlobalVariable *, 32> > InlinedParamMap;
+
+ /// AbstractInstanceRootList - List of abstract instance roots of inlined
+ /// functions. These are subroutine entries that contain a DW_AT_inline
+ /// attribute.
+ SmallVector<DbgScope *, 32> AbstractInstanceRootList;
+
+ /// LexicalScopeStack - A stack of lexical scopes. The top one is the current
+ /// scope.
+ SmallVector<DbgScope *, 16> LexicalScopeStack;
+
+ /// CompileUnitOffsets - A map from compile units to their offsets. This is
+ /// used when calculating the "origin" of a concrete instance of an inlined
+ /// function.
+ DenseMap<CompileUnit *, unsigned> CompileUnitOffsets;
+
+ /// DebugTimer - Timer for the Dwarf debug writer.
+ Timer *DebugTimer;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+ : Number(Num), Moves(M) {}
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+ /// getSourceDirectoryAndFileIds - Return the directory and file ids that
+ /// map to the source id. Source ids start at 1.
+ std::pair<unsigned, unsigned>
+ getSourceDirectoryAndFileIds(unsigned SId) const {
+ return SourceIds[SId-1];
+ }
+
+ /// getNumSourceDirectories - Return the number of source directories in the
+ /// debug info.
+ unsigned getNumSourceDirectories() const {
+ return DirectoryNames.size();
+ }
+
+ /// getSourceDirectoryName - Return the name of the directory corresponding
+ /// to the id.
+ const std::string &getSourceDirectoryName(unsigned Id) const {
+ return DirectoryNames[Id - 1];
+ }
+
+ /// getSourceFileName - Return the name of the source file corresponding
+ /// to the id.
+ const std::string &getSourceFileName(unsigned Id) const {
+ return SourceFileNames[Id - 1];
+ }
+
+ /// getNumSourceIds - Return the number of unique source ids.
+ unsigned getNumSourceIds() const {
+ return SourceIds.size();
+ }
+
+ /// AssignAbbrevNumber - Define a unique number for the abbreviation.
+ ///
+ void AssignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *CreateDIEEntry(DIE *Entry = NULL);
+
+ /// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
+ ///
+ void SetDIEEntry(DIEEntry *Value, DIE *Entry);
+
+ /// AddUInt - Add an unsigned integer attribute data and value.
+ ///
+ void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+ /// AddSInt - Add a signed integer attribute data and value.
+ ///
+ void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// AddString - Add a string attribute data and value.
+ ///
+ void AddString(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &String);
+
+ /// AddLabel - Add a Dwarf label attribute data and value.
+ ///
+ void AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label);
+
+ /// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+ ///
+ void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &Label);
+
+ /// AddSectionOffset - Add a section offset label attribute data and value.
+ ///
+ void AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label, const DWLabel &Section,
+ bool isEH = false, bool useSet = true);
+
+ /// AddDelta - Add a label delta attribute data and value.
+ ///
+ void AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Hi, const DWLabel &Lo);
+
+ /// AddDIEEntry - Add a DIE attribute data and value.
+ ///
+ void AddDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+ Die->AddValue(Attribute, Form, CreateDIEEntry(Entry));
+ }
+
+ /// AddBlock - Add block data.
+ ///
+ void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// AddSourceLine - Add location information to specified debug information
+ /// entry.
+ void AddSourceLine(DIE *Die, const DIVariable *V);
+
+ /// AddSourceLine - Add location information to specified debug information
+ /// entry.
+ void AddSourceLine(DIE *Die, const DIGlobal *G);
+
+ void AddSourceLine(DIE *Die, const DIType *Ty);
+
+ /// AddAddress - Add an address attribute to a die based on the location
+ /// provided.
+ void AddAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location);
+
+ /// AddType - Add a new type attribute to the specified entity.
+ void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+
+ /// ConstructTypeDIE - Construct basic type die from DIBasicType.
+ void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DIBasicType BTy);
+
+ /// ConstructTypeDIE - Construct derived type die from DIDerivedType.
+ void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DIDerivedType DTy);
+
+ /// ConstructTypeDIE - Construct type DIE from DICompositeType.
+ void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DICompositeType CTy);
+
+ /// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
+ void ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+ /// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
+ void ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ DICompositeType *CTy);
+
+ /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+ DIE *ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
+
+ /// CreateGlobalVariableDIE - Create new DIE using GV.
+ DIE *CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+ const DIGlobalVariable &GV);
+
+ /// CreateMemberDIE - Create new member DIE.
+ DIE *CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
+
+ /// CreateSubprogramDIE - Create new DIE using SP.
+ DIE *CreateSubprogramDIE(CompileUnit *DW_Unit,
+ const DISubprogram &SP,
+ bool IsConstructor = false,
+ bool IsInlined = false);
+
+ /// FindCompileUnit - Get the compile unit for the given descriptor.
+ ///
+ CompileUnit &FindCompileUnit(DICompileUnit Unit) const;
+
+ /// CreateDbgScopeVariable - Create a new scope variable.
+ ///
+ DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
+
+ /// getOrCreateScope - Returns the scope associated with the given descriptor.
+ ///
+ DbgScope *getOrCreateScope(GlobalVariable *V);
+
+ /// ConstructDbgScope - Construct the components of a scope.
+ ///
+ void ConstructDbgScope(DbgScope *ParentScope,
+ unsigned ParentStartID, unsigned ParentEndID,
+ DIE *ParentDie, CompileUnit *Unit);
+
+ /// ConstructFunctionDbgScope - Construct the scope for the subprogram.
+ ///
+ void ConstructFunctionDbgScope(DbgScope *RootScope,
+ bool AbstractScope = false);
+
+ /// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
+ ///
+ void ConstructDefaultDbgScope(MachineFunction *MF);
+
+ /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+ /// tools to recognize that the object file contains Dwarf information.
+ void EmitInitial();
+
+ /// EmitDIE - Recursively emits a debug information entry.
+ ///
+ void EmitDIE(DIE *Die);
+
+ /// SizeAndOffsetDie - Compute the size and offset of a DIE.
+ ///
+ unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last);
+
+ /// SizeAndOffsets - Compute the size and offset of all the DIEs.
+ ///
+ void SizeAndOffsets();
+
+ /// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+ ///
+ void EmitDebugInfoPerCU(CompileUnit *Unit);
+
+ void EmitDebugInfo();
+
+ /// EmitAbbreviations - Emit the abbreviation section.
+ ///
+ void EmitAbbreviations() const;
+
+ /// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// the line matrix.
+ ///
+ void EmitEndOfLineMatrix(unsigned SectionEnd);
+
+ /// EmitDebugLines - Emit source line information.
+ ///
+ void EmitDebugLines();
+
+ /// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+ ///
+ void EmitCommonDebugFrame();
+
+ /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+ /// section.
+ void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
+
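+ /// EmitDebugPubNamesPerCU - Emit the pubnames table for a single compile
+ /// unit.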
+ void EmitDebugPubNamesPerCU(CompileUnit *Unit);
+
+ /// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+ ///
+ void EmitDebugPubNames();
+
+ /// EmitDebugStr - Emit visible names into a debug str section.
+ ///
+ void EmitDebugStr();
+
+ /// EmitDebugLoc - Emit location lists into a debug loc section.
+ ///
+ void EmitDebugLoc();
+
+ /// EmitDebugARanges - Emit address ranges into a debug aranges section.
+ ///
+ void EmitDebugARanges();
+
+ /// EmitDebugRanges - Emit address ranges into a debug ranges section.
+ ///
+ void EmitDebugRanges();
+
+ /// EmitDebugMacInfo - Emit macro information into a debug macinfo section.
+ ///
+ void EmitDebugMacInfo();
+
+ /// EmitDebugInlineInfo - Emit inline info using the following format.
+ /// Section Header:
+ /// 1. length of section
+ /// 2. Dwarf version number
+ /// 3. address size.
+ ///
+ /// Entries (one "entry" for each function that was inlined):
+ ///
+ /// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+ /// otherwise offset into __debug_str for regular function name.
+ /// 2. offset into __debug_str section for regular function name.
+ /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+ /// instances for the function.
+ ///
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// inlined instance; the die_offset points to the inlined_subroutine die in
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
+ void EmitDebugInlineInfo();
+
+ /// GetOrCreateSourceID - Look up the source id with the given directory and
+ /// source file names. If none currently exists, create a new id and insert it
+ /// in the SourceIds map. This can update the DirectoryNames and
+ /// SourceFileNames maps as well.
+ unsigned GetOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName);
+
+ void ConstructCompileUnit(GlobalVariable *GV);
+
+ /// ConstructCompileUnits - Create compile unit DIEs.
+ void ConstructCompileUnits();
+
+ bool ConstructGlobalVariableDIE(GlobalVariable *GV);
+
+ /// ConstructGlobalVariableDIEs - Create DIEs for each of the externally
+ /// visible global variables. Return true if at least one global DIE is
+ /// created.
+ bool ConstructGlobalVariableDIEs();
+
+ bool ConstructSubprogram(GlobalVariable *GV);
+
+ /// ConstructSubprograms - Create DIEs for each of the externally visible
+ /// subprograms. Return true if at least one subprogram DIE is created.
+ bool ConstructSubprograms();
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+ virtual ~DwarfDebug();
+
+ /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
+ /// be emitted.
+ bool ShouldEmitDwarfDebug() const { return shouldEmit; }
+
+ /// SetDebugInfo - Create global DIEs and emit initial debug info sections.
+ /// This is invoked by the target AsmPrinter.
+ void SetDebugInfo(MachineModuleInfo *mmi);
+
+ /// BeginModule - Emit all Dwarf sections that should come prior to the
+ /// content.
+ void BeginModule(Module *M) {
+ this->M = M;
+ }
+
+ /// EndModule - Emit all Dwarf sections that should come after the content.
+ ///
+ void EndModule();
+
+ /// BeginFunction - Gather pre-function debug information. Assumes being
+ /// emitted immediately after the function entry point.
+ void BeginFunction(MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function debug information.
+ ///
+ void EndFunction(MachineFunction *MF);
+
+ /// RecordSourceLine - Records location information and associates it with a
+ /// label. Returns a unique label ID used to generate a label and provide
+ /// correspondence to the source line list.
+ unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col);
+
+ /// RecordSourceLine - Records location information and associates it with a
+ /// label. Returns a unique label ID used to generate a label and provide
+ /// correspondence to the source line list.
+ unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU);
+
+ /// getRecordSourceLineCount - Return the number of source lines in the debug
+ /// info.
+ unsigned getRecordSourceLineCount() const {
+ return Lines.size();
+ }
+
+ /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
+ /// timed. Look up the source id with the given directory and source file
+ /// names. If none currently exists, create a new id and insert it in the
+ /// SourceIds map. This can update the DirectoryNames and SourceFileNames
+ /// maps as well.
+ unsigned getOrCreateSourceID(const std::string &DirName,
+ const std::string &FileName);
+
+ /// RecordRegionStart - Indicate the start of a region.
+ unsigned RecordRegionStart(GlobalVariable *V);
+
+ /// RecordRegionEnd - Indicate the end of a region.
+ unsigned RecordRegionEnd(GlobalVariable *V);
+
+ /// RecordVariable - Indicate the declaration of a local variable.
+ void RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
+ const MachineInstr *MI);
+
+ /// RecordInlinedFnStart - Indicate the start of an inlined subroutine.
+ unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
+ unsigned Line, unsigned Col);
+
+ /// RecordInlinedFnEnd - Indicate the end of an inlined subroutine.
+ unsigned RecordInlinedFnEnd(DISubprogram &SP);
+
+ /// RecordVariableScope - Record the scope for the variable declared by
+ /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Scopes are
+ /// recorded here only for inlined subroutine variables; other variables'
+ /// scopes are determined during RecordVariable().
+ void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI);
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 0000000..37466ab
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,706 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+static TimerGroup &getDwarfTimerGroup() {
+ static TimerGroup DwarfTimerGroup("Dwarf Exception");
+ return DwarfTimerGroup;
+}
+
+DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A,
+ const TargetAsmInfo *T)
+ : Dwarf(OS, A, T, "eh"), shouldEmitTable(false), shouldEmitMoves(false),
+ shouldEmitTableModule(false), shouldEmitMovesModule(false),
+ ExceptionTimer(0) {
+ if (TimePassesIsEnabled)
+ ExceptionTimer = new Timer("Dwarf Exception Writer",
+ getDwarfTimerGroup());
+}
+
+DwarfException::~DwarfException() {
+ delete ExceptionTimer;
+}
+
+void DwarfException::EmitCommonEHFrame(const Function *Personality,
+ unsigned Index) {
+ // Size and sign of stack growth.
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
+
+ // Begin eh frame section.
+ Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+
+ if (!TAI->doesRequireNonLocalEHFrameLabel())
+ O << TAI->getEHGlobalPrefix();
+
+ O << "EH_frame" << Index << ":\n";
+ EmitLabel("section_eh_frame", Index);
+
+ // Define base labels.
+ EmitLabel("eh_frame_common", Index);
+
+ // Define the eh frame length.
+ EmitDifference("eh_frame_common_end", Index,
+ "eh_frame_common_begin", Index, true);
+ Asm->EOL("Length of Common Information Entry");
+
+ // EH frame header.
+ EmitLabel("eh_frame_common_begin", Index);
+ Asm->EmitInt32((int)0);
+ Asm->EOL("CIE Identifier Tag");
+ Asm->EmitInt8(dwarf::DW_CIE_VERSION);
+ Asm->EOL("CIE Version");
+
+ // The personality presence indicates that language specific information will
+ // show up in the eh frame.
+ Asm->EmitString(Personality ? "zPLR" : "zR");
+ Asm->EOL("CIE Augmentation");
+
+ // Code and data alignment factors, and the return address column.
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("CIE Code Alignment Factor");
+ Asm->EmitSLEB128Bytes(stackGrowth);
+ Asm->EOL("CIE Data Alignment Factor");
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
+ Asm->EOL("CIE Return Address Column");
+
+ // If there is a personality, we need to indicate the function's location.
+ if (Personality) {
+ Asm->EmitULEB128Bytes(7);
+ Asm->EOL("Augmentation Size");
+
+ if (TAI->getNeedsIndirectEncoding()) {
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 |
+ dwarf::DW_EH_PE_indirect);
+ Asm->EOL("Personality (pcrel sdata4 indirect)");
+ } else {
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ Asm->EOL("Personality (pcrel sdata4)");
+ }
+
+ PrintRelDirective(true);
+ O << TAI->getPersonalityPrefix();
+ Asm->EmitExternalGlobal((const GlobalVariable *)(Personality));
+ O << TAI->getPersonalitySuffix();
+ if (strcmp(TAI->getPersonalitySuffix(), "+4@GOTPCREL"))
+ O << "-" << TAI->getPCSymbol();
+ Asm->EOL("Personality");
+
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ Asm->EOL("LSDA Encoding (pcrel sdata4)");
+
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ Asm->EOL("FDE Encoding (pcrel sdata4)");
+ } else {
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("Augmentation Size");
+
+ Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ Asm->EOL("FDE Encoding (pcrel sdata4)");
+ }
+
+ // Indicate locations of general callee saved registers in frame.
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+ EmitFrameMoves(NULL, 0, Moves, true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it must
+ // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
+ // holes which confuse readers of eh_frame.
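+  // (EmitAlignment takes a power-of-two exponent: 2 requests 4-byte and 3
+  // requests 8-byte alignment.)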
+ Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3,
+ 0, 0, false);
+ EmitLabel("eh_frame_common_end", Index);
+
+ Asm->EOL();
+}
+
+/// EmitEHFrame - Emit function exception frame information.
+///
+void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
+ assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+ "Should not emit 'available externally' functions at all");
+
+ Function::LinkageTypes linkage = EHFrameInfo.function->getLinkage();
+ Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+
+ // Externally visible entry into the function's eh frame info. If the
+ // corresponding function is static, this should not be externally visible.
+ if (linkage != Function::InternalLinkage &&
+ linkage != Function::PrivateLinkage) {
+ if (const char *GlobalEHDirective = TAI->getGlobalEHDirective())
+ O << GlobalEHDirective << EHFrameInfo.FnName << "\n";
+ }
+
+ // If corresponding function is weak definition, this should be too.
+ if ((linkage == Function::WeakAnyLinkage ||
+ linkage == Function::WeakODRLinkage ||
+ linkage == Function::LinkOnceAnyLinkage ||
+ linkage == Function::LinkOnceODRLinkage) &&
+ TAI->getWeakDefDirective())
+ O << TAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n";
+
+ // If there are no calls then you can't unwind. This may mean we can omit the
+ // EH Frame, but some environments do not handle weak absolute symbols. If
+ // UnwindTablesMandatory is set we cannot do this optimization; the unwind
+ // info is to be available for non-EH uses.
+ if (!EHFrameInfo.hasCalls &&
+ !UnwindTablesMandatory &&
+ ((linkage != Function::WeakAnyLinkage &&
+ linkage != Function::WeakODRLinkage &&
+ linkage != Function::LinkOnceAnyLinkage &&
+ linkage != Function::LinkOnceODRLinkage) ||
+ !TAI->getWeakDefDirective() ||
+ TAI->getSupportsWeakOmittedEHFrame())) {
+ O << EHFrameInfo.FnName << " = 0\n";
+ // This name has no connection to the function, so it might get
+ // dead-stripped when the function is not, erroneously. Prohibit
+ // dead-stripping unconditionally.
+ if (const char *UsedDirective = TAI->getUsedDirective())
+ O << UsedDirective << EHFrameInfo.FnName << "\n\n";
+ } else {
+ O << EHFrameInfo.FnName << ":\n";
+
+ // EH frame header.
+ EmitDifference("eh_frame_end", EHFrameInfo.Number,
+ "eh_frame_begin", EHFrameInfo.Number, true);
+ Asm->EOL("Length of Frame Information Entry");
+
+ EmitLabel("eh_frame_begin", EHFrameInfo.Number);
+
+ if (TAI->doesRequireNonLocalEHFrameLabel()) {
+ PrintRelDirective(true, true);
+ PrintLabelName("eh_frame_begin", EHFrameInfo.Number);
+
+ if (!TAI->isAbsoluteEHSectionOffsets())
+ O << "-EH_frame" << EHFrameInfo.PersonalityIndex;
+ } else {
+ EmitSectionOffset("eh_frame_begin", "eh_frame_common",
+ EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
+ true, true, false);
+ }
+
+ Asm->EOL("FDE CIE offset");
+
+ EmitReference("eh_func_begin", EHFrameInfo.Number, true, true);
+ Asm->EOL("FDE initial location");
+ EmitDifference("eh_func_end", EHFrameInfo.Number,
+ "eh_func_begin", EHFrameInfo.Number, true);
+ Asm->EOL("FDE address range");
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (EHFrameInfo.PersonalityIndex) {
+ Asm->EmitULEB128Bytes(4);
+ Asm->EOL("Augmentation size");
+
+ if (EHFrameInfo.hasLandingPads)
+ EmitReference("exception", EHFrameInfo.Number, true, true);
+ else
+ Asm->EmitInt32((int)0);
+ Asm->EOL("Language Specific Data Area");
+ } else {
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("Augmentation size");
+ }
+
+ // Indicate locations of function specific callee saved registers in frame.
+ EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves,
+ true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it
+ // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
+ // get holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3,
+ 0, 0, false);
+ EmitLabel("eh_frame_end", EHFrameInfo.Number);
+
+ // If the function is marked used, this table should be also. We cannot
+ // make the mark unconditional in this case, since retaining the table also
+ // retains the function in this case, and there is code around that depends
+ // on unused functions (calling undefined externals) being dead-stripped to
+ // link correctly. Yes, there really is.
+ if (MMI->getUsedFunctions().count(EHFrameInfo.function))
+ if (const char *UsedDirective = TAI->getUsedDirective())
+ O << UsedDirective << EHFrameInfo.FnName << "\n\n";
+ }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally an index into the actions
+/// table.
+/// 2. The action table, in our case, is composed of pairs of type ids and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type Id is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found, the frame is
+/// unwound and handling continues.
+/// 3. The type id table contains references to all the C++ typeinfo for all
+///    catches in the function. This table is indexed in reverse, starting
+///    at 1.
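+///
+/// An illustrative sketch of the emitted header (see EmitExceptionTable
+/// below; directive spellings are target-dependent):
+///   .byte  DW_EH_PE_omit             ; LPStart format
+///   .byte  DW_EH_PE_absptr           ; TType format
+///   .uleb128 <TType base offset>
+///   .byte  DW_EH_PE_udata4           ; call site format
+///   .uleb128 <call-site table length>
+///   <call sites> <actions> <type infos> <filter ids>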
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+}
+
+void DwarfException::EmitExceptionTable() {
+ const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Negative type ids index into FilterIds, positive type ids index into
+ // TypeInfos. The value written for a positive type id is just the type id
+ // itself. For a negative type id, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type id, because the FilterIds entries are
+ // written using a variable width encoding which outputs one byte per entry as
+ // long as the value written is not too large, but can differ. This kind of
+ // complication does not occur for positive type ids because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
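+  //
+  // Worked example (illustrative): for FilterIds = {3, 200000} the loop below
+  // yields FilterOffsets = {-1, -2}; the first offset is -1, and since
+  // uleb128(3) occupies a single byte the second entry lands at -2.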
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+ for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+ E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= TargetAsmInfo::getULEB128Size(*I);
+ }
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LP = LandingPads[i];
+ const std::vector<int> &TypeIds = LP->TypeIds;
+ const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+ TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -=
+ TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+ int TypeID = TypeIds[I];
+ assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ Actions.push_back(Action);
+
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to size.
+ SizeActions += SizeSiteActions;
+ }
+
+ // Compute the call-site table. The entry for an invoke has a try-range
+ // containing the call, a non-zero landing pad and an appropriate action. The
+ // entry for an ordinary call has a try-range containing the call and zero for
+ // the landing pad and the action. Calls marked 'nounwind' have no entry and
+ // must not be contained in the try-range of any entry - they form gaps in the
+ // table. Entries must be ordered by try-range address.
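+  // For instance, an invoke contributes {try_begin, try_end, pad_label,
+  // action_index}, while a region containing only ordinary (potentially
+  // throwing) calls contributes {range_begin, range_end, 0, 0}.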
+ SmallVector<CallSiteEntry, 64> CallSites;
+
+ RangeMapType PadMap;
+
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced.
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ unsigned BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ // The end label of the previous invoke or nounwind try-range.
+ unsigned LastLabel = 0;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last callsite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ // Visit all instructions in order of address.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (!MI->isLabel()) {
+ SawPotentiallyThrowing |= MI->getDesc().isCall();
+ continue;
+ }
+
+ unsigned BeginLabel = MI->getOperand(0).getImm();
+ assert(BeginLabel && "Invalid label!");
+
+ // End of the previous try-range?
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ PadRange P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (SawPotentiallyThrowing) {
+ CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (LandingPad->LandingPadLabel) {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {BeginLabel, LastLabel,
+ LandingPad->LandingPadLabel,
+ FirstActions[P.PadIndex]};
+
+ // Try to merge with the previous call-site.
+ if (PreviousIsInvoke) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ CallSites.push_back(Site);
+ PreviousIsInvoke = true;
+ } else {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing) {
+ CallSiteEntry Site = {LastLabel, 0, 0, 0};
+ CallSites.push_back(Site);
+ }
+
+ // Final tallies.
+
+ // Call sites.
+ const unsigned SiteStartSize = sizeof(int32_t); // DW_EH_PE_udata4
+ const unsigned SiteLengthSize = sizeof(int32_t); // DW_EH_PE_udata4
+ const unsigned LandingPadSize = sizeof(int32_t); // DW_EH_PE_udata4
+ unsigned SizeSites = CallSites.size() * (SiteStartSize +
+ SiteLengthSize +
+ LandingPadSize);
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
+ SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action);
+
+ // Type infos.
+ const unsigned TypeInfoSize = TD->getPointerSize(); // DW_EH_PE_absptr
+ unsigned SizeTypes = TypeInfos.size() * TypeInfoSize;
+
+ unsigned TypeOffset = sizeof(int8_t) + // Call site format
+ TargetAsmInfo::getULEB128Size(SizeSites) + // Call-site table length
+ SizeSites + SizeActions + SizeTypes;
+
+ unsigned TotalSize = sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
+ TypeOffset;
+
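+  // Number of padding bytes (emitted below) needed to round TotalSize up to a
+  // multiple of 4.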
+ unsigned SizeAlign = (4 - TotalSize) & 3;
+
+ // Begin the exception table.
+ Asm->SwitchToDataSection(TAI->getDwarfExceptionSection());
+ Asm->EmitAlignment(2, 0, 0, false);
+ O << "GCC_except_table" << SubprogramCount << ":\n";
+
+ for (unsigned i = 0; i != SizeAlign; ++i) {
+ Asm->EmitInt8(0);
+ Asm->EOL("Padding");
+ }
+
+ EmitLabel("exception", SubprogramCount);
+
+ // Emit the header.
+ Asm->EmitInt8(dwarf::DW_EH_PE_omit);
+ Asm->EOL("LPStart format (DW_EH_PE_omit)");
+ Asm->EmitInt8(dwarf::DW_EH_PE_absptr);
+ Asm->EOL("TType format (DW_EH_PE_absptr)");
+ Asm->EmitULEB128Bytes(TypeOffset);
+ Asm->EOL("TType base offset");
+ Asm->EmitInt8(dwarf::DW_EH_PE_udata4);
+ Asm->EOL("Call site format (DW_EH_PE_udata4)");
+ Asm->EmitULEB128Bytes(SizeSites);
+ Asm->EOL("Call-site table length");
+
+ // Emit the landing pad site information.
+ for (unsigned i = 0; i < CallSites.size(); ++i) {
+ CallSiteEntry &S = CallSites[i];
+ const char *BeginTag;
+ unsigned BeginNumber;
+
+ if (!S.BeginLabel) {
+ BeginTag = "eh_func_begin";
+ BeginNumber = SubprogramCount;
+ } else {
+ BeginTag = "label";
+ BeginNumber = S.BeginLabel;
+ }
+
+ EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount,
+ true, true);
+ Asm->EOL("Region start");
+
+ if (!S.EndLabel)
+ EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber,
+ true);
+ else
+ EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true);
+
+ Asm->EOL("Region length");
+
+ if (!S.PadLabel)
+ Asm->EmitInt32(0);
+ else
+ EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount,
+ true, true);
+
+ Asm->EOL("Landing pad");
+
+ Asm->EmitULEB128Bytes(S.Action);
+ Asm->EOL("Action");
+ }
+
+ // Emit the actions.
+ for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+ ActionEntry &Action = Actions[I];
+
+ Asm->EmitSLEB128Bytes(Action.ValueForTypeID);
+ Asm->EOL("TypeInfo index");
+ Asm->EmitSLEB128Bytes(Action.NextAction);
+ Asm->EOL("Next action");
+ }
+
+ // Emit the type ids.
+ for (unsigned M = TypeInfos.size(); M; --M) {
+ GlobalVariable *GV = TypeInfos[M - 1];
+ PrintRelDirective();
+
+ if (GV) {
+ std::string GLN;
+ O << Asm->getGlobalLinkName(GV, GLN);
+ } else {
+ O << "0";
+ }
+
+ Asm->EOL("TypeInfo");
+ }
+
+ // Emit the filter typeids.
+ for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+ unsigned TypeID = FilterIds[j];
+ Asm->EmitULEB128Bytes(TypeID);
+ Asm->EOL("Filter TypeInfo index");
+ }
+
+ Asm->EmitAlignment(2, 0, 0, false);
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ if (shouldEmitMovesModule || shouldEmitTableModule) {
+ const std::vector<Function *> Personalities = MMI->getPersonalities();
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ EmitCommonEHFrame(Personalities[i], i);
+
+ for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(),
+ E = EHFrames.end(); I != E; ++I)
+ EmitEHFrame(*I);
+ }
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfException::BeginFunction(MachineFunction *MF) {
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ this->MF = MF;
+ shouldEmitTable = shouldEmitMoves = false;
+
+ if (MMI && TAI->doesSupportExceptionHandling()) {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ // If any landing pads survive, we need an EH table.
+ if (MMI->getLandingPads().size())
+ shouldEmitTable = true;
+
+ // See if we need frame move info.
+ if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
+ shouldEmitMoves = true;
+
+ if (shouldEmitMoves || shouldEmitTable)
+ // Assumes in correct section after the entry point.
+ EmitLabel("eh_func_begin", ++SubprogramCount);
+ }
+
+ shouldEmitTableModule |= shouldEmitTable;
+ shouldEmitMovesModule |= shouldEmitMoves;
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+ if (TimePassesIsEnabled)
+ ExceptionTimer->startTimer();
+
+ if (shouldEmitMoves || shouldEmitTable) {
+ EmitLabel("eh_func_end", SubprogramCount);
+ EmitExceptionTable();
+
+ // Save EH frame information
+ std::string Name;
+ EHFrames.push_back(
+ FunctionEHFrameInfo(getAsm()->getCurrentFunctionEHName(MF, Name),
+ SubprogramCount,
+ MMI->getPersonalityIndex(),
+ MF->getFrameInfo()->hasCalls(),
+ !MMI->getLandingPads().empty(),
+ MMI->getFrameMoves(),
+ MF->getFunction()));
+ }
+
+ if (TimePassesIsEnabled)
+ ExceptionTimer->stopTimer();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 0000000..4479af2
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,178 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__
+#define CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/DenseMap.h"
+#include <string>
+
+namespace llvm {
+
+struct LandingPadInfo;
+class MachineModuleInfo;
+class TargetAsmInfo;
+class Timer;
+class raw_ostream;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class VISIBILITY_HIDDEN DwarfException : public Dwarf {
+ struct FunctionEHFrameInfo {
+ std::string FnName;
+ unsigned Number;
+ unsigned PersonalityIndex;
+ bool hasCalls;
+ bool hasLandingPads;
+ std::vector<MachineMove> Moves;
+ const Function * function;
+
+ FunctionEHFrameInfo(const std::string &FN, unsigned Num, unsigned P,
+ bool hC, bool hL,
+ const std::vector<MachineMove> &M,
+ const Function *f):
+ FnName(FN), Number(Num), PersonalityIndex(P),
+ hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { }
+ };
+
+ std::vector<FunctionEHFrameInfo> EHFrames;
+
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+
+ /// shouldEmitMovesModule - Per-module flag to indicate if frame moves
+ /// should be emitted.
+ bool shouldEmitMovesModule;
+
+ /// ExceptionTimer - Timer for the Dwarf exception writer.
+ Timer *ExceptionTimer;
+
+ /// EmitCommonEHFrame - Emit the common eh unwind frame.
+ ///
+ void EmitCommonEHFrame(const Function *Personality, unsigned Index);
+
+ /// EmitEHFrame - Emit function exception frame information.
+ ///
+ void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo);
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found, the frame is unwound and handling continues.
+ /// 3. The type id table contains references to all the C++ typeinfo for
+ ///    all catches in the function. This table is indexed in reverse,
+ ///    starting at 1.
+
+ /// SharedTypeIds - How many leading type ids two landing pads have in common.
+ static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// PadLT - Order landing pads lexicographically by type id.
+ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+ struct KeyInfo {
+ static inline unsigned getEmptyKey() { return -1U; }
+ static inline unsigned getTombstoneKey() { return -2U; }
+ static unsigned getHashValue(const unsigned &Key) { return Key; }
+ static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+ static bool isPod() { return true; }
+ };
+
+ /// ActionEntry - Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+ };
+
+ /// PadRange - Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+
+ /// CallSiteEntry - Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ unsigned BeginLabel; // zero indicates the start of the function.
+ unsigned EndLabel; // zero indicates the end of the function.
+ // The landing pad starts at PadLabel.
+ unsigned PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ void EmitExceptionTable();
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+ virtual ~DwarfException();
+
+ /// SetModuleInfo - Set machine module information when it's known that pass
+ /// manager has created it. Set by the target AsmPrinter.
+ void SetModuleInfo(MachineModuleInfo *mmi) {
+ MMI = mmi;
+ }
+
+ /// BeginModule - Emit all exception information that should come prior to the
+ /// content.
+ void BeginModule(Module *M) {
+ this->M = M;
+ }
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ void BeginFunction(MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
new file mode 100644
index 0000000..8021b7c
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp
@@ -0,0 +1,35 @@
+//===--- lib/CodeGen/DwarfLabel.cpp - Dwarf Label -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DWARF Labels
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include <ostream>
+
+using namespace llvm;
+
+/// Profile - Used to gather unique data for the folding set.
+///
+void DWLabel::Profile(FoldingSetNodeID &ID) const {
+ ID.AddString(Tag);
+ ID.AddInteger(Number);
+}
+
+#ifndef NDEBUG
+void DWLabel::print(std::ostream *O) const {
+ if (O) print(*O);
+}
+void DWLabel::print(std::ostream &O) const {
+ O << "." << Tag;
+ if (Number) O << Number;
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h
new file mode 100644
index 0000000..b493903
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h
@@ -0,0 +1,56 @@
+//===--- lib/CodeGen/DwarfLabel.h - Dwarf Label -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// DWARF Labels.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__
+#define CODEGEN_ASMPRINTER_DWARFLABEL_H__
+
+#include "llvm/Support/Compiler.h"
+#include <iosfwd>
+#include <vector>
+
+namespace llvm {
+ class FoldingSetNodeID;
+
+ //===--------------------------------------------------------------------===//
+ /// DWLabel - Labels are used to track locations in the assembler file.
+ /// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim,
+ /// where the tag is a category of label (Ex. location) and number is a value
+ /// unique in that category.
+ class VISIBILITY_HIDDEN DWLabel {
+ /// Tag - Label category tag. Should always be a statically declared C
+ /// string.
+ ///
+ const char *Tag;
+
+ /// Number - Value to make label unique.
+ ///
+ unsigned Number;
+ public:
+ DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {}
+
+ // Accessors.
+ const char *getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+
+ /// Profile - Used to gather unique data for the folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const;
+
+#ifndef NDEBUG
+ void print(std::ostream *O) const;
+ void print(std::ostream &O) const;
+#endif
+ };
+} // end llvm namespace
+
+#endif
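A small sketch of the <prefix><Tag><Number> form described for DWLabel above; the "L" prefix merely stands in for the target's private-global prefix and is an assumption here:

    #include <cstdio>
    #include <string>

    // Render a (Tag, Number) pair the way a DWLabel is printed: prefix, then
    // the category tag, then the number (omitted when zero).
    static std::string formatLabel(const char *Tag, unsigned Number,
                                   const char *Prefix = "L") {
      std::string S = std::string(Prefix) + Tag;
      if (Number)
        S += std::to_string(Number);
      return S;
    }

    int main() {
      std::printf("%s\n", formatLabel("debug_loc", 5).c_str()); // Ldebug_loc5
      return 0;
    }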
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
new file mode 100644
index 0000000..45e7dd3
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -0,0 +1,235 @@
+//===--- lib/CodeGen/DwarfPrinter.cpp - Dwarf Printer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit general DWARF directives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <ostream>
+
+using namespace llvm;
+
+Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T,
+ const char *flavor)
+: O(OS), Asm(A), TAI(T), TD(Asm->TM.getTargetData()),
+ RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL),
+ SubprogramCount(0), Flavor(flavor), SetCounter(1) {}
+
+void Dwarf::PrintRelDirective(bool Force32Bit, bool isInSection) const {
+ if (isInSection && TAI->getDwarfSectionOffsetDirective())
+ O << TAI->getDwarfSectionOffsetDirective();
+ else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+}
+
+/// PrintLabelName - Print label name in form used by Dwarf writer.
+///
+void Dwarf::PrintLabelName(const char *Tag, unsigned Number) const {
+ O << TAI->getPrivateGlobalPrefix() << Tag;
+ if (Number) O << Number;
+}
+void Dwarf::PrintLabelName(const char *Tag, unsigned Number,
+ const char *Suffix) const {
+ O << TAI->getPrivateGlobalPrefix() << Tag;
+ if (Number) O << Number;
+ O << Suffix;
+}
+
+/// EmitLabel - Emit location label for internal use by Dwarf.
+///
+void Dwarf::EmitLabel(const char *Tag, unsigned Number) const {
+ PrintLabelName(Tag, Number);
+ O << ":\n";
+}
+
+/// EmitReference - Emit a reference to a label.
+///
+void Dwarf::EmitReference(const char *Tag, unsigned Number,
+ bool IsPCRelative, bool Force32Bit) const {
+ PrintRelDirective(Force32Bit);
+ PrintLabelName(Tag, Number);
+ if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+}
+void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative,
+ bool Force32Bit) const {
+ PrintRelDirective(Force32Bit);
+ O << Name;
+ if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+}
+
+/// EmitDifference - Emit the difference between two labels. Some assemblers do
+/// not behave well with absolute expressions in data directives, so there is
+/// an option (needsSet) to use an intermediary set expression.
+void Dwarf::EmitDifference(const char *TagHi, unsigned NumberHi,
+ const char *TagLo, unsigned NumberLo,
+ bool IsSmall) {
+ if (TAI->needsSet()) {
+ O << "\t.set\t";
+ PrintLabelName("set", SetCounter, Flavor);
+ O << ",";
+ PrintLabelName(TagHi, NumberHi);
+ O << "-";
+ PrintLabelName(TagLo, NumberLo);
+ O << "\n";
+
+ PrintRelDirective(IsSmall);
+ PrintLabelName("set", SetCounter, Flavor);
+ ++SetCounter;
+ } else {
+ PrintRelDirective(IsSmall);
+ PrintLabelName(TagHi, NumberHi);
+ O << "-";
+ PrintLabelName(TagLo, NumberLo);
+ }
+}
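A standalone sketch of the needsSet() path above: the label difference is routed through an intermediary .set symbol, which is then referenced by a data directive. The "L" prefix and the .long directive are assumptions; the real output depends on the target's TargetAsmInfo:

    #include <cstdio>
    #include <string>

    // Build the two lines the needsSet() branch would print for the
    // difference "Hi<HiN> - Lo<LoN>", bumping the shared set counter.
    static std::string emitDifferenceWithSet(const std::string &Hi, unsigned HiN,
                                             const std::string &Lo, unsigned LoN,
                                             unsigned &SetCounter,
                                             const std::string &Flavor) {
      std::string Set = "Lset" + std::to_string(SetCounter++) + Flavor;
      std::string Out = "\t.set\t" + Set + ",L" + Hi + std::to_string(HiN) +
                        "-L" + Lo + std::to_string(LoN) + "\n";
      Out += "\t.long\t" + Set + "\n";
      return Out;
    }

    int main() {
      unsigned SetCounter = 1;
      // e.g. the distance from "eh_func_begin3" to "label7":
      std::fputs(emitDifferenceWithSet("label", 7, "eh_func_begin", 3,
                                       SetCounter, "eh").c_str(), stdout);
      return 0;
    }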
+
+void Dwarf::EmitSectionOffset(const char* Label, const char* Section,
+ unsigned LabelNumber, unsigned SectionNumber,
+ bool IsSmall, bool isEH,
+ bool useSet) {
+ bool printAbsolute = false;
+ if (isEH)
+ printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+ else
+ printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+
+ if (TAI->needsSet() && useSet) {
+ O << "\t.set\t";
+ PrintLabelName("set", SetCounter, Flavor);
+ O << ",";
+ PrintLabelName(Label, LabelNumber);
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+
+ O << "\n";
+ PrintRelDirective(IsSmall);
+ PrintLabelName("set", SetCounter, Flavor);
+ ++SetCounter;
+ } else {
+ PrintRelDirective(IsSmall, true);
+ PrintLabelName(Label, LabelNumber);
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+ }
+}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+ const std::vector<MachineMove> &Moves, bool isEH) {
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize();
+ bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ unsigned LabelID = Move.getLabelID();
+
+ if (LabelID) {
+ LabelID = MMI->MappedLabel(LabelID);
+
+ // Throw out move if the label is invalid.
+ if (!LabelID) continue;
+ }
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+ Asm->EmitInt8(dwarf::DW_CFA_advance_loc4);
+ Asm->EOL("DW_CFA_advance_loc4");
+ EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true);
+ Asm->EOL();
+
+ BaseLabelID = LabelID;
+ BaseLabel = "label";
+ IsLocal = true;
+ }
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (!Src.isReg()) {
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ Asm->EmitInt8(dwarf::DW_CFA_def_cfa_offset);
+ Asm->EOL("DW_CFA_def_cfa_offset");
+ } else {
+ Asm->EmitInt8(dwarf::DW_CFA_def_cfa);
+ Asm->EOL("DW_CFA_def_cfa");
+ Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), isEH));
+ Asm->EOL("Register");
+ }
+
+ int Offset = -Src.getOffset();
+
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else {
+ assert(0 && "Machine move no supported yet.");
+ }
+ } else if (Src.isReg() &&
+ Src.getReg() == MachineLocation::VirtualFP) {
+ if (Dst.isReg()) {
+ Asm->EmitInt8(dwarf::DW_CFA_def_cfa_register);
+ Asm->EOL("DW_CFA_def_cfa_register");
+ Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
+ Asm->EOL("Register");
+ } else {
+ assert(0 && "Machine move no supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ Asm->EmitInt8(dwarf::DW_CFA_offset_extended_sf);
+ Asm->EOL("DW_CFA_offset_extended_sf");
+ Asm->EmitULEB128Bytes(Reg);
+ Asm->EOL("Reg");
+ Asm->EmitSLEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else if (Reg < 64) {
+ Asm->EmitInt8(dwarf::DW_CFA_offset + Reg);
+ if (Asm->isVerbose())
+ Asm->EOL("DW_CFA_offset + Reg (" + utostr(Reg) + ")");
+ else
+ Asm->EOL();
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else {
+ Asm->EmitInt8(dwarf::DW_CFA_offset_extended);
+ Asm->EOL("DW_CFA_offset_extended");
+ Asm->EmitULEB128Bytes(Reg);
+ Asm->EOL("Reg");
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ }
+ }
+ }
+}
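The opcode choice in the final branch of EmitFrameMoves follows the DWARF encoding rules for register-save instructions; below is a minimal sketch of just that decision. The enum values are the standard DWARF constants, and Offset is assumed to be already scaled by the data alignment factor, as in the code above:

    #include <cstdio>

    // DWARF call-frame opcodes used above (values from the DWARF spec).
    enum { DW_CFA_offset = 0x80, DW_CFA_offset_extended = 0x05,
           DW_CFA_offset_extended_sf = 0x11 };

    // Which CFA instruction describes "register Reg saved at Offset slots
    // from the CFA"?
    static unsigned chooseOffsetOpcode(unsigned Reg, int Offset) {
      if (Offset < 0)
        return DW_CFA_offset_extended_sf; // needs a signed LEB128 offset
      if (Reg < 64)
        return DW_CFA_offset + Reg;       // register fits in the low 6 bits
      return DW_CFA_offset_extended;      // register encoded as a ULEB128
    }

    int main() {
      std::printf("%#x %#x %#x\n",
                  chooseOffsetOpcode(13, 4),    // 0x8d: DW_CFA_offset + 13
                  chooseOffsetOpcode(70, 4),    // 0x05: DW_CFA_offset_extended
                  chooseOffsetOpcode(13, -2));  // 0x11: ..._extended_sf
      return 0;
    }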
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
new file mode 100644
index 0000000..6e75992
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -0,0 +1,153 @@
+//===--- lib/CodeGen/DwarfPrinter.h - Dwarf Printer -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit general DWARF directives.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+#define CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+
+#include "DwarfLabel.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+namespace llvm {
+ class AsmPrinter;
+ class MachineFunction;
+ class MachineModuleInfo;
+ class Module;
+ class TargetAsmInfo;
+ class TargetData;
+ class TargetRegisterInfo;
+
+ class VISIBILITY_HIDDEN Dwarf {
+ protected:
+ //===------------------------------------------------------------------===//
+ // Core attributes used by the DWARF printer.
+ //
+
+ /// O - Stream to .s file.
+ ///
+ raw_ostream &O;
+
+ /// Asm - Target of Dwarf emission.
+ ///
+ AsmPrinter *Asm;
+
+ /// TAI - Target asm information.
+ ///
+ const TargetAsmInfo *TAI;
+
+ /// TD - Target data.
+ ///
+ const TargetData *TD;
+
+ /// RI - Register Information.
+ ///
+ const TargetRegisterInfo *RI;
+
+ /// M - Current module.
+ ///
+ Module *M;
+
+ /// MF - Current machine function.
+ ///
+ MachineFunction *MF;
+
+ /// MMI - Collected machine module information.
+ ///
+ MachineModuleInfo *MMI;
+
+ /// SubprogramCount - The running count of functions being compiled.
+ ///
+ unsigned SubprogramCount;
+
+ /// Flavor - A unique string indicating what dwarf producer this is, used to
+ /// uniquify labels.
+ ///
+ const char * const Flavor;
+
+ /// SetCounter - A unique number for each '.set' directive.
+ ///
+ unsigned SetCounter;
+
+ Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T,
+ const char *flavor);
+ public:
+ //===------------------------------------------------------------------===//
+ // Accessors.
+ //
+ const AsmPrinter *getAsm() const { return Asm; }
+ MachineModuleInfo *getMMI() const { return MMI; }
+ const TargetAsmInfo *getTargetAsmInfo() const { return TAI; }
+ const TargetData *getTargetData() const { return TD; }
+
+ void PrintRelDirective(bool Force32Bit = false,
+ bool isInSection = false) const;
+
+
+ /// PrintLabelName - Print label name in form used by Dwarf writer.
+ ///
+ void PrintLabelName(const DWLabel &Label) const {
+ PrintLabelName(Label.getTag(), Label.getNumber());
+ }
+ void PrintLabelName(const char *Tag, unsigned Number) const;
+ void PrintLabelName(const char *Tag, unsigned Number,
+ const char *Suffix) const;
+
+ /// EmitLabel - Emit location label for internal use by Dwarf.
+ ///
+ void EmitLabel(const DWLabel &Label) const {
+ EmitLabel(Label.getTag(), Label.getNumber());
+ }
+ void EmitLabel(const char *Tag, unsigned Number) const;
+
+ /// EmitReference - Emit a reference to a label.
+ ///
+ void EmitReference(const DWLabel &Label, bool IsPCRelative = false,
+ bool Force32Bit = false) const {
+ EmitReference(Label.getTag(), Label.getNumber(),
+ IsPCRelative, Force32Bit);
+ }
+ void EmitReference(const char *Tag, unsigned Number,
+ bool IsPCRelative = false,
+ bool Force32Bit = false) const;
+ void EmitReference(const std::string &Name, bool IsPCRelative = false,
+ bool Force32Bit = false) const;
+
+ /// EmitDifference - Emit the difference between two labels. Some
+ /// assemblers do not behave well with absolute expressions in data
+ /// directives, so there is an option (needsSet) to use an intermediary set
+ /// expression.
+ void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo,
+ bool IsSmall = false) {
+ EmitDifference(LabelHi.getTag(), LabelHi.getNumber(),
+ LabelLo.getTag(), LabelLo.getNumber(),
+ IsSmall);
+ }
+ void EmitDifference(const char *TagHi, unsigned NumberHi,
+ const char *TagLo, unsigned NumberLo,
+ bool IsSmall = false);
+
+ void EmitSectionOffset(const char* Label, const char* Section,
+ unsigned LabelNumber, unsigned SectionNumber,
+ bool IsSmall = false, bool isEH = false,
+ bool useSet = true);
+
+ /// EmitFrameMoves - Emit frame instructions to describe the layout of the
+ /// frame.
+ void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+ const std::vector<MachineMove> &Moves, bool isEH);
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
new file mode 100644
index 0000000..483ee559
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -0,0 +1,129 @@
+//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+static RegisterPass<DwarfWriter>
+X("dwarfwriter", "DWARF Information Writer");
+char DwarfWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+/// DwarfWriter Implementation
+///
+
+DwarfWriter::DwarfWriter()
+ : ImmutablePass(&ID), DD(0), DE(0) {}
+
+DwarfWriter::~DwarfWriter() {
+ delete DE;
+ delete DD;
+}
+
+/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// content.
+void DwarfWriter::BeginModule(Module *M,
+ MachineModuleInfo *MMI,
+ raw_ostream &OS, AsmPrinter *A,
+ const TargetAsmInfo *T) {
+ DE = new DwarfException(OS, A, T);
+ DD = new DwarfDebug(OS, A, T);
+ DE->BeginModule(M);
+ DD->BeginModule(M);
+ DD->SetDebugInfo(MMI);
+ DE->SetModuleInfo(MMI);
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfWriter::EndModule() {
+ DE->EndModule();
+ DD->EndModule();
+}
+
+/// BeginFunction - Gather pre-function debug information. Assumes it is
+/// emitted immediately after the function entry point.
+void DwarfWriter::BeginFunction(MachineFunction *MF) {
+ DE->BeginFunction(MF);
+ DD->BeginFunction(MF);
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfWriter::EndFunction(MachineFunction *MF) {
+ DD->EndFunction(MF);
+ DE->EndFunction();
+
+ if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI())
+ // Clear function debug information.
+ MMI->EndFunction();
+}
+
+/// RecordSourceLine - Records location information and associates it with a
+/// label. Returns a unique label ID used to generate a label and provide
+/// correspondence to the source line list.
+unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
+ DICompileUnit CU) {
+ return DD->RecordSourceLine(Line, Col, CU);
+}
+
+/// RecordRegionStart - Indicate the start of a region.
+unsigned DwarfWriter::RecordRegionStart(GlobalVariable *V) {
+ return DD->RecordRegionStart(V);
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+unsigned DwarfWriter::RecordRegionEnd(GlobalVariable *V) {
+ return DD->RecordRegionEnd(V);
+}
+
+/// getRecordSourceLineCount - Count source lines.
+unsigned DwarfWriter::getRecordSourceLineCount() {
+ return DD->getRecordSourceLineCount();
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+///
+void DwarfWriter::RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
+ const MachineInstr *MI) {
+ DD->RecordVariable(GV, FrameIndex, MI);
+}
+
+/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
+/// be emitted.
+bool DwarfWriter::ShouldEmitDwarfDebug() const {
+ return DD && DD->ShouldEmitDwarfDebug();
+}
+
+/// RecordInlinedFnStart - Record that subprogram SP, from compile unit CU, is
+/// inlined at the given line and column.
+unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU,
+ unsigned Line, unsigned Col) {
+ return DD->RecordInlinedFnStart(SP, CU, Line, Col);
+}
+
+/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
+unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) {
+ return DD->RecordInlinedFnEnd(SP);
+}
+
+/// RecordVariableScope - Record scope for the variable declared by
+/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE.
+void DwarfWriter::RecordVariableScope(DIVariable &DV,
+ const MachineInstr *DeclareMI) {
+ DD->RecordVariableScope(DV, DeclareMI);
+}
diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile
new file mode 100644
index 0000000..cb5b3f6
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/AsmPrinter/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMAsmPrinter
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 0000000..8ba903a
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,160 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI);
+
+ void finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string Mangled;
+ Mangled += TAI.getGlobalPrefix();
+ Mangled += "caml";
+ size_t Letter = Mangled.size();
+ Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ Mangled += "__";
+ Mangled += Id;
+
+ // Capitalize the first letter of the module name.
+ Mangled[Letter] = toupper(Mangled[Letter]);
+
+ if (const char *GlobalDirective = TAI.getGlobalDirective())
+ OS << GlobalDirective << Mangled << "\n";
+ OS << Mangled << ":\n";
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ AP.SwitchToSection(TAI.getTextSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin");
+
+ AP.SwitchToSection(TAI.getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from using stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). The emitter will abort if
+/// either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ const char *AddressDirective;
+ int AddressAlignLog;
+ if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
+ AddressDirective = TAI.getData32bitsDirective();
+ AddressAlignLog = 2;
+ } else {
+ AddressDirective = TAI.getData64bitsDirective();
+ AddressAlignLog = 3;
+ }
+
+ AP.SwitchToSection(TAI.getTextSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end");
+
+ AP.SwitchToSection(TAI.getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end");
+
+ OS << AddressDirective << 0; // FIXME: Why does ocaml emit this??
+ AP.EOL();
+
+ AP.SwitchToSection(TAI.getDataSection());
+ EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable");
+
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1<<16) {
+ cerr << "Function '" << FI.getFunction().getNameStart()
+ << "' is too large for the ocaml GC! "
+ << "Frame size " << FrameSize << " >= 65536.\n";
+ cerr << "(" << uintptr_t(&FI) << ")\n";
+ abort(); // Very rude!
+ }
+
+ OS << "\t" << TAI.getCommentString() << " live roots for "
+ << FI.getFunction().getNameStart() << "\n";
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1<<16) {
+ cerr << "Function '" << FI.getFunction().getNameStart()
+ << "' is too large for the ocaml GC! "
+ << "Live root count " << LiveCount << " >= 65536.\n";
+ abort(); // Very rude!
+ }
+
+ OS << AddressDirective
+ << TAI.getPrivateGlobalPrefix() << "label" << J->Num;
+ AP.EOL("call return address");
+
+ AP.EmitInt16(FrameSize);
+ AP.EOL("stack frame size");
+
+ AP.EmitInt16(LiveCount);
+ AP.EOL("live root count");
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J); K != KE; ++K) {
+ assert(K->StackOffset < 1<<16 &&
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+
+ OS << "\t.word\t" << K->StackOffset;
+ AP.EOL("stack offset");
+ }
+
+ AP.EmitAlignment(AddressAlignLog);
+ }
+ }
+}
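The layout documented above finishAssembly can be pictured as a C object. This is a sketch only, with a hypothetical module name and values, and it ignores the packing and pointer-sized alignment of the real emitted table:

    #include <cstdint>

    // One descriptor per GC point, mirroring the pseudo-C layout in the
    // finishAssembly comment; here a single call site with two live roots.
    struct Descriptor {
      void *ReturnAddress;
      uint16_t FrameSize;
      uint16_t NumLiveOffsets;
      uint16_t LiveOffsets[2];
    };

    struct Frametable {
      uint16_t NumDescriptors;
      Descriptor Descriptors[1];
    };

    // e.g. camlFoo__frametable for a 16-byte frame with roots at offsets 0, 8.
    static Frametable camlFoo__frametable = {
      1, { { nullptr, 16, 2, { 0, 8 } } }
    };

    int main() { return camlFoo__frametable.NumDescriptors == 1 ? 0 : 1; }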
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..2635303
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1204 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBBs, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+namespace {
+ struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass {
+ static char ID;
+ explicit BranchFolder(bool defaultEnableTailMerge) :
+ MachineFunctionPass(&ID) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ const TargetInstrInfo *TII;
+ MachineModuleInfo *MMI;
+ bool MadeChange;
+ private:
+ // Tail Merging.
+ bool EnableTailMerge;
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength);
+
+ typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt;
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+
+ typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
+ std::vector<SameTailElt> SameTails;
+
+ const TargetRegisterInfo *RegInfo;
+ RegScavenger *RS;
+ // Branch optzn.
+ bool OptimizeBranches(MachineFunction &MF);
+ void OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool CanFallThrough(MachineBasicBlock *CurBB);
+ bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
+ MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond);
+ };
+ char BranchFolder::ID = 0;
+}
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+ return new BranchFolder(DefaultEnableTailMerge);
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DOUT << "\nRemoving MBB: " << *MBB;
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // If there are any labels in the basic block, unregister them from
+ // MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->isLabel())
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+
+ // Remove the block.
+ MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+ SmallSet<unsigned, 4> ImpDefRegs;
+ MachineBasicBlock::iterator I = MBB->begin();
+ while (I != MBB->end()) {
+ if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+ break;
+ unsigned Reg = I->getOperand(0).getReg();
+ ImpDefRegs.insert(Reg);
+ for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ ImpDefRegs.insert(SubReg);
+ ++I;
+ }
+ if (ImpDefRegs.empty())
+ return false;
+
+ MachineBasicBlock::iterator FirstTerm = I;
+ while (I != MBB->end()) {
+ if (!TII->isUnpredicatedTerminator(I))
+ return false;
+ // See if it uses any of the implicitly defined registers.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (ImpDefRegs.count(Reg))
+ return false;
+ }
+ ++I;
+ }
+
+ I = MBB->begin();
+ while (I != FirstTerm) {
+ MachineInstr *ImpDefMI = &*I;
+ ++I;
+ MBB->erase(ImpDefMI);
+ }
+
+ return true;
+}
+
+bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ if (!TII) return false;
+
+ RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool EverMadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+ EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ EverMadeChange |= OptimizeImpDefsBlock(MBB);
+ }
+
+ RS = RegInfo->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = false;
+ MadeChangeThisIteration |= TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ EverMadeChange |= MadeChangeThisIteration;
+ }
+
+ // See if any jump tables have become mergeable or dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
+ if (!JTs.empty()) {
+ // Figure out how these jump tables should be merged.
+ std::vector<unsigned> JTMapping;
+ JTMapping.reserve(JTs.size());
+
+ // We always keep the 0th jump table.
+ JTMapping.push_back(0);
+
+ // Scan the jump tables, seeing if there are any duplicates. Note that this
+ // is N^2, which should be fixed someday.
+ for (unsigned i = 1, e = JTs.size(); i != e; ++i)
+ JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
+
+ // If a jump table was merged with another one, walk the function rewriting
+ // references to jump tables to reference the new JT IDs. Keep track of
+ // whether we see a jump table idx; if not, we can delete the JT.
+ BitVector JTIsLive(JTs.size());
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I)
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ MachineOperand &Op = I->getOperand(op);
+ if (!Op.isJTI()) continue;
+ unsigned NewIdx = JTMapping[Op.getIndex()];
+ Op.setIndex(NewIdx);
+
+ // Remember that this JT is live.
+ JTIsLive.set(NewIdx);
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens either because the
+ // indirect jump was unreachable (and thus deleted) or because the jump
+ // table was merged with some other one.
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ EverMadeChange = true;
+ }
+ }
+
+ delete RS;
+ return EverMadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+ unsigned Hash = MI->getOpcode();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+
+ // Merge in bits from the operand if easy.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
+ case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default: break;
+ }
+
+ Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ }
+ return Hash;
+}
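The combining step above can be exercised in isolation; a small sketch with stand-in operand data (the OperandInfo record and its values are hypothetical, only the folding rule is taken from HashMachineInstr):

    #include <cstdio>
    #include <vector>

    // Stand-in for a machine operand: a type tag plus a per-type hash value
    // (register number, immediate, block number, ...).
    struct OperandInfo { unsigned Type; unsigned Hash; };

    // Same combining rule as HashMachineInstr: fold each operand's hash and
    // type tag into the running hash, shifted by the operand index.
    static unsigned hashInstr(unsigned Opcode,
                              const std::vector<OperandInfo> &Ops) {
      unsigned Hash = Opcode;
      for (unsigned i = 0, e = Ops.size(); i != e; ++i)
        Hash += ((Ops[i].Hash << 3) | Ops[i].Type) << (i & 31);
      return Hash;
    }

    int main() {
      // e.g. an "add r1, r1, #4"-style instruction with opcode 42.
      std::vector<OperandInfo> Ops = {{1, 1}, {1, 1}, {2, 4}};
      std::printf("%u\n", hashInstr(42, Ops));
      return 0;
    }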
+
+/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
+/// branch must be inserted). For blocks with a successor, one of the
+/// two blocks to be tail-merged will end with a branch already, so
+/// it pays to cross-jump even for a single instruction.
+
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
+ unsigned minCommonTailLength) {
+ MachineBasicBlock::const_iterator I = MBB->end();
+ if (I == MBB->begin())
+ return 0; // Empty MBB.
+
+ --I;
+ unsigned Hash = HashMachineInstr(I);
+
+ if (I == MBB->begin() || minCommonTailLength == 1)
+ return Hash; // Single instr MBB.
+
+ --I;
+ // Hash in the second-to-last instruction.
+ Hash ^= HashMachineInstr(I) << 2;
+ return Hash;
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ if (!I1->isIdenticalTo(I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->getOpcode() == TargetInstrInfo::INLINEASM) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ return TailLen;
+}
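The same walk-backwards idea, shown on plain sequences and leaving out the inline-asm special case; a sketch for illustration, not the function above:

    #include <cstdio>
    #include <vector>

    // Count how many trailing elements two sequences share, walking
    // backwards from the end of each.
    static unsigned commonTailLength(const std::vector<int> &A,
                                     const std::vector<int> &B) {
      unsigned Len = 0;
      size_t I = A.size(), J = B.size();
      while (I != 0 && J != 0 && A[I - 1] == B[J - 1]) {
        --I; --J;
        ++Len;
      }
      return Len;
    }

    int main() {
      std::vector<int> A = {1, 2, 7, 8, 9};
      std::vector<int> B = {3, 7, 8, 9};
      std::printf("%u\n", commonTailLength(A, B)); // prints 3
      return 0;
    }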
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing the removed tail with an unconditional branch to
+/// NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ MachineBasicBlock *OldBB = OldInst->getParent();
+
+ // Remove all the old successors of OldBB from the CFG.
+ while (!OldBB->succ_empty())
+ OldBB->removeSuccessor(OldBB->succ_begin());
+
+ // Remove all the dead instructions from the end of OldBB.
+ OldBB->erase(OldInst, OldBB->end());
+
+ // If OldBB isn't immediately before NewDest, insert a branch to NewDest.
+ if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
+ TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>());
+ OldBB->addSuccessor(NewDest);
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls through into the new block
+/// that starts at the iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1) {
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = &CurMBB;
+ MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ if (RS) {
+ RS->enterBasicBlock(&CurMBB);
+ if (!CurMBB.empty())
+ RS->forward(prior(CurMBB.end()));
+ BitVector RegsLiveAtExit(RegInfo->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isCall())
+ Time += 10;
+ else if (TID.mayLoad() || TID.mayStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccBB instead.
+
+static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (I != MF->end() &&
+ !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
+}
+
+static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
+ const std::pair<unsigned,MachineBasicBlock*> &q) {
+ if (p.first < q.first)
+ return true;
+ else if (p.first > q.first)
+ return false;
+ else if (p.second->getNumber() < q.second->getNumber())
+ return true;
+ else if (p.second->getNumber() > q.second->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ assert(0 && "Predecessor appears twice");
+#endif
+ return false;
+ }
+}
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element). Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks. SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = prior(MergePotentials.end());
+ for (MPIterator CurMPIter = prior(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter!=B && CurMPIter->first==CurHash;
+ --CurMPIter) {
+ for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
+ unsigned CommonTailLen = ComputeCommonTailLength(
+ CurMPIter->second,
+ I->second,
+ TrialBBI1, TrialBBI2);
+ // If we will have to split a block, there should be at least
+ // minCommonTailLength instructions in common; if not, at worst
+ // we will be replacing a fallthrough into the common tail with a
+ // branch, which at worst breaks even with falling through into
+ // the duplicated common tail, so 1 instruction in common is enough.
+ // We will always pick a block we do not have to split as the common
+ // tail if there is one.
+ // (Empty blocks will get forwarded and need not be considered.)
+ if (CommonTailLen >= minCommonTailLength ||
+ (CommonTailLen > 0 &&
+ (TrialBBI1==CurMPIter->second->begin() ||
+ TrialBBI2==I->second->begin()))) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1));
+ }
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(std::make_pair(I, TrialBBI2));
+ }
+ if (I==B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->first==CurHash;
+ --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->second;
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter==B)
+ break;
+ }
+ if (CurMPIter->first!=CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail. Create a block that does by splitting one.
+unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength) {
+ unsigned i, commonTailIndex;
+ unsigned TimeEstimate = ~0U;
+ for (i=0, commonTailIndex=0; i<SameTails.size(); i++) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].first->second==PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ // Otherwise, make a (fairly bogus) choice based on estimate of
+ // how long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].first->second->begin(),
+ SameTails[i].second);
+ if (t<=TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+
+ DOUT << "\nSplitting " << MBB->getNumber() << ", size " <<
+ maxCommonTailLength;
+
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ SameTails[commonTailIndex].first->second = newMBB;
+ SameTails[commonTailIndex].second = newMBB->begin();
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB==MBB)
+ PredBB = newMBB;
+
+ return commonTailIndex;
+}
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock* PredBB) {
+ // It doesn't make sense to save a single instruction since tail merging
+ // will add a jump.
+ // FIXME: Ask the target to provide the threshold?
+ unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1;
+ MadeChange = false;
+
+ DOUT << "\nTryMergeBlocks " << MergePotentials.size() << '\n';
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end(),MergeCompare);
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = prior(MergePotentials.end())->first;
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength);
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and not the entry
+ // block, which we can't jump to), we can treat all blocks with this same
+ // tail at once. Use PredBB if that is one of the possibilities, as that
+ // will not introduce any extra branches.
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->second->
+ getParent()->begin();
+ unsigned int commonTailIndex, i;
+ for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) {
+ MachineBasicBlock *MBB = SameTails[i].first->second;
+ if (MBB->begin() == SameTails[i].second && MBB != EntryBB) {
+ commonTailIndex = i;
+ if (MBB==PredBB)
+ break;
+ }
+ }
+
+ if (commonTailIndex==SameTails.size()) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+ // MBB is common tail. Adjust all other BB's to jump to this one.
+ // Traversal must be forwards so erases work.
+ DOUT << "\nUsing common tail " << MBB->getNumber() << " for ";
+ for (unsigned int i=0; i<SameTails.size(); ++i) {
+ if (commonTailIndex==i)
+ continue;
+ DOUT << SameTails[i].first->second->getNumber() << ",";
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ ReplaceTailWithBranchTo(SameTails[i].second, MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].first);
+ }
+ DOUT << "\n";
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+ if (!EnableTailMerge) return false;
+
+ MadeChange = false;
+
+ // First find blocks with no successors.
+ MergePotentials.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (I->succ_empty())
+ MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
+ }
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() < TailMergeThreshold &&
+ MergePotentials.size() >= 2)
+ MadeChange |= TryMergeBlocks(NULL, NULL);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) alter conditional branches so they branch to the other block
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2; ++P) {
+ MachineBasicBlock* PBB = *P;
+ // Skip blocks that loop to themselves, can't tail merge these.
+ if (PBB==IBB)
+ continue;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and
+ // we cannot reverse the branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB==IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = next(MachineFunction::iterator(PBB));
+ }
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice
+ // to have a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock* PredNextBB = NULL;
+ if (IP!=MF.end())
+ PredNextBB = IP;
+ if (TBB==NULL) {
+ if (IBB!=PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB!=IBB && FBB!=IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB!=IBB) // ubr
+ continue;
+ } else {
+ if (TBB!=IBB && IBB!=PredNextBB) // cbr
+ continue;
+ }
+ }
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+ }
+ MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
+ }
+ }
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryMergeBlocks(I, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can occur as a result of removing blocks in TryMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryMergeBlocks
+ if (MergePotentials.size()==1 &&
+ MergePotentials.begin()->second != PredBB)
+ FixTail(MergePotentials.begin()->second, I, TII);
+ }
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+
+/// CanFallThrough - Return true if the specified block (with the specified
+/// branch condition) can implicitly transfer control to the block after it by
+/// falling off the end of it. This should return false if it can reach the
+/// block after it, but it uses an explicit branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
+ bool BranchUnAnalyzable,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) {
+ MachineFunction::iterator Fallthrough = CurBB;
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == CurBB->getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
+ if (!CurBB->isSuccessor(Fallthrough))
+ return false;
+
+ // If we couldn't analyze the branch, assume it could fall through.
+ if (BranchUnAnalyzable) return true;
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
+/// CanFallThrough - Return true if the specified block can implicitly transfer
+/// control to the block after it by falling off the end of it. This should
+/// return false if it can reach the block after it, but it uses an explicit
+/// branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true);
+ return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
+/// result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ if (MBB1->empty() || MBB2->empty()) return false;
+
+ // If there is a clear successor ordering we make sure that one block
+ // will fall through to the next
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ MachineInstr *MBB1I = --MBB1->end();
+ MachineInstr *MBB2I = --MBB2->end();
+ return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator FallThrough = MBB;
+ ++FallThrough;
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block.
+ if (MBB->empty() && !MBB->isLandingPad()) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return;
+
+ if (FallThrough == MBB->getParent()->end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ }
+
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ MBB->getParent()->getJumpTableInfo()->
+ ReplaceMBBInJumpTables(MBB, FallThrough);
+ MadeChange = true;
+ }
+ return;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not) remove the branch.
+ if (PriorTBB == MBB && PriorFBB == 0) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
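+ //   jcc PriorTBB; jmp MBB
+ // becomes:
+ //   jcc PriorTBB    (and fall through to MBB)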
+ if (PriorFBB == MBB) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
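+ //   jcc MBB; jmp PriorFBB
+ // becomes:
+ //   jncc PriorFBB   (and fall through to MBB)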
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+ // If this block doesn't fall through (e.g. it ends with an uncond branch or
+ // has no successors) and if the pred falls through into this block, and if
+ // it would otherwise fall through into the block after this, move this
+ // block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+ // due to loops) than it is to exit it. This helps with asserts in loops,
+ // etc., by moving the assert-failure block out of the loop body.
+ if (!PriorCond.empty() && PriorFBB == 0 &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !CanFallThrough(MBB)) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PrevBB aren't both no-successor
+ // blocks. If neither has successors and if PrevBB is the second-from-last
+ // block in the function, we'd just keep swapping the two blocks for last.
+ // Only do the swap if one is clearly better to fall through than the other.
+ if (FallThrough == --MBB->getParent()->end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ // We don't want to do this transformation if we have control flow like:
+ // br cond BB2
+ // BB1:
+ // ..
+ // jmp BBX
+ // BB2:
+ // ..
+ // ret
+ //
+ // In this case, we could actually be moving the return block *into* a
+ // loop!
+ if (DoTransform && !MBB->succ_empty() &&
+ (!CanFallThrough(PriorTBB) || PriorTBB->empty()))
+ DoTransform = false;
+
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DOUT << "\nMoving MBB: " << *MBB;
+ DOUT << "To make fallthrough to: " << *PriorTBB << "\n";
+
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(--MBB->getParent()->end());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ if (!CurUnAnalyzable) {
+ // If the CFG for this block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ MBB->begin()->getDesc().isBranch() && CurTBB != MBB) {
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB);
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (PriorTBB == 0) {
+ assert(PriorCond.empty() && PriorFBB == 0 &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(PriorFBB == 0 && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+ }
+
+ // Iterate through all the predecessors, revectoring each in turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+ while(PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch (and fix the CFG).
+ MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+ NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ TII->RemoveBranch(*PMBB);
+ NewCurCond.clear();
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false);
+ }
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ MBB->getParent()->getJumpTableInfo()->
+ ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
+ PriorTBB, PriorFBB, PriorCond)) {
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
+ CurCond);
+
+ if (!MBB->isLandingPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredBB = *PI;
+ MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+ if (PredBB != MBB && !CanFallThrough(PredBB)
+ && (!CurFallsThru || !CurTBB || !CurFBB)
+ && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ return OptimizeBlock(MBB);
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineBasicBlock *SuccBB = *SI;
+ MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+ std::vector<MachineOperand> SuccPrevCond;
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+ !SuccBB->isLandingPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ if (FallThrough != MBB->getParent()->end() &&
+ PrevBB.isSuccessor(FallThrough)) {
+ MBB->moveAfter(--MBB->getParent()->end());
+ MadeChange = true;
+ return;
+ }
+ }
+ }
+}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
new file mode 100644
index 0000000..ca4b31c
--- /dev/null
+++ b/lib/CodeGen/CMakeLists.txt
@@ -0,0 +1,62 @@
+add_llvm_library(LLVMCodeGen
+ BranchFolding.cpp
+ CodePlacementOpt.cpp
+ DeadMachineInstructionElim.cpp
+ DwarfEHPrepare.cpp
+ ELFWriter.cpp
+ GCMetadata.cpp
+ GCMetadataPrinter.cpp
+ GCStrategy.cpp
+ IfConversion.cpp
+ IntrinsicLowering.cpp
+ LLVMTargetMachine.cpp
+ LatencyPriorityQueue.cpp
+ LiveInterval.cpp
+ LiveIntervalAnalysis.cpp
+ LiveStackAnalysis.cpp
+ LiveVariables.cpp
+ LowerSubregs.cpp
+ MachOWriter.cpp
+ MachineBasicBlock.cpp
+ MachineDominators.cpp
+ MachineFunction.cpp
+ MachineInstr.cpp
+ MachineLICM.cpp
+ MachineLoopInfo.cpp
+ MachineModuleInfo.cpp
+ MachinePassRegistry.cpp
+ MachineRegisterInfo.cpp
+ MachineSink.cpp
+ MachineVerifier.cpp
+ OcamlGC.cpp
+ PBQP.cpp
+ PHIElimination.cpp
+ Passes.cpp
+ PostRASchedulerList.cpp
+ PreAllocSplitting.cpp
+ PrologEpilogInserter.cpp
+ PseudoSourceValue.cpp
+ RegAllocBigBlock.cpp
+ RegAllocLinearScan.cpp
+ RegAllocLocal.cpp
+ RegAllocPBQP.cpp
+ RegAllocSimple.cpp
+ RegisterCoalescer.cpp
+ RegisterScavenging.cpp
+ ScheduleDAG.cpp
+ ScheduleDAGEmit.cpp
+ ScheduleDAGInstrs.cpp
+ ScheduleDAGPrinter.cpp
+ ShadowStackGC.cpp
+ ShrinkWrapping.cpp
+ SimpleRegisterCoalescing.cpp
+ Spiller.cpp
+ StackProtector.cpp
+ StackSlotColoring.cpp
+ StrongPHIElimination.cpp
+ TargetInstrInfoImpl.cpp
+ TwoAddressInstructionPass.cpp
+ UnreachableBlockElim.cpp
+ VirtRegMap.cpp
+ VirtRegRewriter.cpp
+ )
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
new file mode 100644
index 0000000..383098e
--- /dev/null
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -0,0 +1,358 @@
+//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to the target-specific alignment boundary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "code-placement"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumHeaderAligned, "Number of loop headers aligned");
+STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
+STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
+
+namespace {
+ class CodePlacementOpt : public MachineFunctionPass {
+ const MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
+
+ /// ChangedMBBs - BBs which are modified by OptimizeIntraLoopEdges.
+ SmallPtrSet<MachineBasicBlock*, 8> ChangedMBBs;
+
+ /// UncondJmpMBBs - A list of BBs which are in loops and end with
+ /// unconditional branches.
+ SmallVector<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 4>
+ UncondJmpMBBs;
+
+ /// LoopHeaders - A list of BBs which are loop headers.
+ SmallVector<MachineBasicBlock*, 4> LoopHeaders;
+
+ public:
+ static char ID;
+ CodePlacementOpt() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const {
+ return "Code Placement Optimizater";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool OptimizeIntraLoopEdges();
+ bool HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+ SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign);
+ bool AlignLoops(MachineFunction &MF);
+ };
+
+ char CodePlacementOpt::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createCodePlacementOptPass() {
+ return new CodePlacementOpt();
+}
+
+/// OptimizeIntraLoopEdges - Place loop back edges to move unconditional
+/// branches out of the loop.
+///
+/// A:
+/// ...
+/// <fallthrough to B>
+///
+/// B: --> loop header
+/// ...
+/// jcc <cond> C, [exit]
+///
+/// C:
+/// ...
+/// jmp B
+///
+/// ==>
+///
+/// A:
+/// ...
+/// jmp B
+///
+/// C: --> new loop header
+/// ...
+/// <fallthrough to B>
+///
+/// B:
+/// ...
+/// jcc <cond> C, [exit]
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdges() {
+ if (!TLI->shouldOptimizeCodePlacement())
+ return false;
+
+ bool Changed = false;
+ for (unsigned i = 0, e = UncondJmpMBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = UncondJmpMBBs[i].first;
+ MachineBasicBlock *SuccMBB = UncondJmpMBBs[i].second;
+ MachineLoop *L = MLI->getLoopFor(MBB);
+ assert(L && "BB is expected to be in a loop!");
+
+ if (ChangedMBBs.count(MBB)) {
+ // BB has been modified, re-analyze.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
+ continue;
+ if (MLI->getLoopFor(TBB) != L || TBB->isLandingPad())
+ continue;
+ SuccMBB = TBB;
+ } else {
+ assert(MLI->getLoopFor(SuccMBB) == L &&
+ "Successor is not in the same loop!");
+ }
+
+ if (MBB->isLayoutSuccessor(SuccMBB)) {
+ // Successor is right after MBB, just eliminate the unconditional jmp.
+ // Can this happen?
+ TII->RemoveBranch(*MBB);
+ ChangedMBBs.insert(MBB);
+ ++NumIntraElim;
+ Changed = true;
+ continue;
+ }
+
+ // Now check whether any BB falls through into SuccMBB. If one does, it
+ // should be from outside the loop, since that edge will become a jmp.
+ bool OkToMove = true;
+ MachineBasicBlock *FtMBB = 0, *FtTBB = 0, *FtFBB = 0;
+ SmallVector<MachineOperand, 4> FtCond;
+ for (MachineBasicBlock::pred_iterator PI = SuccMBB->pred_begin(),
+ PE = SuccMBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredMBB = *PI;
+ if (PredMBB->isLayoutSuccessor(SuccMBB)) {
+ if (TII->AnalyzeBranch(*PredMBB, FtTBB, FtFBB, FtCond)) {
+ OkToMove = false;
+ break;
+ }
+ if (!FtTBB)
+ FtTBB = SuccMBB;
+ else if (!FtFBB) {
+ assert(FtFBB != SuccMBB && "Unexpected control flow!");
+ FtFBB = SuccMBB;
+ }
+
+ // A fallthrough.
+ FtMBB = PredMBB;
+ MachineLoop *PL = MLI->getLoopFor(PredMBB);
+ if (PL && (PL == L || PL->getLoopDepth() >= L->getLoopDepth()))
+ OkToMove = false;
+
+ break;
+ }
+ }
+
+ if (!OkToMove)
+ continue;
+
+ // Is it profitable? If SuccMBB itself can fall through to its layout
+ // successor, that fallthrough will have to be changed into a jmp.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*SuccMBB, TBB, FBB, Cond))
+ continue;
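+ // If SuccMBB has no explicit branch (or only a conditional one), its layout
+ // successor is reached by fallthrough; record it as the implicit destination
+ // so the cost computation below accounts for that edge.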
+ if (!TBB && Cond.empty())
+ TBB = next(MachineFunction::iterator(SuccMBB));
+ else if (!FBB && !Cond.empty())
+ FBB = next(MachineFunction::iterator(SuccMBB));
+
+ // This calculates the cost of the transformation. It also finds the *only*
+ // intra-loop edge if there is one.
+ int Cost = 0;
+ bool HasOneIntraSucc = true;
+ MachineBasicBlock *IntraSucc = 0;
+ for (MachineBasicBlock::succ_iterator SI = SuccMBB->succ_begin(),
+ SE = SuccMBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SSMBB = *SI;
+ if (MLI->getLoopFor(SSMBB) == L) {
+ if (!IntraSucc)
+ IntraSucc = SSMBB;
+ else
+ HasOneIntraSucc = false;
+ }
+
+ if (SuccMBB->isLayoutSuccessor(SSMBB))
+ // This will become a jmp.
+ ++Cost;
+ else if (MBB->isLayoutSuccessor(SSMBB)) {
+ // One of the successors will become the new fallthrough.
+ if (SSMBB == FBB) {
+ FBB = 0;
+ --Cost;
+ } else if (!FBB && SSMBB == TBB && Cond.empty()) {
+ TBB = 0;
+ --Cost;
+ } else if (!Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
+ assert(SSMBB == TBB);
+ TBB = FBB;
+ FBB = 0;
+ --Cost;
+ }
+ }
+ }
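+ // Only perform the move when the computed net cost is zero; otherwise the
+ // transformation is not considered profitable.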
+ if (Cost)
+ continue;
+
+ // Now, let's move the successor to below the BB to eliminate the jmp.
+ SuccMBB->moveAfter(MBB);
+ TII->RemoveBranch(*MBB);
+ TII->RemoveBranch(*SuccMBB);
+ if (TBB)
+ TII->InsertBranch(*SuccMBB, TBB, FBB, Cond);
+ ChangedMBBs.insert(MBB);
+ ChangedMBBs.insert(SuccMBB);
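+ // SuccMBB used to be reached by fallthrough from FtMBB; now that it has been
+ // moved away, give FtMBB an explicit branch to its old targets.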
+ if (FtMBB) {
+ TII->RemoveBranch(*FtMBB);
+ TII->InsertBranch(*FtMBB, FtTBB, FtFBB, FtCond);
+ ChangedMBBs.insert(FtMBB);
+ }
+ Changed = true;
+
+ // If BB is the loop latch, we may have a new loop header.
+ if (MBB == L->getLoopLatch()) {
+ assert(MLI->isLoopHeader(SuccMBB) &&
+ "Only succ of loop latch is not the header?");
+ if (HasOneIntraSucc && IntraSucc)
+ std::replace(LoopHeaders.begin(),LoopHeaders.end(), SuccMBB, IntraSucc);
+ }
+ }
+
+ ++NumIntraMoved;
+ return Changed;
+}
+
+/// HeaderShouldBeAligned - Return true if the specified loop header block
+/// should be aligned. For now, we will not align it if all the predecessors
+/// (i.e. loop back edges) are laid out above the header. FIXME: Do not
+/// align small loops.
+bool
+CodePlacementOpt::HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+ SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign) {
+ if (DoNotAlign.count(MBB))
+ return false;
+
+ bool BackEdgeBelow = false;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredMBB = *PI;
+ if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) {
+ BackEdgeBelow = true;
+ break;
+ }
+ }
+
+ if (!BackEdgeBelow)
+ return false;
+
+ // Ok, we are going to align this loop header. If it's an inner loop,
+ // do not align its outer loop.
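+ // The preheader, if present, lives outside this loop; if it is itself inside
+ // a loop, that is the enclosing loop whose header alignment we drop here.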
+ MachineBasicBlock *PreHeader = L->getLoopPreheader();
+ if (PreHeader) {
+ MachineLoop *L = MLI->getLoopFor(PreHeader);
+ if (L) {
+ MachineBasicBlock *HeaderBlock = L->getHeader();
+ HeaderBlock->setAlignment(0);
+ DoNotAlign.insert(HeaderBlock);
+ }
+ }
+ return true;
+}
+
+/// AlignLoops - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return false; // Don't care about loop alignment.
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ bool Changed = false;
+ SmallPtrSet<MachineBasicBlock*, 4> DoNotAlign;
+ for (unsigned i = 0, e = LoopHeaders.size(); i != e; ++i) {
+ MachineBasicBlock *HeaderMBB = LoopHeaders[i];
+ MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(HeaderMBB));
+ MachineLoop *L = MLI->getLoopFor(HeaderMBB);
+ if (L == MLI->getLoopFor(PredMBB))
+ // If the previous BB is in the same loop, don't align this BB. We want
+ // to avoid adding nops inside a loop.
+ continue;
+ if (HeaderShouldBeAligned(HeaderMBB, L, DoNotAlign)) {
+ HeaderMBB->setAlignment(Align);
+ Changed = true;
+ ++NumHeaderAligned;
+ }
+ }
+
+ return Changed;
+}
+
+bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
+ MLI = &getAnalysis<MachineLoopInfo>();
+ if (MLI->empty())
+ return false; // No loops.
+
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Analyze the BBs first and keep track of loop headers and BBs that
+ // end with an unconditional jmp to another block in the same loop.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ if (MBB->isLandingPad())
+ continue;
+ MachineLoop *L = MLI->getLoopFor(MBB);
+ if (!L)
+ continue;
+ if (MLI->isLoopHeader(MBB))
+ LoopHeaders.push_back(MBB);
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
+ continue;
+ if (MLI->getLoopFor(TBB) == L && !TBB->isLandingPad())
+ UncondJmpMBBs.push_back(std::make_pair(MBB, TBB));
+ }
+
+ bool Changed = OptimizeIntraLoopEdges();
+
+ Changed |= AlignLoops(MF);
+
+ ChangedMBBs.clear();
+ UncondJmpMBBs.clear();
+ LoopHeaders.clear();
+
+ return Changed;
+}
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 0000000..4832a5e
--- /dev/null
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,161 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN DeadMachineInstructionElim :
+ public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ BitVector LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(&ID) {}
+
+ private:
+ bool isDead(MachineInstr *MI) const;
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+
+static RegisterPass<DeadMachineInstructionElim>
+Y("dead-mi-elimination",
+ "Remove dead machine instructions");
+
+FunctionPass *llvm::createDeadMachineInstructionElimPass() {
+ return new DeadMachineInstructionElim();
+}
+
+bool DeadMachineInstructionElim::isDead(MachineInstr *MI) const {
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(TII, SawStore))
+ return false;
+
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
+ LivePhysRegs[Reg] : !MRI->use_empty(Reg)) {
+ // This def has a use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ bool AnyChanges = false;
+ MRI = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+
+ // Compute a bitvector to represent all non-allocatable physregs.
+ BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF);
+ NonAllocatableRegs.flip();
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
+ for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ // Start out assuming that all non-allocatable registers are live
+ // out of this block.
+ LivePhysRegs = NonAllocatableRegs;
+
+ // Also add any explicit live-out physregs for this block.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
+ LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
+ unsigned Reg = *LOI;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ LivePhysRegs.set(Reg);
+ }
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // If the instruction is dead, delete it!
+ if (isDead(MI)) {
+ DOUT << "DeadMachineInstructionElim: DELETING: " << *MI;
+ AnyChanges = true;
+ MI->eraseFromParent();
+ MIE = MBB->rend();
+ // MII is now pointing to the next instruction to process,
+ // so don't increment it.
+ continue;
+ }
+
+ // Record the physreg defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.reset(Reg);
+ // Check the subreg set, not the alias set, because a def
+ // of a super-register may still be partially live after
+ // this def.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs)
+ LivePhysRegs.reset(*SubRegs);
+ }
+ }
+ }
+ // Record the physreg uses, after the defs, in case a physreg is
+ // both defined and used in the same instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ LivePhysRegs.set(Reg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
+ *AliasSet; ++AliasSet)
+ LivePhysRegs.set(*AliasSet);
+ }
+ }
+ }
+
+ // We didn't delete the current instruction, so increment MII to
+ // the next one.
+ ++MII;
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 0000000..720e3d1
--- /dev/null
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,397 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+using namespace llvm;
+
+STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
+STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
+STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced");
+
+namespace {
+ class VISIBILITY_HIDDEN DwarfEHPrepare : public FunctionPass {
+ const TargetLowering *TLI;
+ bool CompileFast;
+
+ // The eh.exception intrinsic.
+ Function *ExceptionValueIntrinsic;
+
+ // _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ // Dominator info is used when turning stack temporaries into registers.
+ DominatorTree *DT;
+ DominanceFrontier *DF;
+
+ // The function we are running on.
+ Function *F;
+
+ // The landing pads for this function.
+ typedef SmallPtrSet<BasicBlock*, 8> BBSet;
+ BBSet LandingPads;
+
+ // Stack temporary used to hold eh.exception values.
+ AllocaInst *ExceptionValueVar;
+
+ bool NormalizeLandingPads();
+ bool LowerUnwinds();
+ bool MoveExceptionValueCalls();
+ bool FinishStackTemporaries();
+ bool PromoteStackTemporaries();
+
+ Instruction *CreateExceptionValueCall(BasicBlock *BB);
+ Instruction *CreateValueLoad(BasicBlock *BB);
+
+ /// CreateReadOfExceptionValue - Return the result of the eh.exception
+ /// intrinsic by calling the intrinsic if in a landing pad, or loading
+ /// it from the exception value variable otherwise.
+ Instruction *CreateReadOfExceptionValue(BasicBlock *BB) {
+ return LandingPads.count(BB) ?
+ CreateExceptionValueCall(BB) : CreateValueLoad(BB);
+ }
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ DwarfEHPrepare(const TargetLowering *tli, bool fast) :
+ FunctionPass(&ID), TLI(tli), CompileFast(fast),
+ ExceptionValueIntrinsic(0), RewindFunction(0) {}
+
+ virtual bool runOnFunction(Function &Fn);
+
+ // getAnalysisUsage - We need dominance frontiers for memory promotion.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (!CompileFast)
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ if (!CompileFast)
+ AU.addRequired<DominanceFrontier>();
+ AU.addPreserved<DominanceFrontier>();
+ }
+
+ const char *getPassName() const {
+ return "Exception handling preparation";
+ }
+
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) {
+ return new DwarfEHPrepare(tli, fast);
+}
+
+/// NormalizeLandingPads - Normalize and discover landing pads, noting them
+/// in the LandingPads set. A landing pad is normal if the only CFG edges
+/// that end at it are unwind edges from invoke instructions.
+/// Abnormal landing pads are fixed up by redirecting all unwind edges to
+/// a new basic block which falls through to the original.
+bool DwarfEHPrepare::NormalizeLandingPads() {
+ bool Changed = false;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (!isa<InvokeInst>(TI))
+ continue;
+ BasicBlock *LPad = TI->getSuccessor(1);
+ // Skip landing pads that have already been normalized.
+ if (LandingPads.count(LPad))
+ continue;
+
+ // Check that only invoke unwind edges end at the landing pad.
+ bool OnlyUnwoundTo = true;
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
+ PI != PE; ++PI) {
+ TerminatorInst *PT = (*PI)->getTerminator();
+ if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
+ OnlyUnwoundTo = false;
+ break;
+ }
+ }
+ if (OnlyUnwoundTo) {
+ // Only unwind edges lead to the landing pad. Remember the landing pad.
+ LandingPads.insert(LPad);
+ continue;
+ }
+
+ // At least one normal edge ends at the landing pad. Redirect the unwind
+ // edges to a new basic block which falls through into this one.
+
+ // Create the new basic block.
+ BasicBlock *NewBB = BasicBlock::Create(LPad->getName() + "_unwind_edge");
+
+ // Insert it into the function right before the original landing pad.
+ LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
+
+ // Redirect unwind edges from the original landing pad to NewBB.
+ for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
+ TerminatorInst *PT = (*PI++)->getTerminator();
+ if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
+ // Unwind to the new block.
+ PT->setSuccessor(1, NewBB);
+ }
+
+ // If there are any PHI nodes in LPad, we need to update them so that they
+ // merge incoming values from NewBB instead.
+ for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
+ PHINode *PN = cast<PHINode>(II);
+ pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
+
+ // Check to see if all of the values coming in via unwind edges are the
+ // same. If so, we don't need to create a new PHI node.
+ Value *InVal = PN->getIncomingValueForBlock(*PB);
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
+ InVal = 0;
+ break;
+ }
+ }
+
+ if (InVal == 0) {
+ // Different unwind edges have different values. Create a new PHI node
+ // in NewBB.
+ PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind",
+ NewBB);
+ // Add an entry for each unwind edge, using the value from the old PHI.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
+
+ // Now use this new PHI as the common incoming value for NewBB in PN.
+ InVal = NewPN;
+ }
+
+ // Revector exactly one entry in the PHI node to come from NewBB
+ // and delete all other entries that come from unwind edges. If
+ // there are both normal and unwind edges from the same predecessor,
+ // this leaves an entry for the normal edge.
+ for (pred_iterator PI = PB; PI != PE; ++PI)
+ PN->removeIncomingValue(*PI);
+ PN->addIncoming(InVal, NewBB);
+ }
+
+ // Add a fallthrough from NewBB to the original landing pad.
+ BranchInst::Create(LPad, NewBB);
+
+ // Now update DominatorTree and DominanceFrontier analysis information.
+ if (DT)
+ DT->splitBlock(NewBB);
+ if (DF)
+ DF->splitBlock(NewBB);
+
+ // Remember the newly constructed landing pad. The original landing pad
+ // LPad is no longer a landing pad now that all unwind edges have been
+ // revectored to NewBB.
+ LandingPads.insert(NewBB);
+ ++NumLandingPadsSplit;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume,
+/// rethrowing any previously caught exception. This will crash horribly
+/// at runtime if there is no such exception: using unwind to throw a new
+/// exception is currently not supported.
+bool DwarfEHPrepare::LowerUnwinds() {
+ bool Changed = false;
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (!isa<UnwindInst>(TI))
+ continue;
+
+ // Replace the unwind instruction with a call to _Unwind_Resume (or the
+ // appropriate target equivalent) followed by an UnreachableInst.
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
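+ // _Unwind_Resume (and its target equivalents) takes the exception object as
+ // a single i8* argument and does not return normally.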
+ std::vector<const Type*> Params(1, PointerType::getUnqual(Type::Int8Ty));
+ FunctionType *FTy = FunctionType::get(Type::VoidTy, Params, false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ // Create the call...
+ CallInst::Create(RewindFunction, CreateReadOfExceptionValue(I), "", TI);
+ // ...followed by an UnreachableInst.
+ new UnreachableInst(TI);
+
+ // Nuke the unwind instruction.
+ TI->eraseFromParent();
+ ++NumUnwindsLowered;
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
+/// landing pads by replacing calls outside of landing pads with loads from a
+/// stack temporary. Move eh.exception calls inside landing pads to the start
+/// of the landing pad (optional, but may make things simpler for later passes).
+bool DwarfEHPrepare::MoveExceptionValueCalls() {
+ // If the eh.exception intrinsic is not declared in the module then there is
+ // nothing to do. Speed up compilation by checking for this common case.
+ if (!ExceptionValueIntrinsic &&
+ !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
+ return false;
+
+ bool Changed = false;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception) {
+ if (!CI->use_empty()) {
+ Value *ExceptionValue = CreateReadOfExceptionValue(BB);
+ if (CI == ExceptionValue) {
+ // The call was at the start of a landing pad - leave it alone.
+ assert(LandingPads.count(BB) &&
+ "Created eh.exception call outside landing pad!");
+ continue;
+ }
+ CI->replaceAllUsesWith(ExceptionValue);
+ }
+ CI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// FinishStackTemporaries - If we introduced a stack variable to hold the
+/// exception value then initialize it in each landing pad.
+bool DwarfEHPrepare::FinishStackTemporaries() {
+ if (!ExceptionValueVar)
+ // Nothing to do.
+ return false;
+
+ bool Changed = false;
+
+ // Make sure that there is a store of the exception value at the start of
+ // each landing pad.
+ for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI) {
+ Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI);
+ Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar);
+ Store->insertAfter(ExceptionValue);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// PromoteStackTemporaries - Turn any stack temporaries we introduced into
+/// registers if possible.
+bool DwarfEHPrepare::PromoteStackTemporaries() {
+ if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) {
+ // Turn the exception temporary into registers and phi nodes if possible.
+ std::vector<AllocaInst*> Allocas(1, ExceptionValueVar);
+ PromoteMemToReg(Allocas, *DT, *DF);
+ return true;
+ }
+ return false;
+}
+
+/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
+/// the start of the basic block (unless there already is one, in which case
+/// the existing call is returned).
+Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHI();
+ // Is this a call to eh.exception?
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
+ if (CI->getIntrinsicID() == Intrinsic::eh_exception)
+ // Reuse the existing call.
+ return Start;
+
+ // Find the eh.exception intrinsic if we didn't already.
+ if (!ExceptionValueIntrinsic)
+ ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::eh_exception);
+
+ // Create the call.
+ return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+}
+
+/// CreateValueLoad - Insert a load of the exception value stack variable
+/// (creating it if necessary) at the start of the basic block (unless
+/// there already is a load, in which case the existing load is returned).
+Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) {
+ Instruction *Start = BB->getFirstNonPHI();
+ // Is this a load of the exception temporary?
+ if (ExceptionValueVar)
+ if (LoadInst* LI = dyn_cast<LoadInst>(Start))
+ if (LI->getPointerOperand() == ExceptionValueVar)
+ // Reuse the existing load.
+ return Start;
+
+ // Create the temporary if we didn't already.
+ if (!ExceptionValueVar) {
+ ExceptionValueVar = new AllocaInst(PointerType::getUnqual(Type::Int8Ty),
+ "eh.value", F->begin()->begin());
+ ++NumStackTempsIntroduced;
+ }
+
+ // Load the value.
+ return new LoadInst(ExceptionValueVar, "eh.value.load", Start);
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+
+ // Initialize internal state.
+ DT = getAnalysisIfAvailable<DominatorTree>();
+ DF = getAnalysisIfAvailable<DominanceFrontier>();
+ ExceptionValueVar = 0;
+ F = &Fn;
+
+ // Ensure that only unwind edges end at landing pads (a landing pad is a
+ // basic block where an invoke unwind edge ends).
+ Changed |= NormalizeLandingPads();
+
+ // Turn unwind instructions into libcalls.
+ Changed |= LowerUnwinds();
+
+ // TODO: Move eh.selector calls to landing pads and combine them.
+
+ // Move eh.exception calls to landing pads.
+ Changed |= MoveExceptionValueCalls();
+
+ // Initialize any stack temporaries we introduced.
+ Changed |= FinishStackTemporaries();
+
+ // Turn any stack temporaries into registers if possible.
+ if (!CompileFast)
+ Changed |= PromoteStackTemporaries();
+
+ LandingPads.clear();
+
+ return Changed;
+}
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 0000000..7cc1162
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,575 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes out
+// the ELF file in the following order:
+//
+// #1. ELF Header
+// #2. '.text' section
+// #3. '.data' section
+// #4. '.bss' section (conceptual position in file)
+// ...
+// #X. '.shstrtab' section
+// #Y. Section Table
+//
+// The entries in the section table are laid out as:
+// #0. Null entry [required]
+// #1. ".text" entry - the program code
+// #2. ".data" entry - global variables with initializers. [ if needed ]
+// #3. ".bss" entry - global variables without initializers. [ if needed ]
+// ...
+// #N. ".shstrtab" entry - String table for the section names.
+//
+// NOTE: This code should eventually be extended to support 64-bit ELF (this
+// won't be hard), but we haven't done so yet!
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFWriter.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include <list>
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+/// AddELFWriter - Concrete function to add the ELF writer to the function pass
+/// manager.
+MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
+ raw_ostream &O,
+ TargetMachine &TM) {
+ ELFWriter *EW = new ELFWriter(O, TM);
+ PM.add(EW);
+ return &EW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ /// ELFCodeEmitter - This class is used by the ELFWriter to emit the code for
+ /// functions to the ELF file.
+ class ELFCodeEmitter : public MachineCodeEmitter {
+ ELFWriter &EW;
+ TargetMachine &TM;
+ ELFWriter::ELFSection *ES; // Section to write to.
+ std::vector<unsigned char> *OutBuffer;
+ size_t FnStart;
+ public:
+ explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {}
+
+ void startFunction(MachineFunction &F);
+ bool finishFunction(MachineFunction &F);
+
+ void addRelocation(const MachineRelocation &MR) {
+ assert(0 && "relo not handled yet!");
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ }
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+ assert(0 && "CP not implementated yet!");
+ return 0;
+ }
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+ assert(0 && "JT not implementated yet!");
+ return 0;
+ }
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(0 && "JT not implementated yet!");
+ return 0;
+ }
+
+ virtual uintptr_t getLabelAddress(uint64_t Label) const {
+ assert(0 && "Label address not implementated yet!");
+ abort();
+ return 0;
+ }
+
+ virtual void emitLabel(uint64_t LabelID) {
+ assert(0 && "emit Label not implementated yet!");
+ abort();
+ }
+
+
+ virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+
+ /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+ void startGVStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment = 1) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ void startGVStub(const GlobalValue* F, void *Buffer, unsigned StubSize) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ void *finishGVStub(const GlobalValue *F) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ return 0;
+ }
+ };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &F) {
+ // Align the output buffer to the appropriate alignment.
+ unsigned Align = 16; // FIXME: GENERICIZE!!
+ // Get the ELF Section that this function belongs in.
+ ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS,
+ ELFWriter::ELFSection::SHF_EXECINSTR |
+ ELFWriter::ELFSection::SHF_ALLOC);
+ OutBuffer = &ES->SectionData;
+ cerr << "FIXME: This code needs to be updated for changes in the "
+ << "CodeEmitter interfaces. In particular, this should set "
+ << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!";
+ abort();
+
+ // Upgrade the section alignment if required.
+ if (ES->Align < Align) ES->Align = Align;
+
+ // Add padding zeros to the end of the buffer to make sure that the
+ // function will start on the correct byte alignment within the section.
+ OutputBuffer OB(*OutBuffer,
+ TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian());
+ OB.align(Align);
+ FnStart = OutBuffer->size();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &F) {
+ // We now know the size of the function, add a symbol to represent it.
+ ELFWriter::ELFSym FnSym(F.getFunction());
+
+ // Figure out the binding (linkage) of the symbol.
+ switch (F.getFunction()->getLinkage()) {
+ default:
+ // appending linkage is illegal for functions.
+ assert(0 && "Unknown linkage type!");
+ case GlobalValue::ExternalLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL);
+ break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK);
+ break;
+ case GlobalValue::PrivateLinkage:
+ assert (0 && "PrivateLinkage should not be in the symbol table.");
+ case GlobalValue::InternalLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL);
+ break;
+ }
+
+ ES->Size = OutBuffer->size();
+
+ FnSym.SetType(ELFWriter::ELFSym::STT_FUNC);
+ FnSym.SectionIdx = ES->SectionIdx;
+ FnSym.Value = FnStart; // Value = Offset from start of Section.
+ FnSym.Size = OutBuffer->size()-FnStart;
+
+ // Finally, add it to the symtab.
+ EW.SymbolTable.push_back(FnSym);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
+ : MachineFunctionPass(&ID), O(o), TM(tm) {
+ e_flags = 0; // e_flags defaults to 0, no flags.
+
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+ // Create the machine code emitter object for this target.
+ MCE = new ELFCodeEmitter(*this);
+ NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+ delete MCE;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+ Mang = new Mangler(M);
+
+ // Local alias to shorten the following code.
+ std::vector<unsigned char> &FH = FileHeader;
+ OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+ FHOut.outbyte(0x7F); // EI_MAG0
+ FHOut.outbyte('E'); // EI_MAG1
+ FHOut.outbyte('L'); // EI_MAG2
+ FHOut.outbyte('F'); // EI_MAG3
+ FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS
+ FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA
+ FHOut.outbyte(1); // EI_VERSION
+ FH.resize(16); // EI_PAD up to 16 bytes.
+
+ // This should change for shared objects.
+ FHOut.outhalf(1); // e_type = ET_REL
+ FHOut.outhalf(TM.getELFWriterInfo()->getEMachine()); // target-defined
+ FHOut.outword(1); // e_version = 1
+ FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file
+ FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o
+
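+ // Record the offsets of header fields that cannot be filled in yet; they are
+ // patched later (see e.g. the e_shstrndx fixup in EmitSectionTableStringTable).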
+ ELFHeader_e_shoff_Offset = FH.size();
+ FHOut.outaddr(0); // e_shoff
+ FHOut.outword(e_flags); // e_flags = whatever the target wants
+
+ FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size
+ FHOut.outhalf(0); // e_phentsize = prog header entry size
+ FHOut.outhalf(0); // e_phnum = # prog header entries = 0
+ FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size
+
+
+ ELFHeader_e_shnum_Offset = FH.size();
+ FHOut.outhalf(0); // e_shnum = # of section header ents
+ ELFHeader_e_shstrndx_Offset = FH.size();
+ FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab'
+
+ // Add the null section, which is required to be first in the file.
+ getSection("", 0, 0);
+
+ // Start up the symbol table. The first entry in the symtab is the null
+ // entry.
+ SymbolTable.push_back(ELFSym(0));
+
+ return false;
+}
+
+void ELFWriter::EmitGlobal(GlobalVariable *GV) {
+ // If this is an external global, emit it now. TODO: Note that it would be
+ // better to ignore the symbol here and only add it to the symbol table if
+ // referenced.
+ if (!GV->hasInitializer()) {
+ ELFSym ExternalSym(GV);
+ ExternalSym.SetBind(ELFSym::STB_GLOBAL);
+ ExternalSym.SetType(ELFSym::STT_NOTYPE);
+ ExternalSym.SectionIdx = ELFSection::SHN_UNDEF;
+ SymbolTable.push_back(ExternalSym);
+ return;
+ }
+
+ unsigned Align = TM.getTargetData()->getPreferredAlignment(GV);
+ unsigned Size =
+ TM.getTargetData()->getTypeAllocSize(GV->getType()->getElementType());
+
+ // If this global has a zero initializer, it is part of the .bss or common
+ // section.
+ if (GV->getInitializer()->isNullValue()) {
+ // If this global is part of the common block, add it now. Variables are
+ // part of the common block if they are zero initialized and allowed to be
+ // merged with other symbols.
+ if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
+ GV->hasCommonLinkage()) {
+ ELFSym CommonSym(GV);
+ // Value for common symbols is the alignment required.
+ CommonSym.Value = Align;
+ CommonSym.Size = Size;
+ CommonSym.SetBind(ELFSym::STB_GLOBAL);
+ CommonSym.SetType(ELFSym::STT_OBJECT);
+ // TODO SOMEDAY: add ELF visibility.
+ CommonSym.SectionIdx = ELFSection::SHN_COMMON;
+ SymbolTable.push_back(CommonSym);
+ return;
+ }
+
+ // Otherwise, this symbol is part of the .bss section. Emit it now.
+
+ // Handle alignment. Ensure section is aligned at least as much as required
+ // by this symbol.
+ ELFSection &BSSSection = getBSSSection();
+ BSSSection.Align = std::max(BSSSection.Align, Align);
+
+ // Within the section, emit enough virtual padding to get us to an alignment
+ // boundary.
+ if (Align)
+ BSSSection.Size = (BSSSection.Size + Align - 1) & ~(Align-1);
+
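+ // The symbol's value is its offset from the start of the .bss section.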
+ ELFSym BSSSym(GV);
+ BSSSym.Value = BSSSection.Size;
+ BSSSym.Size = Size;
+ BSSSym.SetType(ELFSym::STT_OBJECT);
+
+ switch (GV->getLinkage()) {
+ default: // weak/linkonce/common handled above
+ assert(0 && "Unexpected linkage type!");
+ case GlobalValue::AppendingLinkage: // FIXME: This should be improved!
+ case GlobalValue::ExternalLinkage:
+ BSSSym.SetBind(ELFSym::STB_GLOBAL);
+ break;
+ case GlobalValue::InternalLinkage:
+ BSSSym.SetBind(ELFSym::STB_LOCAL);
+ break;
+ }
+
+ // Set the idx of the .bss section
+ BSSSym.SectionIdx = BSSSection.SectionIdx;
+ if (!GV->hasPrivateLinkage())
+ SymbolTable.push_back(BSSSym);
+
+ // Reserve space in the .bss section for this symbol.
+ BSSSection.Size += Size;
+ return;
+ }
+
+ // FIXME: handle .rodata
+ //assert(!GV->isConstant() && "unimp");
+
+ // FIXME: handle .data
+ //assert(0 && "unimp");
+}
+
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the MCE object above.
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+ // Okay, the ELF header and .text sections have been completed, build the
+ // .data, .bss, and "common" sections next.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit the symbol table now, if non-empty.
+ EmitSymbolTable();
+
+ // FIXME: Emit the relocations now.
+
+ // Emit the string table for the sections in the ELF file we have.
+ EmitSectionTableStringTable();
+
+ // Emit the sections to the .o file, and emit the section table for the file.
+ OutputSectionsAndSectionTable();
+
+ // We are done with the abstract symbols.
+ SectionList.clear();
+ NumSections = 0;
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+ return false;
+}
+
+/// EmitSymbolTable - If the current symbol table is non-empty, emit the string
+/// table for it and then the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+ if (SymbolTable.size() == 1) return; // Only the null entry.
+
+ // FIXME: compact all local symbols to the start of the symtab.
+ unsigned FirstNonLocalSymbol = 1;
+
+ ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0);
+ StrTab.Align = 1;
+
+ DataBuffer &StrTabBuf = StrTab.SectionData;
+ OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian);
+
+ // Set the zero'th symbol to a null byte, as required.
+ StrTabOut.outbyte(0);
+ SymbolTable[0].NameIdx = 0;
+ unsigned Index = 1;
+ for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) {
+ // Use the name mangler to uniquify the LLVM symbol.
+ std::string Name = Mang->getValueName(SymbolTable[i].GV);
+
+ if (Name.empty()) {
+ SymbolTable[i].NameIdx = 0;
+ } else {
+ SymbolTable[i].NameIdx = Index;
+
+ // Add the name to the output buffer, including the null terminator.
+ StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end());
+
+ // Add a null terminator.
+ StrTabBuf.push_back(0);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += Name.size()+1;
+ }
+ }
+ assert(Index == StrTabBuf.size());
+ StrTab.Size = Index;
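+ // The finished .strtab is a leading NUL byte followed by each mangled name
+ // with its own NUL terminator (e.g. "\0main\0foo\0"); NameIdx is the byte
+ // offset of each name within this buffer.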
+
+ // Now that we have emitted the string table and know the offset into the
+ // string table of each symbol, emit the symbol table itself.
+ ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+ SymTab.Align = is64Bit ? 8 : 4;
+ SymTab.Link = StrTab.SectionIdx; // Section index of the associated .strtab.
+ SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol.
+ SymTab.EntSize = 16; // Size of each symtab entry. FIXME: wrong for ELF64
+ DataBuffer &SymTabBuf = SymTab.SectionData;
+ OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian);
+
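+ // Standard ELF layouts: Elf32_Sym (16 bytes) is st_name, st_value, st_size,
+ // st_info, st_other, st_shndx; Elf64_Sym (24 bytes) reorders this to
+ // st_name, st_info, st_other, st_shndx, st_value, st_size. The two loops
+ // below follow those layouts.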
+ if (!is64Bit) { // 32-bit and 64-bit formats are shuffled a bit.
+ for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+ ELFSym &Sym = SymbolTable[i];
+ SymTabOut.outword(Sym.NameIdx);
+ SymTabOut.outaddr32(Sym.Value);
+ SymTabOut.outword(Sym.Size);
+ SymTabOut.outbyte(Sym.Info);
+ SymTabOut.outbyte(Sym.Other);
+ SymTabOut.outhalf(Sym.SectionIdx);
+ }
+ } else {
+ for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+ ELFSym &Sym = SymbolTable[i];
+ SymTabOut.outword(Sym.NameIdx);
+ SymTabOut.outbyte(Sym.Info);
+ SymTabOut.outbyte(Sym.Other);
+ SymTabOut.outhalf(Sym.SectionIdx);
+ SymTabOut.outaddr64(Sym.Value);
+ SymTabOut.outxword(Sym.Size);
+ }
+ }
+
+ SymTab.Size = SymTabBuf.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+ // First step: add the section for the string table to the list of sections:
+ ELFSection &SHStrTab = getSection(".shstrtab", ELFSection::SHT_STRTAB, 0);
+
+ // Now that we know which section number is the .shstrtab section, update the
+ // e_shstrndx entry in the ELF header.
+ OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+ FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset);
+
+ // Set the NameIdx of each section in the string table and emit the bytes for
+ // the string table.
+ unsigned Index = 0;
+ DataBuffer &Buf = SHStrTab.SectionData;
+
+ for (std::list<ELFSection>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ // Set the index into the table. Note if we have lots of entries with
+ // common suffixes, we could memoize them here if we cared.
+ I->NameIdx = Index;
+
+ // Add the name to the output buffer, including the null terminator.
+ Buf.insert(Buf.end(), I->Name.begin(), I->Name.end());
+
+ // Add a null terminator.
+ Buf.push_back(0);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += I->Name.size()+1;
+ }
+
+ // Set the size of .shstrtab now that we know what it is.
+ assert(Index == Buf.size());
+ SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+ // Pass #1: Compute the file offset for each section.
+ size_t FileOff = FileHeader.size(); // File header first.
+
+ // Emit all of the section data in order.
+ for (std::list<ELFSection>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (I->Align)
+ FileOff = (FileOff+I->Align-1) & ~(I->Align-1);
+ I->Offset = FileOff;
+ FileOff += I->SectionData.size();
+ }
+
+ // Align Section Header.
+ unsigned TableAlign = is64Bit ? 8 : 4;
+ FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+ // Now that we know where all of the sections will be emitted, set the e_shnum
+ // entry in the ELF header.
+ OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+ FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset);
+
+ // Now that we know the offset in the file of the section table, update the
+ // e_shoff address in the ELF header.
+ FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset);
+
+ // Now that we know all of the data in the file header, emit it and all of the
+ // sections!
+ O.write((char*)&FileHeader[0], FileHeader.size());
+ FileOff = FileHeader.size();
+ DataBuffer().swap(FileHeader);
+
+ DataBuffer Table;
+ OutputBuffer TableOut(Table, is64Bit, isLittleEndian);
+
+ // Emit all of the section data and build the section table itself.
+ while (!SectionList.empty()) {
+ const ELFSection &S = *SectionList.begin();
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (S.Align)
+ for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
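+ // The pad byte value is arbitrary; 0xAB is presumably just easy to spot in
+ // a hex dump.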
+ O.write((char*)&S.SectionData[0], S.SectionData.size());
+ FileOff += S.SectionData.size();
+
+ TableOut.outword(S.NameIdx); // sh_name - Symbol table name idx
+ TableOut.outword(S.Type); // sh_type - Section contents & semantics
+ TableOut.outword(S.Flags); // sh_flags - Section flags.
+ TableOut.outaddr(S.Addr); // sh_addr - The mem addr this section is in.
+ TableOut.outaddr(S.Offset); // sh_offset - Offset from the file start.
+ TableOut.outword(S.Size); // sh_size - The section size.
+ TableOut.outword(S.Link); // sh_link - Section header table index link.
+ TableOut.outword(S.Info); // sh_info - Auxiliary information.
+ TableOut.outword(S.Align); // sh_addralign - Alignment of section.
+ TableOut.outword(S.EntSize); // sh_entsize - Size of entries in the section
+
+ SectionList.pop_front();
+ }
+
+ // Align output for the section table.
+ for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+ FileOff != NewFileOff; ++FileOff)
+ O << (char)0xAB;
+
+ // Emit the section table itself.
+ O.write((char*)&Table[0], Table.size());
+}
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
new file mode 100644
index 0000000..31aa05a
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,230 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <list>
+#include <map>
+
+namespace llvm {
+ class GlobalVariable;
+ class Mangler;
+ class MachineCodeEmitter;
+ class ELFCodeEmitter;
+ class raw_ostream;
+
+ /// ELFWriter - This class implements the common target-independent code for
+ /// writing ELF files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class ELFWriter : public MachineFunctionPass {
+ friend class ELFCodeEmitter;
+ public:
+ static char ID;
+
+ MachineCodeEmitter &getMachineCodeEmitter() const {
+ return *(MachineCodeEmitter*)MCE;
+ }
+
+ ELFWriter(raw_ostream &O, TargetMachine &TM);
+ ~ELFWriter();
+
+ typedef std::vector<unsigned char> DataBuffer;
+
+ protected:
+ /// Output stream to send the resultant object file to.
+ ///
+ raw_ostream &O;
+
+ /// Target machine description.
+ ///
+ TargetMachine &TM;
+
+ /// Mang - The object used to perform name mangling for this module.
+ ///
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ ELFCodeEmitter *MCE;
+
+ //===------------------------------------------------------------------===//
+ // Properties to be set by the derived class ctor, used to configure the
+ // ELFWriter.
+
+ // e_machine - This field is the target specific value to emit as the
+ // e_machine member of the ELF header.
+ unsigned short e_machine;
+
+ // e_flags - The machine flags for the target. This defaults to zero.
+ unsigned e_flags;
+
+ //===------------------------------------------------------------------===//
+ // Properties inferred automatically from the target machine.
+ //
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the ELF file.
+ bool doInitialization(Module &M);
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the ELF file to 'O'.
+ bool doFinalization(Module &M);
+
+ private:
+ // The buffer we accumulate the file header into. Note that this should be
+ // changed into something much more efficient later (and the bitcode writer
+ // as well!).
+ DataBuffer FileHeader;
+
+ /// ELFSection - This struct contains information about each section that is
+ /// emitted to the file. This is eventually turned into the section header
+ /// table at the end of the file.
+ struct ELFSection {
+ std::string Name; // Name of the section.
+ unsigned NameIdx; // Index in .shstrtab of name, once emitted.
+ unsigned Type;
+ unsigned Flags;
+ uint64_t Addr;
+ unsigned Offset;
+ unsigned Size;
+ unsigned Link;
+ unsigned Info;
+ unsigned Align;
+ unsigned EntSize;
+
+ /// SectionIdx - The number of the section in the Section Table.
+ ///
+ unsigned short SectionIdx;
+
+ /// SectionData - The actual data for this section which we are building
+ /// up for emission to the file.
+ DataBuffer SectionData;
+
+ enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3,
+ SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7,
+ SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 };
+ enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 };
+ enum { // SHF - ELF Section Header Flags
+ SHF_WRITE = 1 << 0, // Writable
+ SHF_ALLOC = 1 << 1, // Mapped into the process addr space
+ SHF_EXECINSTR = 1 << 2, // Executable
+ SHF_MERGE = 1 << 4, // Might be merged if equal
+ SHF_STRINGS = 1 << 5, // Contains null-terminated strings
+ SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index
+ SHF_LINK_ORDER = 1 << 7, // Preserve order after combining
+ SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
+ SHF_GROUP = 1 << 9, // Section is a member of a group
+ SHF_TLS = 1 << 10 // Section holds thread-local data
+ };
+
+ ELFSection(const std::string &name)
+ : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
+ Link(0), Info(0), Align(0), EntSize(0) {
+ }
+ };
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the section header table
+ /// is constructed from this info.
+ std::list<ELFSection> SectionList;
+ unsigned NumSections; // Always = SectionList.size()
+
+ /// SectionLookup - This is a mapping from section name to section number in
+ /// the SectionList.
+ std::map<std::string, ELFSection*> SectionLookup;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ ELFSection &getSection(const std::string &Name,
+ unsigned Type, unsigned Flags = 0) {
+ ELFSection *&SN = SectionLookup[Name];
+ if (SN) return *SN;
+
+ SectionList.push_back(Name);
+ SN = &SectionList.back();
+ SN->SectionIdx = NumSections++;
+ SN->Type = Type;
+ SN->Flags = Flags;
+ return *SN;
+ }
+
+ ELFSection &getDataSection() {
+ return getSection(".data", ELFSection::SHT_PROGBITS,
+ ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
+ }
+ ELFSection &getBSSSection() {
+ return getSection(".bss", ELFSection::SHT_NOBITS,
+ ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
+ }
+
+ /// ELFSym - This struct contains information about each symbol that is
+ /// added to logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct ELFSym {
+ const GlobalValue *GV; // The global value this corresponds to.
+ unsigned NameIdx; // Index in .strtab of name, once emitted.
+ uint64_t Value;
+ unsigned Size;
+ unsigned char Info;
+ unsigned char Other;
+ unsigned short SectionIdx;
+
+ enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
+ enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3,
+ STT_FILE = 4 };
+ ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0),
+ Other(0), SectionIdx(0) {}
+
+ void SetBind(unsigned X) {
+ assert(X == (X & 0xF) && "Bind value out of range!");
+ Info = (Info & 0x0F) | (X << 4);
+ }
+ void SetType(unsigned X) {
+ assert(X == (X & 0xF) && "Type value out of range!");
+ Info = (Info & 0xF0) | X;
+ }
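+ // The st_info byte packs the binding in the high nibble and the type in the
+ // low nibble; e.g. SetBind(STB_GLOBAL) followed by SetType(STT_OBJECT)
+ // yields Info == 0x11.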
+ };
+
+ /// SymbolTable - This is the list of symbols we have emitted to the file.
+ /// This actually gets rearranged before emission to the file (to put the
+ /// local symbols first in the list).
+ std::vector<ELFSym> SymbolTable;
+
+ // As we complete the ELF file, we need to update fields in the ELF header
+ // (e.g. the location of the section table). These members keep track of
+ // the offset in ELFHeader of these various pieces to update and other
+ // locations in the file.
+ unsigned ELFHeader_e_shoff_Offset; // e_shoff in ELF header.
+ unsigned ELFHeader_e_shstrndx_Offset; // e_shstrndx in ELF header.
+ unsigned ELFHeader_e_shnum_Offset; // e_shnum in ELF header.
+ private:
+ void EmitGlobal(GlobalVariable *GV);
+
+ void EmitSymbolTable();
+
+ void EmitSectionTableStringTable();
+ void OutputSectionsAndSectionTable();
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 0000000..cf2ebb3
--- /dev/null
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,212 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN Printer : public FunctionPass {
+ static char ID;
+ std::ostream &OS;
+
+ public:
+ explicit Printer(std::ostream &OS = *cerr);
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ };
+
+ class VISIBILITY_HIDDEN Deleter : public FunctionPass {
+ static char ID;
+
+ public:
+ Deleter();
+
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+ bool doFinalization(Module &M);
+ };
+
+}
+
+static RegisterPass<GCModuleInfo>
+X("collector-metadata", "Create Garbage Collector Module Metadata");
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo()
+ : ImmutablePass(&ID) {}
+
+GCModuleInfo::~GCModuleInfo() {
+ clear();
+}
+
+GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
+ const std::string &Name) {
+ const char *Start = Name.c_str();
+
+ strategy_map_type::iterator NMI =
+ StrategyMap.find(Start, Start + Name.size());
+ if (NMI != StrategyMap.end())
+ return NMI->getValue();
+
+ for (GCRegistry::iterator I = GCRegistry::begin(),
+ E = GCRegistry::end(); I != E; ++I) {
+ if (strcmp(Start, I->getName()) == 0) {
+ GCStrategy *S = I->instantiate();
+ S->M = M;
+ S->Name = Name;
+ StrategyMap.GetOrCreateValue(Start, Start + Name.size()).setValue(S);
+ StrategyList.push_back(S);
+ return S;
+ }
+ }
+
+ cerr << "unsupported GC: " << Name << "\n";
+ abort();
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+ GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ FInfoMap.clear();
+ StrategyMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(std::ostream &OS) {
+ return new Printer(OS);
+}
+
+Printer::Printer(std::ostream &OS)
+ : FunctionPass(&ID), OS(OS) {}
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ default: assert(0 && "Unknown GC point kind");
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+}
+
+bool Printer::runOnFunction(Function &F) {
+ if (F.hasGC()) {
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getNameStart() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\tlabel " << PI->Num << ": " << DescKind(PI->Kind) << ", live = {";
+
+ for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+ RE = FD->live_end(PI); RI != RE;) {
+ OS << " " << RI->Num;
+ if (++RI != RE)
+ OS << ",";
+ }
+
+ OS << " }\n";
+ }
+ }
+
+ return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+ return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(&ID) {}
+
+const char *Deleter::getPassName() const {
+ return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &MF) {
+ return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Deleter didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 0000000..5a5ef84
--- /dev/null
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,30 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ // Default is no action.
+}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 0000000..ad7421a
--- /dev/null
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,392 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the GCStrategy. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
+ class VISIBILITY_HIDDEN LowerIntrinsics : public FunctionPass {
+ static bool NeedsDefaultLoweringPass(const GCStrategy &C);
+ static bool NeedsCustomLoweringPass(const GCStrategy &C);
+ static bool CouldBecomeSafePoint(Instruction *I);
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ static bool InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+
+ /// MachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// GCMetadata record for each function.
+ class VISIBILITY_HIDDEN MachineCodeAnalysis : public MachineFunctionPass {
+ const TargetMachine *TM;
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ unsigned InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ MachineCodeAnalysis();
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ InitRoots(true),
+ UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+ cerr << "gc " << getName() << " must override performCustomLowering.\n";
+ abort();
+ return 0;
+}
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+ GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+ Functions.push_back(FI);
+ return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass(&ID) {}
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ bool MadeChange = false;
+ for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
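+ // Give every llvm.gcroot alloca that is not obviously stored to before the
+ // first potential safe point an explicit null-pointer store, so the
+ // collector never scans an uninitialized root.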
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I, IP);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ if (NeedsCustomLoweringPass(S))
+ MadeChange |= S.performCustomLowering(F);
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
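+ // In the loop below, llvm.gcwrite(%value, %obj, %ptr) becomes a plain store
+ // of %value through %ptr, and llvm.gcread(%obj, %ptr) becomes a plain load
+ // of %ptr, unless the strategy provides custom barriers.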
+
+ SmallVector<AllocaInst*,32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getOperand(2), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ CI->getOperand(1)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
+ return new MachineCodeAnalysis();
+}
+
+char MachineCodeAnalysis::ID = 0;
+
+MachineCodeAnalysis::MachineCodeAnalysis()
+ : MachineFunctionPass(&ID) {}
+
+const char *MachineCodeAnalysis::getPassName() const {
+ return "Analyze Machine Code For Garbage Collection";
+}
+
+void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ unsigned Label = MMI->NextLabelID();
+ // N.B. we assume that MI is *not* equal to the "end()" iterator.
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label);
+ return Label;
+}
+
+void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall))
+ FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI));
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall))
+ FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI));
+}
+
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->getDesc().isCall())
+ VisitCallPoint(MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ assert(TRI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+ RE = FI->roots_end(); RI != RE; ++RI)
+ RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num);
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ FindSafePoints(MF);
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 0000000..1d0887f
--- /dev/null
+++ b/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1229 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+
+namespace {
+ class VISIBILITY_HIDDEN IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+ ICSimple, // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the results of
+ /// if-conversion feasibility analysis. This includes the results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), the block's
+ /// classification, the common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+ /// IsBrAnalyzable - True if AnalyzeBranch() returns false.
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// Roots - Basic blocks that do not have successors. These are the starting
+ /// points of the graph traversal.
+ std::vector<MachineBasicBlock*> Roots;
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+ bool MadeChange;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "If Converter"; }
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ bool AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI);
+
+ bool MeetIfcvtSizeLimit(unsigned Size) const {
+ return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit();
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
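+ // Candidates are popped from the back of the sorted vector, so the entries
+ // that sort last here (diamonds, then the shapes with the smaller
+ // duplication count) are the ones attempted first.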
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+static RegisterPass<IfConverter>
+X("if-converter", "If Converter");
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ if (!TII) return false;
+
+ static int FnNum = -1;
+ DOUT << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getFunction()->getName() << "\'";
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DOUT << " skipped\n";
+ return false;
+ }
+ DOUT << "\n";
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ // Look for root nodes, i.e. blocks without successors.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ if (I->succ_empty())
+ Roots.push_back(I);
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: assert(false && "Unexpected!");
+ break;
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ";
+ RetVal = IfConvertSimple(BBI, Kind);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal) {
+ if (isFalse) NumSimpleFalse++;
+ else NumSimple++;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DOUT << "Ifcvt (Triangle";
+ if (isFalse)
+ DOUT << " false";
+ if (isRev)
+ DOUT << " rev";
+ DOUT << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ";
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) NumTriangleFRev++;
+ else NumTriangleFalse++;
+ } else {
+ if (isRev) NumTriangleRev++;
+ else NumTriangle++;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ";
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal) NumDiamonds++;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ Roots.clear();
+ BBAnalysis.clear();
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the branch at the end of
+/// the block. Swap the block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns the
+/// number of instructions, in 'Dups', that the ifcvt would need to duplicate
+/// if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit())
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it requires the 'true' block's false successor
+/// (rather than its true successor) to be the 'false' block. It also returns,
+/// in 'Dups', the number of instructions that the ifcvt would need to
+/// duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (Size > TLI->getIfCvtDupBlockSizeLimit())
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+static
+MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
+ const TargetInstrInfo *TII) {
+ MachineBasicBlock::iterator I = BB->end();
+ while (I != BB->begin()) {
+ --I;
+ if (!I->getDesc().isBranch())
+ break;
+ }
+ return I;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+ while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups1;
+ ++TI;
+ ++FI;
+ }
+
+ TI = firstNonBranchInst(TrueBBI.BB, TII);
+ FI = firstNonBranchInst(FalseBBI.BB, TII);
+ while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups2;
+ --TI;
+ --FI;
+ }
+
+ return true;
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless that instruction is the last one.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ClobbersPred = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isNotDuplicable())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
+
+ if (!isCondBr) {
+ if (!isPredicated)
+ BBI.NonPredSize++;
+ else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // Predicate modification instruction should end the block (except for
+ // already predicated instructions and end of block branches).
+ if (isCondBr) {
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+ // The predicate may have been modified; the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TID.isPredicable()) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable or ends with fallthrough or unconditional branch.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = TrueBBI.Predicate.size() > 0;
+ bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool Enqueued = false;
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates. It returns true if any CFG restructuring is done to expose more
+/// if-conversion opportunities.
+bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ bool Change = false;
+ std::set<MachineBasicBlock*> Visited;
+ for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
+ for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
+ E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+ AnalyzeBlock(BB, Tokens);
+ }
+ }
+
+ // Sort to favor the more complex ifcvt schemes.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+
+ return Change;
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to its
+/// next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (++I != TI)
+ if (I == E || !I->empty())
+ return false;
+ return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, which will no longer
+ // be available once the cmp has executed.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
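+
+// Illustrative example of the simple (no rejoin) conversion performed above,
+// in hypothetical ARM-like pseudocode (not taken from any real target):
+//   before:  BB:  cmp r0, #0 ; beq TBB ; b FBB
+//            TBB: mov r1, #1 ; b Exit
+//   after:   BB:  cmp r0, #0 ; moveq r1, #1 ; beq Exit ; b FBB
+// TBB's instructions, including its exit branch, are predicated on the branch
+// condition and folded into BB; control reaches FBB only when the condition
+// fails.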
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ assert(false && "Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get if-converted based on stale information.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ bool DupBB = CvtBBI->BB->pred_size() > 1;
+ if (DupBB) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
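+
+// Illustrative example of the triangle conversion performed above, in
+// hypothetical ARM-like pseudocode (not taken from any real target):
+//   before:  BB:  cmp r0, #0 ; bne FBB
+//            TBB: add r1, r1, #1      ; falls through to FBB
+//   after:   BB:  cmp r0, #0 ; addeq r1, r1, #1
+//            FBB: ...
+// The side block is predicated on the condition under which it executes and
+// merged into the entry block, removing the conditional branch entirely.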
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ // Merge the 'true' and 'false' blocks by copying the instructions
+ // from the 'false' block to the 'true' block. That is, unless the true
+ // block would clobber the predicate, in which case we do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+ while (NumDups1 != 0) {
+ ++DI1;
+ ++DI2;
+ --NumDups1;
+ }
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Predicate the 'true' block after removing its branch.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ++i)
+ --DI1;
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+
+ // Predicate the 'false' block.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ --DI2;
+ --NumDups2;
+ }
+ PredicateBlock(*BBI2, DI2, *Cond2);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1);
+ MergeBlocks(BBI, *BBI2);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+ // tail, add an unconditional branch to it.
+ if (TailBB) {
+ // Use a reference so updates (e.g. marking the tail block done) stick.
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+ if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
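+
+// Illustrative example of the diamond conversion performed above, in
+// hypothetical ARM-like pseudocode (not taken from any real target):
+//   before:  BB:   cmp r0, #0 ; beq TBB
+//            FBB:  mov r1, #0 ; b Tail
+//            TBB:  mov r1, #1          ; falls through to Tail
+//   after:   BB:   cmp r0, #0 ; moveq r1, #1 ; movne r1, #0
+//            Tail: ...
+// Both sides are predicated with complementary conditions and merged into the
+// entry block; the ordering of the two predicated runs is chosen by the
+// profitability heuristic above.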
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (TII->isPredicated(I))
+ continue;
+ if (!TII->PredicateInstruction(I, Cond)) {
+ cerr << "Unable to predicate " << *I << "!\n";
+ abort();
+ }
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ NumIfConvBBs++;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ const TargetInstrDesc &TID = I->getDesc();
+ bool isPredicated = TII->isPredicated(I);
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && !isPredicated && TID.isBranch())
+ break;
+
+ MachineInstr *MI = MF.CloneMachineInstr(I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+
+ if (!isPredicated)
+ if (!TII->PredicateInstruction(MI, Cond)) {
+ cerr << "Unable to predicate " << *MI << "!\n";
+ abort();
+ }
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ NumDupBBs++;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+///
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ // Redirect all branches to FromBB to ToBB.
+ std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(),
+ FromBBI.BB->pred_end());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ MachineBasicBlock *Pred = Preds[i];
+ if (Pred == ToBBI.BB)
+ continue;
+ Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB);
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ FromBBI.NonPredSize = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..e6912b8
--- /dev/null
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,892 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+ Type::FloatTy);
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+ Type::DoubleTy);
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+ Fn->arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy, Constant *&FCache) {
+ if (!FCache) {
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+ }
+
+ IRBuilder<> Builder(CI->getParent(), CI);
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
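+
+// Illustrative example (not from the original source): ReplaceCallWith is used
+// by LowerIntrinsicCall below to turn, e.g.,
+//   call void @llvm.memcpy.i32(i8* %dst, i8* %src, i32 %len, i32 4)
+// into a plain libc call, dropping the alignment operand and casting the
+// length to the pointer-sized integer type:
+//   call i8* @memcpy(i8* %dst, i8* %src, i32 %len)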
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::Int32Ty);
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::VoidTy);
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::VoidTy);
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty),
+ PointerType::getUnqual(Type::Int8Ty),
+ Type::Int32Ty,
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
+static Value *LowerBSWAP(Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ switch(BitSize) {
+ default: assert(0 && "Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::Int64Ty,
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::Int64Ty,
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
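+
+// Worked example for the 32-bit case above (illustrative): with V = 0xAABBCCDD,
+//   Tmp4 = V << 24               = 0xDD000000
+//   Tmp3 = (V << 8) & 0x00FF0000 = 0x00CC0000
+//   Tmp2 = (V >> 8) & 0x0000FF00 = 0x0000BB00
+//   Tmp1 = V >> 24               = 0x000000AA
+// and OR-ing the four values yields 0xDDCCBBAA, the byte-swapped result.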
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
+static Value *LowerCTPOP(Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+ Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
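+
+// Worked example of the parallel bit-count above (illustrative), for an i8
+// value V = 0b11010110 (five bits set; the masks truncate to the value width):
+//   i=1, mask 0x55: (V & 0x55)    + ((V >> 1) & 0x55)    = 0x95  ; 2-bit sums
+//   i=2, mask 0x33: (0x95 & 0x33) + ((0x95 >> 2) & 0x33) = 0x32  ; 4-bit sums
+//   i=4, mask 0x0F: (0x32 & 0x0F) + ((0x32 >> 4) & 0x0F) = 0x05  ; popcount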
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(V, IP);
+}
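+
+// Worked example (illustrative): for an i8 value V = 0b00010000, the shift/or
+// loop smears the highest set bit downward, giving 0b00011111; the NOT yields
+// 0b11100000, whose population count (3) is exactly ctlz(V).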
+
+/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
+/// three integer arguments. The first argument is the Value from which the
+/// bits will be selected. It may be of any bit width. The second and third
+/// arguments specify a range of bits to select with the second argument
+/// specifying the low bit and the third argument specifying the high bit. Both
+/// must be type i32. The result is the corresponding selected bits from the
+/// Value in the same width as the Value (first argument). If the low bit index
+/// is higher than the high bit index, the selection is reversed and the bits
+/// are returned in reverse order.
+/// @brief Lowering of llvm.part.select intrinsic.
+static Instruction *LowerPartSelect(CallInst *CI) {
+ IRBuilder<> Builder;
+
+ // Make sure we're dealing with a part select intrinsic here
+ Function *F = CI->getCalledFunction();
+ const FunctionType *FT = F->getFunctionType();
+ if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+ FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
+ !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
+ return CI;
+
+ // Get the intrinsic implementation function by converting all the . to _
+ // in the intrinsic's function name and then reconstructing the function
+ // declaration.
+ std::string Name(F->getName());
+ for (unsigned i = 4; i < Name.length(); ++i)
+ if (Name[i] == '.')
+ Name[i] = '_';
+ Module* M = F->getParent();
+ F = cast<Function>(M->getOrInsertFunction(Name, FT));
+ F->setLinkage(GlobalValue::WeakAnyLinkage);
+
+ // If we haven't defined the impl function yet, do so now
+ if (F->isDeclaration()) {
+
+ // Get the arguments to the function
+ Function::arg_iterator args = F->arg_begin();
+ Value* Val = args++; Val->setName("Val");
+ Value* Lo = args++; Lo->setName("Lo");
+ Value* Hi = args++; Hi->setName("High");
+
+ // We want to select a range of bits here such that [Hi, Lo] is shifted
+ // down to the low bits. However, it is quite possible that Hi is smaller
+ // than Lo in which case the bits have to be reversed.
+
+ // Create the blocks we will need for the two cases (forward, reverse)
+ BasicBlock* CurBB = BasicBlock::Create("entry", F);
+ BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent());
+ BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent());
+ BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent());
+ BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent());
+ BasicBlock *RsltBlk = BasicBlock::Create("result", CurBB->getParent());
+
+ Builder.SetInsertPoint(CurBB);
+
+ // Cast Hi and Lo to the size of Val so the widths are all the same
+ if (Hi->getType() != Val->getType())
+ Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false,
+ "tmp");
+ if (Lo->getType() != Val->getType())
+ Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false,
+ "tmp");
+
+ // Compute a few things that both cases will need, up front.
+ Constant* Zero = ConstantInt::get(Val->getType(), 0);
+ Constant* One = ConstantInt::get(Val->getType(), 1);
+ Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
+
+ // Compare the Hi and Lo bit positions. This is used to determine
+ // which case we have (forward or reverse)
+ Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less");
+ Builder.CreateCondBr(Cmp, RevSize, FwdSize);
+
+ // First, compute the number of bits in the forward case.
+ Builder.SetInsertPoint(FwdSize);
+ Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits");
+ Builder.CreateBr(Compute);
+
+ // Second, compute the number of bits in the reverse case.
+ Builder.SetInsertPoint(RevSize);
+ Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits");
+ Builder.CreateBr(Compute);
+
+ // Now, compute the bit range. Start by getting the bitsize and the shift
+ // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
+ // the number of bits we want in the range. We shift the bits down to the
+ // least significant bits, apply the mask to zero out unwanted high bits,
+ // and we have computed the "forward" result. It may still need to be
+ // reversed.
+ Builder.SetInsertPoint(Compute);
+
+ // Get the BitSize from one of the two subtractions
+ PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits");
+ BitSize->reserveOperandSpace(2);
+ BitSize->addIncoming(FBitSize, FwdSize);
+ BitSize->addIncoming(RBitSize, RevSize);
+
+ // Get the ShiftAmount as the smaller of Hi/Lo
+ PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt");
+ ShiftAmt->reserveOperandSpace(2);
+ ShiftAmt->addIncoming(Lo, FwdSize);
+ ShiftAmt->addIncoming(Hi, RevSize);
+
+ // Increment the bit size
+ Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits");
+
+ // Create a Mask to zero out the high order bits.
+ Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask");
+ Mask = Builder.CreateNot(Mask, "mask");
+
+ // Shift the bits down and apply the mask
+ Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres");
+ FRes = Builder.CreateAnd(FRes, Mask, "fres");
+ Builder.CreateCondBr(Cmp, Reverse, RsltBlk);
+
+ // In the Reverse block we already have the forward result in FRes, but we
+ // must reverse it by shifting the bits of FRes out to the right and shifting
+ // them into RRes from the left.
+ Builder.SetInsertPoint(Reverse);
+
+ // First set up our loop counters
+ PHINode *Count = Builder.CreatePHI(Val->getType(), "count");
+ Count->reserveOperandSpace(2);
+ Count->addIncoming(BitSizePlusOne, Compute);
+
+ // Next, get the value that we are shifting.
+ PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
+ BitsToShift->reserveOperandSpace(2);
+ BitsToShift->addIncoming(FRes, Compute);
+
+ // Finally, get the result of the last computation
+ PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
+ RRes->reserveOperandSpace(2);
+ RRes->addIncoming(Zero, Compute);
+
+ // Decrement the counter
+ Value *Decr = Builder.CreateSub(Count, One, "decr");
+ Count->addIncoming(Decr, Reverse);
+
+ // Compute the Bit that we want to move
+ Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit");
+
+ // Compute the new value for next iteration.
+ Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift");
+ BitsToShift->addIncoming(NewVal, Reverse);
+
+ // Shift the bit into the low bits of the result.
+ Value *NewRes = Builder.CreateShl(RRes, One, "lshift");
+ NewRes = Builder.CreateOr(NewRes, Bit, "addbit");
+ RRes->addIncoming(NewRes, Reverse);
+
+ // Terminate loop if we've moved all the bits.
+ Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond");
+ Builder.CreateCondBr(Cond, RsltBlk, Reverse);
+
+ // Finally, in the result block, select one of the two results with a PHI
+ // node and return the result.
+ Builder.SetInsertPoint(RsltBlk);
+ PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select");
+ BitSelect->reserveOperandSpace(2);
+ BitSelect->addIncoming(FRes, Compute);
+ BitSelect->addIncoming(NewRes, Reverse);
+ Builder.CreateRet(BitSelect);
+ }
+
+ // Return a call to the implementation function
+ Builder.SetInsertPoint(CI->getParent(), CI);
+ CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1),
+ CI->getOperand(2), CI->getOperand(3));
+ NewCI->setName(CI->getName());
+ return NewCI;
+}
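+
+// Illustrative examples (not from the original source): selecting bits [8,15]
+// of the i32 value 0xABCD1234 (Lo=8, Hi=15) returns 0x12; swapping the indices
+// (Lo=15, Hi=8) returns the same bits in reverse order, 0x48, since 0b00010010
+// reversed is 0b01001000.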
+
+/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
+/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
+/// The first two arguments can be any bit width. The result is the same width
+/// as %Value. The operation replaces bits between %Low and %High with the value
+/// in %Replacement. If %Replacement is not the same width, it is truncated or
+/// zero extended as appropriate to fit the bits being replaced. If %Low is
+/// greater than %High then the inverse set of bits are replaced.
+/// @brief Lowering of llvm.part.set intrinsic.
+static Instruction *LowerPartSet(CallInst *CI) {
+ IRBuilder<> Builder;
+
+ // Make sure we're dealing with a part set intrinsic here
+ Function *F = CI->getCalledFunction();
+ const FunctionType *FT = F->getFunctionType();
+ if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+ FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
+ !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
+ !FT->getParamType(3)->isInteger())
+ return CI;
+
+ // Get the intrinsic implementation function by converting all the . to _
+ // in the intrinsic's function name and then reconstructing the function
+ // declaration.
+ std::string Name(F->getName());
+ for (unsigned i = 4; i < Name.length(); ++i)
+ if (Name[i] == '.')
+ Name[i] = '_';
+ Module* M = F->getParent();
+ F = cast<Function>(M->getOrInsertFunction(Name, FT));
+ F->setLinkage(GlobalValue::WeakAnyLinkage);
+
+ // If we haven't defined the impl function yet, do so now
+ if (F->isDeclaration()) {
+ // Get the arguments for the function.
+ Function::arg_iterator args = F->arg_begin();
+ Value* Val = args++; Val->setName("Val");
+ Value* Rep = args++; Rep->setName("Rep");
+ Value* Lo = args++; Lo->setName("Lo");
+ Value* Hi = args++; Hi->setName("Hi");
+
+ // Get some types we need
+ const IntegerType* ValTy = cast<IntegerType>(Val->getType());
+ const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
+ uint32_t RepBits = RepTy->getBitWidth();
+
+ // Constant Definitions
+ ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
+ ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
+ ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
+ ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
+ ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
+ ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
+ ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
+
+ // Basic blocks we fill in below.
+ BasicBlock* entry = BasicBlock::Create("entry", F, 0);
+ BasicBlock* large = BasicBlock::Create("large", F, 0);
+ BasicBlock* small = BasicBlock::Create("small", F, 0);
+ BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
+ BasicBlock* result = BasicBlock::Create("result", F, 0);
+
+ // BASIC BLOCK: entry
+ Builder.SetInsertPoint(entry);
+ // First, get the number of bits that we're placing as an i32
+ Value* is_forward = Builder.CreateICmpULT(Lo, Hi);
+ Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo);
+ Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi);
+ Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn);
+ NumBits = Builder.CreateAdd(NumBits, One);
+ // Now, convert Lo and Hi to ValTy bit width
+ Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false);
+ // Determine if the replacement bits are larger than the number of bits we
+ // are replacing and deal with it.
+ Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth);
+ Builder.CreateCondBr(is_large, large, small);
+
+ // BASIC BLOCK: large
+ Builder.SetInsertPoint(large);
+ Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits);
+ MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(),
+ /* isSigned */ false);
+ Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits);
+ Value* Rep2 = Builder.CreateAnd(Mask1, Rep);
+ Builder.CreateBr(small);
+
+ // BASIC BLOCK: small
+ Builder.SetInsertPoint(small);
+ PHINode* Rep3 = Builder.CreatePHI(RepTy);
+ Rep3->reserveOperandSpace(2);
+ Rep3->addIncoming(Rep2, large);
+ Rep3->addIncoming(Rep, entry);
+ Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false);
+ Builder.CreateCondBr(is_forward, result, reverse);
+
+ // BASIC BLOCK: reverse (reverses the bits of the replacement)
+ Builder.SetInsertPoint(reverse);
+ // Set up our loop counter as a PHI so we can decrement on each iteration.
+ // We will loop for the number of bits in the replacement value.
+ PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count");
+ Count->reserveOperandSpace(2);
+ Count->addIncoming(NumBits, small);
+
+ // Get the value that we are shifting bits out of as a PHI because
+ // we'll change this with each iteration.
+ PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
+ BitsToShift->reserveOperandSpace(2);
+ BitsToShift->addIncoming(Rep4, small);
+
+ // Get the result of the last computation or zero on first iteration
+ PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
+ RRes->reserveOperandSpace(2);
+ RRes->addIncoming(ValZero, small);
+
+ // Decrement the loop counter by one
+ Value *Decr = Builder.CreateSub(Count, One);
+ Count->addIncoming(Decr, reverse);
+
+ // Get the bit that we want to move into the result
+ Value *Bit = Builder.CreateAnd(BitsToShift, ValOne);
+
+ // Compute the new value of the bits to shift for the next iteration.
+ Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne);
+ BitsToShift->addIncoming(NewVal, reverse);
+
+ // Shift the bit we extracted into the low bit of the result.
+ Value *NewRes = Builder.CreateShl(RRes, ValOne);
+ NewRes = Builder.CreateOr(NewRes, Bit);
+ RRes->addIncoming(NewRes, reverse);
+
+ // Terminate loop if we've moved all the bits.
+ Value *Cond = Builder.CreateICmpEQ(Decr, Zero);
+ Builder.CreateCondBr(Cond, result, reverse);
+
+ // BASIC BLOCK: result
+ Builder.SetInsertPoint(result);
+ PHINode *Rplcmnt = Builder.CreatePHI(Val->getType());
+ Rplcmnt->reserveOperandSpace(2);
+ Rplcmnt->addIncoming(NewRes, reverse);
+ Rplcmnt->addIncoming(Rep4, small);
+ Value* t0 = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false);
+ Value* t1 = Builder.CreateShl(ValMask, Lo);
+ Value* t2 = Builder.CreateNot(t1);
+ Value* t3 = Builder.CreateShl(t1, t0);
+ Value* t4 = Builder.CreateOr(t2, t3);
+ Value* t5 = Builder.CreateAnd(t4, Val);
+ Value* t6 = Builder.CreateShl(Rplcmnt, Lo);
+ Value* Rslt = Builder.CreateOr(t5, t6, "part_set");
+ Builder.CreateRet(Rslt);
+ }
+
+ // Return a call to the implementation function
+ Builder.SetInsertPoint(CI->getParent(), CI);
+ CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1),
+ CI->getOperand(2), CI->getOperand(3),
+ CI->getOperand(4));
+ NewCI->setName(CI->getName());
+ return NewCI;
+}
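+
+// Illustrative example (not from the original source): replacing bits [8,15]
+// of the i32 value 0xFFFFFFFF with an i8 replacement of 0x00 yields 0xFFFF00FF.
+// The mask built from Lo and NumBits keeps every bit outside [8,15], and the
+// zero-extended (and, for the reversed case, bit-reversed) replacement is OR'd
+// back in at bit 8.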
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, Constant *FCache,
+ Constant *DCache, Constant *LDCache,
+ const char *Fname, const char *Dname,
+ const char *LDname) {
+ switch (CI->getOperand(1)->getType()->getTypeID()) {
+ default: assert(0 && "Invalid type in intrinsic"); abort();
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
+ Type::FloatTy, FCache);
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
+ Type::DoubleTy, DCache);
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
+ CI->getOperand(1)->getType(), LDCache);
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+
+ Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ cerr << "Cannot lower a call to a non-intrinsic function '"
+ << Callee->getName() << "'!\n";
+ abort();
+ default:
+ cerr << "Error: Code generator does not support intrinsic function '"
+ << Callee->getName() << "'!\n";
+ abort();
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+ // never optimized (ie, right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ static Constant *SetjmpFCache = 0;
+ Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ Type::Int32Ty, SetjmpFCache);
+ if (CI->getType() != Type::VoidTy)
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (CI->getType() != Type::VoidTy)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ static Constant *LongjmpFCache = 0;
+ ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ Type::VoidTy, LongjmpFCache);
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ static Constant *AbortFCache = 0;
+ ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
+ Type::VoidTy, AbortFCache);
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
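+ // e.g. (illustrative) x = 0b01100: ~x & (x-1) = 0b10011 & 0b01011 = 0b00011,
+ // whose population count, 2, is the number of trailing zeros of x.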
+ Value *Src = CI->getOperand(1);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::part_select:
+ CI->replaceAllUsesWith(LowerPartSelect(CI));
+ break;
+
+ case Intrinsic::part_set:
+ CI->replaceAllUsesWith(LowerPartSet(CI));
+ break;
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ static bool Warned = false;
+ if (!Warned)
+ cerr << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ cerr << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ cerr << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
+ break;
+ }
+
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_exception:
+ case Intrinsic::eh_selector_i32:
+ case Intrinsic::eh_selector_i64:
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::eh_typeid_for_i32:
+ case Intrinsic::eh_typeid_for_i64:
+ // Return something different to eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy: {
+ static Constant *MemcpyFCache = 0;
+ const IntegerType *IntPtr = TD.getIntPtrType();
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ Ops[1] = CI->getOperand(2);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemcpyFCache);
+ break;
+ }
+ case Intrinsic::memmove: {
+ static Constant *MemmoveFCache = 0;
+ const IntegerType *IntPtr = TD.getIntPtrType();
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ Ops[1] = CI->getOperand(2);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemmoveFCache);
+ break;
+ }
+ case Intrinsic::memset: {
+ static Constant *MemsetFCache = 0;
+ const IntegerType *IntPtr = TD.getIntPtrType();
+ Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ // Extend the byte value to i32, as memset expects an int argument.
+ Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty,
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemsetFCache);
+ break;
+ }
+ case Intrinsic::sqrt: {
+ static Constant *sqrtFCache = 0;
+ static Constant *sqrtDCache = 0;
+ static Constant *sqrtLDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, sqrtFCache, sqrtDCache, sqrtLDCache,
+ "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ static Constant *logFCache = 0;
+ static Constant *logDCache = 0;
+ static Constant *logLDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, logFCache, logDCache, logLDCache,
+ "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ static Constant *log2FCache = 0;
+ static Constant *log2DCache = 0;
+ static Constant *log2LDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, log2FCache, log2DCache, log2LDCache,
+ "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ static Constant *log10FCache = 0;
+ static Constant *log10DCache = 0;
+ static Constant *log10LDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, log10FCache, log10DCache, log10LDCache,
+ "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ static Constant *expFCache = 0;
+ static Constant *expDCache = 0;
+ static Constant *expLDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, expFCache, expDCache, expLDCache,
+ "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ static Constant *exp2FCache = 0;
+ static Constant *exp2DCache = 0;
+ static Constant *exp2LDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, exp2FCache, exp2DCache, exp2LDCache,
+ "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ static Constant *powFCache = 0;
+ static Constant *powDCache = 0;
+ static Constant *powLDCache = 0;
+ ReplaceFPIntrinsicWithCall(CI, powFCache, powDCache, powLDCache,
+ "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (CI->getType() != Type::VoidTy)
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 0000000..b3c60e6
--- /dev/null
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,289 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace llvm {
+ bool EnableFastISel;
+}
+
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden,
+ cl::desc("Dump emitter generated instructions as assembly"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+// When this works it will be on by default.
+static cl::opt<bool>
+DisablePostRAScheduler("disable-post-RA-scheduler",
+ cl::desc("Disable scheduling after register allocation"),
+ cl::init(true));
+
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -fast.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the experimental \"fast\" instruction selector"));
+
+FileModel::Model
+LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel))
+ return FileModel::Error;
+
+ // Fold redundant debug labels.
+ PM.add(createDebugLabelFoldingPass());
+
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodePlacementOptPass());
+
+ switch (FileType) {
+ default:
+ break;
+ case TargetMachine::AssemblyFile:
+ if (addAssemblyEmitter(PM, OptLevel, getAsmVerbosityDefault(), Out))
+ return FileModel::Error;
+ return FileModel::AsmFile;
+ case TargetMachine::ObjectFile:
+ if (getMachOWriterInfo())
+ return FileModel::MachOFile;
+ else if (getELFWriterInfo())
+ return FileModel::ElfFile;
+ }
+
+ return FileModel::Error;
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
+ MachineCodeEmitter *MCE,
+ CodeGenOpt::Level OptLevel) {
+ if (MCE)
+ addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *MCE);
+
+ PM.add(createGCInfoDeleter());
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
+ JITCodeEmitter *JCE,
+ CodeGenOpt::Level OptLevel) {
+ if (JCE)
+ addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *JCE);
+
+ PM.add(createGCInfoDeleter());
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a MachineCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method returns true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ MachineCodeEmitter &MCE,
+ CodeGenOpt::Level OptLevel) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel))
+ return true;
+
+ if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ addCodeEmitter(PM, OptLevel, PrintEmittedAsm, MCE);
+
+ PM.add(createGCInfoDeleter());
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a JITCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method returns true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ JITCodeEmitter &JCE,
+ CodeGenOpt::Level OptLevel) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel))
+ return true;
+
+ if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ addCodeEmitter(PM, OptLevel, PrintEmittedAsm, JCE);
+
+ PM.add(createGCInfoDeleter());
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+static void printAndVerify(PassManagerBase &PM,
+ bool allowDoubleDefs = false) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(allowDoubleDefs));
+}
+
+/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
+/// emitting to assembly files or machine code output.
+///
+bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Standard LLVM-Level Passes.
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs()));
+ }
+
+ // Turn exception handling constructs into something the code generators can
+ // handle.
+ if (!getTargetAsmInfo()->doesSupportExceptionHandling())
+ PM.add(createLowerInvokePass(getTargetLowering()));
+ else
+ PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &errs()));
+
+ // Standard Lower-Level Passes.
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
+ EnableFastISel = true;
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, /* allowDoubleDefs= */ true);
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, /* allowDoubleDefs= */ true);
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM);
+
+ // Perform register allocation.
+ PM.add(createRegisterAllocator());
+
+ // Perform stack slot coloring.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createStackSlotColoringPass(OptLevel >= CodeGenOpt::Aggressive));
+
+ printAndVerify(PM); // Print the register-allocated code
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM);
+
+ PM.add(createLowerSubregsPass());
+ printAndVerify(PM);
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM);
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None && !DisablePostRAScheduler) {
+ PM.add(createPostRAScheduler());
+ printAndVerify(PM);
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printAndVerify(PM);
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+ printAndVerify(PM);
+
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(*cerr));
+
+ return false;
+}
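+
+// A minimal sketch of how a target plugs into the hooks used above
+// (illustrative only; MyTargetMachine and createMyISelDag are hypothetical
+// names, not part of this file):
+//
+//   bool MyTargetMachine::addInstSelector(PassManagerBase &PM,
+//                                         CodeGenOpt::Level OptLevel) {
+//     PM.add(createMyISelDag(*this, OptLevel)); // target-specific DAG isel
+//     return false;                             // false means success
+//   }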
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 0000000..2e7b89c
--- /dev/null
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,114 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
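+
+// Orientation note (illustrative): this predicate is used by the priority
+// queue inside LatencyPriorityQueue, so returning true means LHS ranks below
+// RHS. For example, with PQ->getLatency(a) == 3 and PQ->getLatency(b) == 7,
+// operator()(a, b) is true and b -- the node on the longer critical path --
+// reaches the top of the queue first.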
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push_impl(SUnit *SU) {
+  // Look at all of the successors of this node. Count the number of successors
+  // for which this node is the sole unscheduled predecessor.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push(SU);
+}
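+
+// Worked example (illustrative): if SU is the single unscheduled predecessor
+// of three of its successors, NumNodesSolelyBlocking[SU->NodeNum] becomes 3.
+// latency_sort then uses getNumSolelyBlockNodes() as a tie-breaker, so among
+// nodes of equal latency SU is preferred, because scheduling it makes three
+// more nodes available.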
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase that predecessor's priority:
+/// scheduling it will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..67120b8
--- /dev/null
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,853 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said to
+// be a live interval for register v if there is no instruction with number
+// j' > j such that v is live at j', and there is no instruction with number
+// i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <ostream>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(unsigned I) const {
+ Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ return r->contains(I);
+}
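+
+// Worked example (illustrative): for an interval with ranges [1,4) and [8,10),
+// liveAt(2) and liveAt(8) return true while liveAt(5) and liveAt(10) return
+// false: upper_bound finds the first range starting after I, and only the
+// range immediately before it can contain I.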
+
+// liveBeforeAndAt - Check if the interval is live at the index and the index
+// just before it. If index is liveAt, check if it starts a new live range.
+// If it does, then check if the previous live range ends at index-1.
+bool LiveInterval::liveBeforeAndAt(unsigned I) const {
+ Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ if (!r->contains(I))
+ return false;
+ if (I != r->start)
+ return true;
+ // I is the start of a live range. Check if the previous live range ends
+ // at I-1.
+ if (r == ranges.begin())
+ return false;
+ return r->end == I;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+ const_iterator StartPos) const {
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != ranges.begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.ranges.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+/// overlaps - Return true if the live interval overlaps a range specified
+/// by [Start, End).
+bool LiveInterval::overlaps(unsigned Start, unsigned End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = begin();
+ const_iterator E = end();
+ const_iterator si = std::upper_bound(I, E, Start);
+ const_iterator ei = std::upper_bound(I, E, End);
+ if (si != ei)
+ return true;
+ if (si == I)
+ return false;
+ --si;
+ return si->contains(Start);
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+ unsigned OldEnd = I->end;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = next(I);
+ for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ }
+
+ // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+ // Erase any dead ranges.
+ ranges.erase(next(I), MergeTo);
+
+ // Update kill info.
+ removeKills(ValNo, OldEnd, I->end-1);
+
+ // If the newly formed range now touches the range after it and if they have
+ // the same value number, merge the two ranges into one range.
+ Ranges::iterator Next = next(I);
+ if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
+ I->end = Next->end;
+ ranges.erase(Next);
+ }
+}
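+
+// Worked example (illustrative): with ranges [4,8), [10,14), [16,20) all of
+// the same value number, extendIntervalEndTo(I = [4,8), NewEnd = 18) first
+// absorbs [10,14) (it is fully covered), then notices the result [4,18)
+// overlaps the following range [16,20) and merges with it, leaving the single
+// range [4,20).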
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = I;
+ do {
+ if (MergeTo == ranges.begin()) {
+ I->start = NewStart;
+ ranges.erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another interval, just delete a range and
+ // extend that interval.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ MergeTo->end = I->end;
+ } else {
+ // Otherwise, extend the interval right after.
+ ++MergeTo;
+ MergeTo->start = NewStart;
+ MergeTo->end = I->end;
+ }
+
+ ranges.erase(next(MergeTo), next(I));
+ return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+ unsigned Start = LR.start, End = LR.end;
+ iterator it = std::upper_bound(From, ranges.end(), Start);
+
+ // If the inserted interval starts in the middle or right at the end of
+ // another interval, just extend that interval to contain the range of LR.
+ if (it != ranges.begin()) {
+ iterator B = prior(it);
+ if (LR.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendIntervalEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two LiveRanges with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this range ends in the middle of, or right next to, another
+ // interval, merge it into that interval.
+ if (it != ranges.end()) {
+ if (LR.valno == it->valno) {
+ if (it->start <= End) {
+ it = extendIntervalStartTo(it, Start);
+
+ // If LR is a complete superset of an interval, we may need to grow its
+ // endpoint as well.
+ if (End > it->end)
+ extendIntervalEndTo(it, End);
+ else if (End < it->end)
+ // Overlapping intervals, there might have been a kill here.
+ removeKill(it->valno, End);
+ return it;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different valno's.
+ assert(it->start >= End &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new range that doesn't interact with anything.
+ // Insert it.
+ return ranges.insert(it, LR);
+}
+
+/// isInOneLiveRange - Return true if the range specified is entirely within
+/// a single LiveRange of the live interval.
+bool LiveInterval::isInOneLiveRange(unsigned Start, unsigned End) {
+ Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+ if (I == ranges.begin())
+ return false;
+ --I;
+ return I->contains(Start) && I->contains(End-1);
+}
+
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
+void LiveInterval::removeRange(unsigned Start, unsigned End,
+ bool RemoveDeadValNo) {
+ // Find the LiveRange containing this span.
+ Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+ assert(I != ranges.begin() && "Range is not in interval!");
+ --I;
+ assert(I->contains(Start) && I->contains(End-1) &&
+ "Range is not entirely in interval!");
+
+ // If the span we are removing is at the start of the LiveRange, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ removeKills(I->valno, Start, End);
+ if (RemoveDeadValNo) {
+ // Check if val# is dead.
+ bool isDead = true;
+ for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+ if (II != I && II->valno == ValNo) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that ValNo is dead, remove it. If it is the largest value
+ // number, just nuke it (and any other deleted values neighboring it),
+ // otherwise mark it as ~1U so it can be nuked later.
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } else {
+ ValNo->def = ~1U;
+ }
+ }
+ }
+
+ ranges.erase(I); // Removed the whole LiveRange.
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the LiveRange,
+ // adjust the other way.
+ if (I->end == End) {
+ removeKills(ValNo, Start, End);
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the LiveRange into two pieces.
+ unsigned OldEnd = I->end;
+ I->end = Start; // Trim the old interval.
+
+ // Insert the new one.
+ ranges.insert(next(I), LiveRange(End, OldEnd, ValNo));
+}
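+
+// Worked example (illustrative): removing [6,8) from a live range [4,12)
+// trims the original down to [4,6) and inserts a new range [8,12) with the
+// same value number; removing [4,6) instead would simply move the start of
+// the existing range up to 6.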
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ Ranges::iterator I = ranges.end();
+ Ranges::iterator E = ranges.begin();
+ do {
+ --I;
+ if (I->valno == ValNo)
+ ranges.erase(I);
+ } while (I != E);
+ // Now that ValNo is dead, remove it. If it is the largest value
+ // number, just nuke it (and any other deleted values neighboring it),
+ // otherwise mark it as ~1U so it can be nuked later.
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } else {
+ ValNo->def = ~1U;
+ }
+}
+
+/// scaleNumbering - Renumber VNI and ranges to provide gaps for new
+/// instructions.
+void LiveInterval::scaleNumbering(unsigned factor) {
+ // Scale ranges.
+ for (iterator RI = begin(), RE = end(); RI != RE; ++RI) {
+ RI->start = InstrSlots::scale(RI->start, factor);
+ RI->end = InstrSlots::scale(RI->end, factor);
+ }
+
+ // Scale VNI info.
+ for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) {
+ VNInfo *vni = *VNI;
+ if (vni->def != ~0U && vni->def != ~1U) {
+ vni->def = InstrSlots::scale(vni->def, factor);
+ }
+
+ for (unsigned i = 0; i < vni->kills.size(); ++i) {
+ if (vni->kills[i] != 0)
+ vni->kills[i] = InstrSlots::scale(vni->kills[i], factor);
+ }
+ }
+}
+
+/// FindLiveRangeContaining - Return the live range that contains the
+/// specified index, or end() if there is none.
+LiveInterval::const_iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) const {
+ const_iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != ranges.begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+LiveInterval::iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) {
+ iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+/// findDefinedVNInfo - Find the VNInfo that's defined at the specified index
+/// (register interval) or defined by the specified register (stack interval).
+VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const {
+ VNInfo *VNI = NULL;
+ for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+ i != e; ++i)
+ if ((*i)->def == DefIdxOrReg) {
+ VNI = *i;
+ break;
+ }
+ return VNI;
+}
+
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVector<VNInfo*, 16> &NewVNInfo) {
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ MustMapCurValNos = true;
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos) {
+ // Map the first live range.
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ ++OutIt;
+ for (iterator I = OutIt, E = end(); I != E; ++I) {
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+ // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+ (OutIt-1)->end = OutIt->end;
+ } else {
+ if (I != OutIt) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+
+ // Didn't merge, on to the next one.
+ ++OutIt;
+ }
+ }
+
+ // If we merge some live ranges, chop off the end.
+ ranges.erase(OutIt, end());
+ }
+
+  // Remember assignments because val# ids are changing.
+ SmallVector<unsigned, 16> OtherAssignments;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveInterval now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ iterator InsertPos = begin();
+ unsigned RangeNo = 0;
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+ // Map the valno in the other live range to the current live range.
+ I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+ assert(I->valno && "Adding a dead range?");
+ InsertPos = addRangeFrom(*I, InsertPos);
+ }
+
+ weight += Other.weight;
+ if (Other.preference && !preference)
+ preference = Other.preference;
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ VNInfo *LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ // Map the valno in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.valno = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; the value numbers of the overlapped live ranges are
+/// replaced with the specified value number.
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+ const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+ SmallVector<VNInfo*, 4> ReplacedValNos;
+ iterator IP = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ if (I->valno != RHSValNo)
+ continue;
+ unsigned Start = I->start, End = I->end;
+ IP = std::upper_bound(IP, end(), Start);
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > Start) {
+ if (IP[-1].valno != LHSValNo) {
+ ReplacedValNos.push_back(IP[-1].valno);
+ IP[-1].valno = LHSValNo; // Update val#.
+ }
+ Start = IP[-1].end;
+ // Trimmed away the whole range?
+ if (Start >= End) continue;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && End > IP->start) {
+ if (IP->valno != LHSValNo) {
+ ReplacedValNos.push_back(IP->valno);
+ IP->valno = LHSValNo; // Update val#.
+ }
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) continue;
+ }
+
+ // Map the valno in the other live range to the current live range.
+ IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
+ }
+
+
+ SmallSet<VNInfo*, 4> Seen;
+ for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) {
+ VNInfo *V1 = ReplacedValNos[i];
+ if (Seen.insert(V1)) {
+ bool isDead = true;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (I->valno == V1) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that V1 is dead, remove it. If it is the largest value number,
+ // just nuke it (and any other deleted values neighboring it), otherwise
+ // mark it as ~1U so it can be nuked later.
+ if (V1->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+ } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } else {
+ V1->def = ~1U;
+ }
+ }
+ }
+ }
+}
+
+
+/// MergeInClobberRanges - For any live ranges that are not defined in the
+/// current interval, but are defined in the Clobbers interval, mark them
+/// used with an unknown definition value.
+void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
+ BumpPtrAllocator &VNInfoAllocator) {
+ if (Clobbers.empty()) return;
+
+ DenseMap<VNInfo*, VNInfo*> ValNoMaps;
+ VNInfo *UnusedValNo = 0;
+ iterator IP = begin();
+ for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+ // For every val# in the Clobbers interval, create a new "unknown" val#.
+ VNInfo *ClobberValNo = 0;
+ DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
+ if (VI != ValNoMaps.end())
+ ClobberValNo = VI->second;
+ else if (UnusedValNo)
+ ClobberValNo = UnusedValNo;
+ else {
+ UnusedValNo = ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+ ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
+ }
+
+ bool Done = false;
+ unsigned Start = I->start, End = I->end;
+ // If a clobber range starts before an existing range and ends after
+ // it, the clobber range will need to be split into multiple ranges.
+ // Loop until the entire clobber range is handled.
+ while (!Done) {
+ Done = true;
+ IP = std::upper_bound(IP, end(), Start);
+ unsigned SubRangeStart = Start;
+ unsigned SubRangeEnd = End;
+
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > SubRangeStart) {
+ SubRangeStart = IP[-1].end;
+ // Trimmed away the whole range?
+ if (SubRangeStart >= SubRangeEnd) continue;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && SubRangeEnd > IP->start) {
+ // If the clobber live range extends beyond the existing live range,
+ // it'll need at least another live range, so set the flag to keep
+ // iterating.
+ if (SubRangeEnd > IP->end) {
+ Start = IP->end;
+ Done = false;
+ }
+ SubRangeEnd = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (SubRangeStart == SubRangeEnd) continue;
+ }
+
+ // Insert the clobber interval.
+ IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
+ IP);
+ UnusedValNo = 0;
+ }
+ }
+
+ if (UnusedValNo) {
+ // Delete the last unused val#.
+ valnos.pop_back();
+ UnusedValNo->~VNInfo();
+ }
+}
+
+void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End,
+ BumpPtrAllocator &VNInfoAllocator) {
+ // Find a value # to use for the clobber ranges. If there is already a value#
+ // for unknown values, use it.
+ VNInfo *ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+
+ iterator IP = begin();
+ IP = std::upper_bound(IP, end(), Start);
+
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > Start) {
+ Start = IP[-1].end;
+ // Trimmed away the whole range?
+ if (Start >= End) return;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && End > IP->start) {
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) return;
+ }
+
+ // Insert the clobber interval.
+ addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ copyValNumInfo(V1, V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->valno != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->valno == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->valno = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->valno == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it. If it is the largest value number, just
+ // nuke it (and any other deleted values neighboring it), otherwise mark it as
+ // ~1U so it can be nuked later.
+ if (V1->id == getNumValNums()-1) {
+ do {
+ VNInfo *VNI = valnos.back();
+ valnos.pop_back();
+ VNI->~VNInfo();
+    } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } else {
+ V1->def = ~1U;
+ }
+
+ return V2;
+}
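+
+// Worked example (illustrative): given the ranges [0,4:V2) [4,8:V1) [8,12:V2),
+// MergeValueNumberInto(V1, V2) extends [0,4) over the touching V1 range, then
+// merges the result with the following V2 range, producing the single range
+// [0,12:V2); V1 itself is then erased or marked dead (~1U).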
+
+void LiveInterval::Copy(const LiveInterval &RHS,
+ BumpPtrAllocator &VNInfoAllocator) {
+ ranges.clear();
+ valnos.clear();
+ preference = RHS.preference;
+ weight = RHS.weight;
+ for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
+ const VNInfo *VNI = RHS.getValNumInfo(i);
+ VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator);
+ copyValNumInfo(NewVNI, VNI);
+ }
+ for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
+ const LiveRange &LR = RHS.ranges[i];
+ addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
+ }
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->end - I->start;
+ return Sum;
+}
+
+std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+void LiveRange::dump() const {
+ cerr << *this << "\n";
+}
+
+void LiveInterval::print(std::ostream &OS,
+ const TargetRegisterInfo *TRI) const {
+ if (isStackSlot())
+ OS << "SS#" << getStackSlotIndex();
+ else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
+ OS << TRI->getName(reg);
+ else
+ OS << "%reg" << reg;
+
+ OS << ',' << weight;
+
+ if (empty())
+ OS << " EMPTY";
+ else {
+ OS << " = ";
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I)
+ OS << *I;
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << " ";
+ OS << vnum << "@";
+ if (vni->def == ~1U) {
+ OS << "x";
+ } else {
+ if (vni->def == ~0U)
+ OS << "?";
+ else
+ OS << vni->def;
+ unsigned ee = vni->kills.size();
+ if (ee || vni->hasPHIKill) {
+ OS << "-(";
+ for (unsigned j = 0; j != ee; ++j) {
+ OS << vni->kills[j];
+ if (j != ee-1)
+ OS << " ";
+ }
+ if (vni->hasPHIKill) {
+ if (ee)
+ OS << " ";
+ OS << "phi";
+ }
+ OS << ")";
+ }
+ }
+ }
+ }
+}
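+
+// Output format (illustrative): a virtual register interval prints roughly as
+//   %reg1024,4 = [10,20:0)  0@10-(20)
+// i.e. the register, its weight, the list of [start,end:valno) ranges, and for
+// each value number "id@def" followed by its kill indices ("?" marks an
+// unknown def, "x" a dead value number).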
+
+void LiveInterval::dump() const {
+ cerr << *this << "\n";
+}
+
+
+void LiveRange::print(std::ostream &os) const {
+ os << *this;
+}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 0000000..cf0a648
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,2298 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<bool> DisableReMat("disable-rematerialization",
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> SplitAtBB("split-intervals-at-bb",
+ cl::init(true), cl::Hidden);
+static cl::opt<int> SplitLimit("split-limit",
+ cl::init(-1), cl::Hidden);
+
+static cl::opt<bool> EnableAggressiveRemat("aggressive-remat", cl::Hidden);
+
+static cl::opt<bool> EnableFastSpilling("fast-spill",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(numIntervals, "Number of original intervals");
+STATISTIC(numFolds , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits , "Number of intervals split");
+
+char LiveIntervals::ID = 0;
+static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+
+ if (!StrongPHIElim) {
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequiredID(PHIEliminationID);
+ }
+
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
+ E = r2iMap_.end(); I != E; ++I)
+ delete I->second;
+
+ MBB2IdxMap.clear();
+ Idx2MBBMap.clear();
+ mi2iMap_.clear();
+ i2miMap_.clear();
+ r2iMap_.clear();
+  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+ VNInfoAllocator.Reset();
+ while (!ClonedMIs.empty()) {
+ MachineInstr *MI = ClonedMIs.back();
+ ClonedMIs.pop_back();
+ mf_->DeleteMachineInstr(MI);
+ }
+}
+
+void LiveIntervals::computeNumbering() {
+ Index2MiMap OldI2MI = i2miMap_;
+ std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap;
+
+ Idx2MBBMap.clear();
+ MBB2IdxMap.clear();
+ mi2iMap_.clear();
+ i2miMap_.clear();
+
+ FunctionSize = 0;
+
+ // Number MachineInstrs and MachineBasicBlocks.
+  // Initialize MBB indexes to a sentinel.
+ MBB2IdxMap.resize(mf_->getNumBlockIDs(), std::make_pair(~0U,~0U));
+
+ unsigned MIIndex = 0;
+ for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
+ MBB != E; ++MBB) {
+ unsigned StartIdx = MIIndex;
+
+ // Insert an empty slot at the beginning of each block.
+ MIIndex += InstrSlots::NUM;
+ i2miMap_.push_back(0);
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
+ assert(inserted && "multiple MachineInstr -> index mappings");
+ inserted = true;
+ i2miMap_.push_back(I);
+ MIIndex += InstrSlots::NUM;
+ FunctionSize++;
+
+ // Insert max(1, numdefs) empty slots after every instruction.
+ unsigned Slots = I->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+ MIIndex += InstrSlots::NUM * Slots;
+ while (Slots--)
+ i2miMap_.push_back(0);
+ }
+
+ // Set the MBB2IdxMap entry for this MBB.
+ MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, MIIndex - 1);
+ Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB));
+ }
+ std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
+
+ if (!OldI2MI.empty())
+ for (iterator OI = begin(), OE = end(); OI != OE; ++OI) {
+ for (LiveInterval::iterator LI = OI->second->begin(),
+ LE = OI->second->end(); LI != LE; ++LI) {
+
+ // Remap the start index of the live range to the corresponding new
+ // number, or our best guess at what it _should_ correspond to if the
+ // original instruction has been erased. This is either the following
+ // instruction or its predecessor.
+ unsigned index = LI->start / InstrSlots::NUM;
+ unsigned offset = LI->start % InstrSlots::NUM;
+ if (offset == InstrSlots::LOAD) {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start);
+ // Take the pair containing the index
+ std::vector<IdxMBBPair>::const_iterator J =
+ (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I;
+
+ LI->start = getMBBStartIdx(J->second);
+ } else {
+ LI->start = mi2iMap_[OldI2MI[index]] + offset;
+ }
+
+ // Remap the ending index in the same way that we remapped the start,
+ // except for the final step where we always map to the immediately
+ // following instruction.
+ index = (LI->end - 1) / InstrSlots::NUM;
+ offset = LI->end % InstrSlots::NUM;
+ if (offset == InstrSlots::LOAD) {
+ // VReg dies at end of block.
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end);
+ --I;
+
+ LI->end = getMBBEndIdx(I->second) + 1;
+ } else {
+ unsigned idx = index;
+ while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
+
+ if (index != OldI2MI.size())
+ LI->end = mi2iMap_[OldI2MI[index]] + (idx == index ? offset : 0);
+ else
+ LI->end = InstrSlots::NUM * i2miMap_.size();
+ }
+ }
+
+ for (LiveInterval::vni_iterator VNI = OI->second->vni_begin(),
+ VNE = OI->second->vni_end(); VNI != VNE; ++VNI) {
+ VNInfo* vni = *VNI;
+
+ // Remap the VNInfo def index, which works the same as the
+ // start indices above. VN's with special sentinel defs
+ // don't need to be remapped.
+ if (vni->def != ~0U && vni->def != ~1U) {
+ unsigned index = vni->def / InstrSlots::NUM;
+ unsigned offset = vni->def % InstrSlots::NUM;
+ if (offset == InstrSlots::LOAD) {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def);
+ // Take the pair containing the index
+ std::vector<IdxMBBPair>::const_iterator J =
+ (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I;
+
+ vni->def = getMBBStartIdx(J->second);
+ } else {
+ vni->def = mi2iMap_[OldI2MI[index]] + offset;
+ }
+ }
+
+ // Remap the VNInfo kill indices, which works the same as
+ // the end indices above.
+ for (size_t i = 0; i < vni->kills.size(); ++i) {
+ // PHI kills don't need to be remapped.
+ if (!vni->kills[i]) continue;
+
+ unsigned index = (vni->kills[i]-1) / InstrSlots::NUM;
+ unsigned offset = vni->kills[i] % InstrSlots::NUM;
+ if (offset == InstrSlots::LOAD) {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
+ --I;
+
+ vni->kills[i] = getMBBEndIdx(I->second);
+ } else {
+ unsigned idx = index;
+ while (index < OldI2MI.size() && !OldI2MI[index]) ++index;
+
+ if (index != OldI2MI.size())
+ vni->kills[i] = mi2iMap_[OldI2MI[index]] +
+ (idx == index ? offset : 0);
+ else
+ vni->kills[i] = InstrSlots::NUM * i2miMap_.size();
+ }
+ }
+ }
+ }
+}
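+
+// Numbering sketch (illustrative, assuming InstrSlots::NUM == 4 as suggested
+// by the "+4" arithmetic in comments elsewhere in this file): each basic block
+// gets one empty slot group at its start, every instruction then occupies NUM
+// indices, and max(1, numdefs) empty slot groups follow it, so a block with
+// two single-def instructions is laid out as
+//   [empty][inst0][empty][inst1][empty]
+// with each bracket spanning NUM consecutive indices.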
+
+void LiveIntervals::scaleNumbering(int factor) {
+  // Need to:
+  //  * scale MBB begin and end points
+  //  * scale all ranges
+  //  * update VNI structures
+  //  * scale instruction numberings
+
+ // Scale the MBB indices.
+ Idx2MBBMap.clear();
+ for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end();
+ MBB != MBBE; ++MBB) {
+ std::pair<unsigned, unsigned> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
+ mbbIndices.first = InstrSlots::scale(mbbIndices.first, factor);
+ mbbIndices.second = InstrSlots::scale(mbbIndices.second, factor);
+ Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB));
+ }
+ std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
+
+ // Scale the intervals.
+ for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
+ LI->second->scaleNumbering(factor);
+ }
+
+ // Scale MachineInstrs.
+ Mi2IndexMap oldmi2iMap = mi2iMap_;
+ unsigned highestSlot = 0;
+ for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end();
+ MI != ME; ++MI) {
+ unsigned newSlot = InstrSlots::scale(MI->second, factor);
+ mi2iMap_[MI->first] = newSlot;
+ highestSlot = std::max(highestSlot, newSlot);
+ }
+
+ i2miMap_.clear();
+ i2miMap_.resize(highestSlot + 1);
+ for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end();
+ MI != ME; ++MI) {
+ i2miMap_[MI->second] = MI->first;
+ }
+
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &mf_->getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ aa_ = &getAnalysis<AliasAnalysis>();
+ lv_ = &getAnalysis<LiveVariables>();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+
+ computeNumbering();
+ computeIntervals();
+
+ numIntervals += getNumIntervals();
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(std::ostream &O, const Module* ) const {
+ O << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second->print(O, tri_);
+ O << "\n";
+ }
+
+ O << "********** MACHINEINSTRS **********\n";
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+ for (MachineBasicBlock::iterator mii = mbbi->begin(),
+ mie = mbbi->end(); mii != mie; ++mii) {
+ O << getInstructionIndex(mii) << '\t' << *mii;
+ }
+ }
+}
+
+/// conflictsWithPhysRegDef - Returns true if the specified register
+/// is defined during the duration of the specified interval.
+bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li,
+ VirtRegMap &vrm, unsigned reg) {
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ for (unsigned index = getBaseIndex(I->start),
+ end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
+ index += InstrSlots::NUM) {
+ // skip deleted instructions
+ while (index != end && !getInstructionFromIndex(index))
+ index += InstrSlots::NUM;
+ if (index == end) break;
+
+ MachineInstr *MI = getInstructionFromIndex(index);
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (SrcReg == li.reg || DstReg == li.reg)
+ continue;
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned PhysReg = mop.getReg();
+ if (PhysReg == 0 || PhysReg == li.reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+ if (!vrm.hasPhys(PhysReg))
+ continue;
+ PhysReg = vrm.getPhys(PhysReg);
+ }
+ if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegDef except
+/// it can check uses as well.
+bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
+ unsigned Reg, bool CheckUse,
+ SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ for (unsigned index = getBaseIndex(I->start),
+ end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
+ index += InstrSlots::NUM) {
+ // Skip deleted instructions.
+ MachineInstr *MI = 0;
+ while (index != end) {
+ MI = getInstructionFromIndex(index);
+ if (MI)
+ break;
+ index += InstrSlots::NUM;
+ }
+ if (index == end) break;
+
+ if (JoinedCopies.count(MI))
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse() && !CheckUse)
+ continue;
+ unsigned PhysReg = MO.getReg();
+ if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg))
+ continue;
+ if (tri_->isSubRegister(Reg, PhysReg))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+
+void LiveIntervals::printRegName(unsigned reg) const {
+ if (TargetRegisterInfo::isPhysicalRegister(reg))
+ cerr << tri_->getName(reg);
+ else
+ cerr << "%reg" << reg;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator mi,
+ unsigned MIIdx, MachineOperand& MO,
+ unsigned MOIdx,
+ LiveInterval &interval) {
+ DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+
+ if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ DOUT << "is a implicit_def\n";
+ return;
+ }
+
+ // Virtual registers may be defined multiple times (due to phi
+ // elimination and 2-addr elimination). Much of what we do only has to be
+ // done once for the vreg. We use an empty interval to detect the first
+ // time we see a vreg.
+ if (interval.empty()) {
+ // Get the Idx of the defining instructions.
+ unsigned defIndex = getDefIndex(MIIdx);
+ // Earlyclobbers move back one.
+ if (MO.isEarlyClobber())
+ defIndex = getUseIndex(MIIdx);
+ VNInfo *ValNo;
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = mi;
+ // Earlyclobbers move back one.
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+ assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+ // Loop over all of the blocks that the vreg is defined in. There are
+ // two cases we have to handle here. The most common case is a vreg
+ // whose lifetime is contained within a basic block. In this case there
+ // will be a single kill, in MBB, which comes after the definition.
+ if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+ // FIXME: what about dead vars?
+ unsigned killIdx;
+ if (vi.Kills[0] != mi)
+ killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+ else
+ killIdx = defIndex+1;
+
+ // If the kill happens after the definition, we have an intra-block
+ // live range.
+ if (killIdx > defIndex) {
+ assert(vi.AliveBlocks.empty() &&
+ "Shouldn't be alive across any blocks!");
+ LiveRange LR(defIndex, killIdx, ValNo);
+ interval.addRange(LR);
+ DOUT << " +" << LR << "\n";
+ interval.addKill(ValNo, killIdx);
+ return;
+ }
+ }
+
+ // The other case we handle is when a virtual register lives to the end
+ // of the defining block, potentially live across some blocks, then is
+ // live into some number of blocks, but gets killed. Start by adding a
+ // range that goes from this definition to the end of the defining block.
+ LiveRange NewLR(defIndex, getMBBEndIdx(mbb)+1, ValNo);
+ DOUT << " +" << NewLR;
+ interval.addRange(NewLR);
+
+ // Iterate over all of the blocks that the variable is completely
+    // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+ E = vi.AliveBlocks.end(); I != E; ++I) {
+ LiveRange LR(getMBBStartIdx(*I),
+ getMBBEndIdx(*I)+1, // MBB ends at -1.
+ ValNo);
+ interval.addRange(LR);
+ DOUT << " +" << LR;
+ }
+
+ // Finally, this virtual register is live from the start of any killing
+ // block to the 'use' slot of the killing instruction.
+ for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+ MachineInstr *Kill = vi.Kills[i];
+ unsigned killIdx = getUseIndex(getInstructionIndex(Kill))+1;
+ LiveRange LR(getMBBStartIdx(Kill->getParent()),
+ killIdx, ValNo);
+ interval.addRange(LR);
+ interval.addKill(ValNo, killIdx);
+ DOUT << " +" << LR;
+ }
+
+ } else {
+ // If this is the second time we see a virtual register definition, it
+ // must be due to phi elimination or two addr elimination. If this is
+ // the result of two address elimination, then the vreg is one of the
+    // def-and-use register operands.
+ if (mi->isRegTiedToUseOperand(MOIdx)) {
+ // If this is a two-address definition, then we have already processed
+ // the live range. The only problem is that we didn't realize there
+ // are actually two values in the live interval. Because of this we
+ // need to take the LiveRegion that defines this register and split it
+ // into two values.
+ assert(interval.containsOneValue());
+ unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def);
+ unsigned RedefIndex = getDefIndex(MIIdx);
+ if (MO.isEarlyClobber())
+ RedefIndex = getUseIndex(MIIdx);
+
+ const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1);
+ VNInfo *OldValNo = OldLR->valno;
+
+ // Delete the initial value, which should be short and continuous,
+ // because the 2-addr copy must be in the same MBB as the redef.
+ interval.removeRange(DefIndex, RedefIndex);
+
+ // Two-address vregs should always only be redefined once. This means
+ // that at this point, there should be exactly one value number in it.
+ assert(interval.containsOneValue() && "Unexpected 2-addr liveint!");
+
+ // The new value number (#1) is defined by the instruction we claimed
+ // defined value #0.
+ VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy,
+ VNInfoAllocator);
+
+ // Value#0 is now defined by the 2-addr instruction.
+ OldValNo->def = RedefIndex;
+ OldValNo->copy = 0;
+ if (MO.isEarlyClobber())
+ OldValNo->redefByEC = true;
+
+ // Add the new live interval which replaces the range for the input copy.
+ LiveRange LR(DefIndex, RedefIndex, ValNo);
+ DOUT << " replace range with " << LR;
+ interval.addRange(LR);
+ interval.addKill(ValNo, RedefIndex);
+
+ // If this redefinition is dead, we need to add a dummy unit live
+ // range covering the def slot.
+ if (MO.isDead())
+ interval.addRange(LiveRange(RedefIndex, RedefIndex+1, OldValNo));
+
+ DOUT << " RESULT: ";
+ interval.print(DOUT, tri_);
+
+ } else {
+ // Otherwise, this must be because of phi elimination. If this is the
+ // first redefinition of the vreg that we have seen, go back and change
+ // the live range in the PHI block to be a different value number.
+ if (interval.containsOneValue()) {
+ assert(vi.Kills.size() == 1 &&
+ "PHI elimination vreg should have one kill, the PHI itself!");
+
+ // Remove the old range that we now know has an incorrect number.
+ VNInfo *VNI = interval.getValNumInfo(0);
+ MachineInstr *Killer = vi.Kills[0];
+ unsigned Start = getMBBStartIdx(Killer->getParent());
+ unsigned End = getUseIndex(getInstructionIndex(Killer))+1;
+ DOUT << " Removing [" << Start << "," << End << "] from: ";
+ interval.print(DOUT, tri_); DOUT << "\n";
+ interval.removeRange(Start, End);
+ VNI->hasPHIKill = true;
+ DOUT << " RESULT: "; interval.print(DOUT, tri_);
+
+ // Replace the interval with one of a NEW value number. Note that this
+ // value number isn't actually defined by an instruction, weird huh? :)
+ LiveRange LR(Start, End, interval.getNextValue(~0, 0, VNInfoAllocator));
+ DOUT << " replace range with " << LR;
+ interval.addRange(LR);
+ interval.addKill(LR.valno, End);
+ DOUT << " RESULT: "; interval.print(DOUT, tri_);
+ }
+
+ // In the case of PHI elimination, each variable definition is only
+ // live until the end of the block. We've already taken care of the
+ // rest of the live range.
+ unsigned defIndex = getDefIndex(MIIdx);
+ if (MO.isEarlyClobber())
+ defIndex = getUseIndex(MIIdx);
+
+ VNInfo *ValNo;
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = mi;
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+ unsigned killIndex = getMBBEndIdx(mbb) + 1;
+ LiveRange LR(defIndex, killIndex, ValNo);
+ interval.addRange(LR);
+ interval.addKill(ValNo, killIndex);
+ ValNo->hasPHIKill = true;
+ DOUT << " +" << LR;
+ }
+ }
+
+ DOUT << '\n';
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator mi,
+ unsigned MIIdx,
+ MachineOperand& MO,
+ LiveInterval &interval,
+ MachineInstr *CopyMI) {
+  // A physical register cannot be live across basic blocks, so its
+ // lifetime must end somewhere in its defining basic block.
+ DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+
+ unsigned baseIndex = MIIdx;
+ unsigned start = getDefIndex(baseIndex);
+ // Earlyclobbers move back one.
+ if (MO.isEarlyClobber())
+ start = getUseIndex(MIIdx);
+ unsigned end = start;
+
+ // If it is not used after definition, it is considered dead at
+ // the instruction defining it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ if (MO.isDead()) {
+ DOUT << " dead";
+ end = start + 1;
+ goto exit;
+ }
+
+ // If it is not dead on definition, it must be killed by a
+ // subsequent instruction. Hence its interval is:
+ // [defSlot(def), useSlot(kill)+1)
+ baseIndex += InstrSlots::NUM;
+ while (++mi != MBB->end()) {
+ while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(baseIndex) == 0)
+ baseIndex += InstrSlots::NUM;
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DOUT << " killed";
+ end = getUseIndex(baseIndex) + 1;
+ goto exit;
+ } else {
+ int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_);
+ if (DefIdx != -1) {
+ if (mi->isRegTiedToUseOperand(DefIdx)) {
+ // Two-address instruction.
+ end = getDefIndex(baseIndex);
+ if (mi->getOperand(DefIdx).isEarlyClobber())
+ end = getUseIndex(baseIndex);
+ } else {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DOUT << " dead";
+ end = start + 1;
+ }
+ goto exit;
+ }
+ }
+
+ baseIndex += InstrSlots::NUM;
+ }
+
+ // The only case where we should see a dead physreg here without a killing
+ // instruction, or some other point where we know it is dead, is if it is
+ // live-in to the function and never used. Another possibility is that the
+ // implicit use of the physical register has been deleted by the two-address
+ // pass.
+ end = start + 1;
+
+exit:
+ assert(start < end && "did not find end of interval?");
+
+ // Already exists? Extend old live interval.
+ LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
+ bool Extend = OldLR != interval.end();
+ VNInfo *ValNo = Extend
+ ? OldLR->valno : interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ if (MO.isEarlyClobber() && Extend)
+ ValNo->redefByEC = true;
+ LiveRange LR(start, end, ValNo);
+ interval.addRange(LR);
+ interval.addKill(LR.valno, end);
+ DOUT << " +" << LR << '\n';
+}
+
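+/// handleRegisterDef - Update the live interval for the register defined by
+/// the given operand: virtual registers go through handleVirtualRegisterDef,
+/// allocatable physical registers (and their sub-registers) through
+/// handlePhysicalRegisterDef.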
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned MIIdx,
+ MachineOperand& MO,
+ unsigned MOIdx) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
+ getOrCreateInterval(MO.getReg()));
+ else if (allocatableRegs_[MO.getReg()]) {
+ MachineInstr *CopyMI = NULL;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+ tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ CopyMI = MI;
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(MO.getReg()), CopyMI);
+ // Def of a register also defines its sub-registers.
+ for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
+ // If MI also modifies the sub-register explicitly, avoid processing it
+ // more than once. Do not pass in TRI here so it checks for exact match.
+ if (!MI->modifiesRegister(*AS))
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+ getOrCreateInterval(*AS), 0);
+ }
+}
+
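+/// handleLiveInRegister - Compute the live range for a register that is
+/// live-in to MBB. The range runs from the start of the block to the first
+/// kill or redefinition, or through the whole block if the register is never
+/// touched.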
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+ unsigned MIIdx,
+ LiveInterval &interval, bool isAlias) {
+ DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg));
+
+ // Look for kills; if the value reaches a def before it is killed, then it
+ // shouldn't be considered live-in.
+ MachineBasicBlock::iterator mi = MBB->begin();
+ unsigned baseIndex = MIIdx;
+ unsigned start = baseIndex;
+ while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(baseIndex) == 0)
+ baseIndex += InstrSlots::NUM;
+ unsigned end = baseIndex;
+ bool SeenDefUse = false;
+
+ while (mi != MBB->end()) {
+ if (mi->killsRegister(interval.reg, tri_)) {
+ DOUT << " killed";
+ end = getUseIndex(baseIndex) + 1;
+ SeenDefUse = true;
+ goto exit;
+ } else if (mi->modifiesRegister(interval.reg, tri_)) {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DOUT << " dead";
+ end = getDefIndex(start) + 1;
+ SeenDefUse = true;
+ goto exit;
+ }
+
+ baseIndex += InstrSlots::NUM;
+ ++mi;
+ if (mi != MBB->end()) {
+ while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(baseIndex) == 0)
+ baseIndex += InstrSlots::NUM;
+ }
+ }
+
+exit:
+ // Live-in register might not be used at all.
+ if (!SeenDefUse) {
+ if (isAlias) {
+ DOUT << " dead";
+ end = getDefIndex(MIIdx) + 1;
+ } else {
+ DOUT << " live through";
+ end = baseIndex;
+ }
+ }
+
+ LiveRange LR(start, end, interval.getNextValue(~0U, 0, VNInfoAllocator));
+ interval.addRange(LR);
+ interval.addKill(LR.valno, end);
+ DOUT << " +" << LR << '\n';
+}
+
+/// computeIntervals - Compute the live intervals for virtual
+/// registers. For some ordering of the machine instructions [1,N], a
+/// live interval is an interval [i, j), where 1 <= i <= j < N, for
+/// which a variable is live.
+void LiveIntervals::computeIntervals() {
+
+ DOUT << "********** COMPUTING LIVE INTERVALS **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n';
+
+ for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ // Track the index of the current machine instr.
+ unsigned MIIndex = getMBBStartIdx(MBB);
+ DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+ MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+
+ // Create intervals for live-ins to this BB first.
+ for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end(); LI != LE; ++LI) {
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+ // Multiple live-ins can alias the same register.
+ for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
+ if (!hasInterval(*AS))
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+ true);
+ }
+
+ // Skip over empty initial indices.
+ while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(MIIndex) == 0)
+ MIIndex += InstrSlots::NUM;
+
+ for (; MI != miEnd; ++MI) {
+ DOUT << MIIndex << "\t" << *MI;
+
+ // Handle defs.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ // handle register defs - build intervals
+ if (MO.isReg() && MO.getReg() && MO.isDef()) {
+ handleRegisterDef(MBB, MI, MIIndex, MO, i);
+ }
+ }
+
+ // Skip over the empty slots after each instruction.
+ unsigned Slots = MI->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+ MIIndex += InstrSlots::NUM * Slots;
+
+ // Skip over empty indices.
+ while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+ getInstructionFromIndex(MIIndex) == 0)
+ MIIndex += InstrSlots::NUM;
+ }
+ }
+}
+
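+/// findLiveInMBBs - Collect the basic blocks whose start index falls in the
+/// range [Start, End). Returns true if at least one block was found.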
+bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End,
+ SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
+
+ bool ResVal = false;
+ while (I != Idx2MBBMap.end()) {
+ if (I->first >= End)
+ break;
+ MBBs.push_back(I->second);
+ ResVal = true;
+ ++I;
+ }
+ return ResVal;
+}
+
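+/// findReachableMBBs - For each basic block lying entirely within the index
+/// range [Start, End], collect its successors. Returns true if any such
+/// block was found.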
+bool LiveIntervals::findReachableMBBs(unsigned Start, unsigned End,
+ SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
+ std::vector<IdxMBBPair>::const_iterator I =
+ std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start);
+
+ bool ResVal = false;
+ while (I != Idx2MBBMap.end()) {
+ if (I->first > End)
+ break;
+ MachineBasicBlock *MBB = I->second;
+ if (getMBBEndIdx(MBB) > End)
+ break;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ MBBs.push_back(*SI);
+ ResVal = true;
+ ++I;
+ }
+ return ResVal;
+}
+
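+/// createInterval - Allocate a new live interval for the given register.
+/// Physical registers get an effectively infinite spill weight (HUGE_VALF)
+/// so that they are never chosen for spilling.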
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+/// dupInterval - Duplicate a live interval. The caller is responsible for
+/// managing the allocated memory.
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+ LiveInterval *NewLI = createInterval(li->reg);
+ NewLI->Copy(*li, getVNInfoAllocator());
+ return NewLI;
+}
+
+/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
+/// copy field and returns the source register that defines it.
+unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
+ if (!VNI->copy)
+ return 0;
+
+ if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ // If it's extracting out of a physical register, return the sub-register.
+ unsigned Reg = VNI->copy->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm());
+ return Reg;
+ } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
+ return VNI->copy->getOperand(2).getReg();
+
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return SrcReg;
+ assert(0 && "Unrecognized copy instruction!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+ MachineInstr *MI) const {
+ unsigned RegOp = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == li.reg)
+ continue;
+ // FIXME: For now, only remat MI with at most one register operand.
+ assert(!RegOp &&
+ "Can't rematerialize instruction with multiple register operand!");
+ RegOp = MO.getReg();
+#ifndef NDEBUG
+ break;
+#endif
+ }
+ return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+ unsigned UseIdx) const {
+ unsigned Index = getInstructionIndex(MI);
+ VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
+ LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
+ return UI != li.end() && UI->valno == ValNo;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
+ if (DisableReMat)
+ return false;
+
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ return true;
+
+ int FrameIdx = 0;
+ if (tii_->isLoadFromStackSlot(MI, FrameIdx) &&
+ mf_->getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ // FIXME: Let the target-specific isReallyTriviallyReMaterializable
+ //        determine this, but remember it is not safe to fold into a
+ //        two-address instruction.
+ // This is a load from a fixed stack slot. It can be rematerialized.
+ return true;
+
+ // If the target-specific rules don't identify an instruction as
+ // being trivially rematerializable, use some target-independent
+ // rules.
+ if (!MI->getDesc().isRematerializable() ||
+ !tii_->isTriviallyReMaterializable(MI)) {
+ if (!EnableAggressiveRemat)
+ return false;
+
+ // If the instruction accesses memory but the memoperands have been lost,
+ // we can't analyze it.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty())
+ return false;
+
+ // Avoid instructions obviously unsafe for remat.
+ if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable())
+ return false;
+
+ // If the instruction accesses memory and the memory could be non-constant,
+ // assume the instruction is not rematerializable.
+ for (std::list<MachineMemOperand>::const_iterator
+ I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I){
+ const MachineMemOperand &MMO = *I;
+ if (MMO.isVolatile() || MMO.isStore())
+ return false;
+ const Value *V = MMO.getValue();
+ if (!V)
+ return false;
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ if (!PSV->isConstant(mf_->getFrameInfo()))
+ return false;
+ } else if (!aa_->pointsToConstantMemory(V))
+ return false;
+ }
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ unsigned ImpUse = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ // Only allow one def, and that in the first operand.
+ if (MO.isDef() != (i == 0))
+ return false;
+
+ // Only allow constant-valued registers.
+ bool IsLiveIn = mri_->isLiveIn(Reg);
+ MachineRegisterInfo::def_iterator I = mri_->def_begin(Reg),
+ E = mri_->def_end();
+
+ // For the def, it should be the only def of that register.
+ if (MO.isDef() && (next(I) != E || IsLiveIn))
+ return false;
+
+ if (MO.isUse()) {
+ // Only allow one other register use, as that's all the
+ // remat mechanisms currently support.
+ if (Reg != li.reg) {
+ if (ImpUse == 0)
+ ImpUse = Reg;
+ else if (Reg != ImpUse)
+ return false;
+ }
+ // For the use, there should be only one associated def.
+ if (I != E && (next(I) != E || IsLiveIn))
+ return false;
+ }
+ }
+ }
+ }
+
+ unsigned ImpUse = getReMatImplicitUse(li, MI);
+ if (ImpUse) {
+ const LiveInterval &ImpLi = getInterval(ImpUse);
+ for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
+ re = mri_->use_end(); ri != re; ++ri) {
+ MachineInstr *UseMI = &*ri;
+ unsigned UseIdx = getInstructionIndex(UseMI);
+ if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+ continue;
+ if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+ return false;
+ }
+
+ // If a register operand of the re-materialized instruction is going to
+ // be spilled next, then it's not legal to re-materialize this instruction.
+ for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
+ if (ImpUse == SpillIs[i]->reg)
+ return false;
+ }
+ return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI) {
+ SmallVector<LiveInterval*, 4> Dummy1;
+ bool Dummy2;
+ return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+}
+
+/// isReMaterializable - Returns true if the defining MI of every
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
+ isLoad = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ unsigned DefIdx = VNI->def;
+ if (DefIdx == ~1U)
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ if (DefIdx == ~0u)
+ return false;
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(DefIdx);
+ bool DefIsLoad = false;
+ if (!ReMatDefMI ||
+ !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+ return false;
+ isLoad |= DefIsLoad;
+ }
+ return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ unsigned &MRInfo,
+ SmallVector<unsigned, 2> &FoldOps) {
+ MRInfo = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned OpIdx = Ops[i];
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ // FIXME: fold subreg use.
+ if (MO.getSubReg())
+ return true;
+ if (MO.isDef())
+ MRInfo |= (unsigned)VirtRegMap::isMod;
+ else {
+ // Filter out two-address use operand(s).
+ if (MI->isRegTiedToDefOperand(OpIdx)) {
+ MRInfo = VirtRegMap::isModRef;
+ continue;
+ }
+ MRInfo |= (unsigned)VirtRegMap::isRef;
+ }
+ FoldOps.push_back(OpIdx);
+ }
+ return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
+/// slot / to reg or any rematerialized load into the given operand(s) of the
+/// specified MI. If it is successful, MI is updated with the newly created
+/// MI and true is returned.
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+ VirtRegMap &vrm, MachineInstr *DefMI,
+ unsigned InstrIdx,
+ SmallVector<unsigned, 2> &Ops,
+ bool isSS, int Slot, unsigned Reg) {
+ // If it is an implicit def instruction, just delete it.
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++numFolds;
+ return true;
+ }
+
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // The only time it's safe to fold into a two-address instruction is when
+ // it's folding a reload and spill from / into a spill stack slot.
+ if (DefMI && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot)
+ : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI);
+ if (fmi) {
+ // Remember this instruction uses the spill slot.
+ if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+ // Attempt to fold the memory reference into the instruction. If
+ // we can do this, we don't need to insert spill code.
+ MachineBasicBlock &MBB = *MI->getParent();
+ if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+ vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+ vrm.transferSpillPts(MI, fmi);
+ vrm.transferRestorePts(MI, fmi);
+ vrm.transferEmergencySpills(MI, fmi);
+ mi2iMap_.erase(MI);
+ i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
+ mi2iMap_[fmi] = InstrIdx;
+ MI = MBB.insert(MBB.erase(MI), fmi);
+ ++numFolds;
+ return true;
+ }
+ return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+ SmallVector<unsigned, 2> &Ops,
+ bool ReMat) const {
+ // Filter the list of operand indexes that are to be folded. Abort if
+ // any operand will prevent folding.
+ unsigned MRInfo = 0;
+ SmallVector<unsigned, 2> FoldOps;
+ if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+ return false;
+
+ // It's only legal to remat for a use, not a def.
+ if (ReMat && (MRInfo & VirtRegMap::isMod))
+ return false;
+
+ return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
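+/// intervalIsInOneMBB - Returns true if the live interval is entirely
+/// contained within a single basic block.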
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+ SmallPtrSet<MachineBasicBlock*, 4> MBBs;
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ std::vector<IdxMBBPair>::const_iterator II =
+ std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), I->start);
+ if (II == Idx2MBBMap.end())
+ continue;
+ if (I->end > II->first) // crossing a MBB.
+ return false;
+ MBBs.insert(II->second);
+ if (MBBs.size() > 1)
+ return false;
+ }
+ return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of the
+/// interval on to-be re-materialized operands of MI) with the new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+ MachineInstr *MI, unsigned NewVReg,
+ VirtRegMap &vrm) {
+ // There is an implicit use. That means one of the other operands is
+ // being remat'ed and the remat'ed instruction has li.reg as a
+ // use operand. Make sure we rewrite that as well.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (!vrm.isReMaterialized(Reg))
+ continue;
+ MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
+ MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
+ if (UseMO)
+ UseMO->setReg(NewVReg);
+ }
+}
+
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+bool LiveIntervals::
+rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
+ bool TrySplit, unsigned index, unsigned end, MachineInstr *MI,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool CanFold = false;
+ RestartInstruction:
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned Reg = mop.getReg();
+ unsigned RegI = Reg;
+ if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (Reg != li.reg)
+ continue;
+
+ bool TryFold = !DefIsReMat;
+ bool FoldSS = true; // Default behavior unless it's a remat.
+ int FoldSlot = Slot;
+ if (DefIsReMat) {
+ // If this is the rematerializable definition MI itself and
+ // all of its uses are rematerialized, simply delete it.
+ if (MI == ReMatOrigDefMI && CanDelete) {
+ DOUT << "\t\t\t\tErasing re-materlizable def: ";
+ DOUT << MI << '\n';
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+
+ // If def for this use can't be rematerialized, then try folding.
+ // If def is rematerializable and it's a load, also try folding.
+ TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
+ if (isLoad) {
+ // Try fold loads (from stack slot, constant pool, etc.) into uses.
+ FoldSS = isLoadSS;
+ FoldSlot = LdSlot;
+ }
+ }
+
+ // Scan all of the operands of this instruction rewriting operands
+ // to use NewVReg instead of li.reg as appropriate. We do this for
+ // two reasons:
+ //
+ // 1. If the instr reads the same spilled vreg multiple times, we
+ // want to reuse the NewVReg.
+ // 2. If the instr is a two-addr instruction, we are required to
+ // keep the src/dst regs pinned.
+ //
+ // Keep track of whether we replace a use and/or def so that we can
+ // create the spill interval with the appropriate range.
+
+ HasUse = mop.isUse();
+ HasDef = mop.isDef();
+ SmallVector<unsigned, 2> Ops;
+ Ops.push_back(i);
+ for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
+ const MachineOperand &MOj = MI->getOperand(j);
+ if (!MOj.isReg())
+ continue;
+ unsigned RegJ = MOj.getReg();
+ if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ))
+ continue;
+ if (RegJ == RegI) {
+ Ops.push_back(j);
+ HasUse |= MOj.isUse();
+ HasDef |= MOj.isDef();
+ }
+ }
+
+ if (HasUse && !li.liveAt(getUseIndex(index)))
+ // Must be defined by an implicit def. It should not be spilled. Note,
+ // this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and the two target registers would overlap.
+ HasUse = false;
+
+ // Create a new virtual register for the spill interval.
+ // Create the new register now so we can map the fold instruction
+ // to the new register so when it is unfolded we get the correct
+ // answer.
+ bool CreatedNewVReg = false;
+ if (NewVReg == 0) {
+ NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ CreatedNewVReg = true;
+ }
+
+ if (!TryFold)
+ CanFold = false;
+ else {
+ // Do not fold load / store here if we are splitting. We'll find an
+ // optimal point to insert a load / store later.
+ if (!TrySplit) {
+ if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, FoldSS, FoldSlot, NewVReg)) {
+ // Folding the load/store can completely change the instruction in
+ // unpredictable ways, rescan it from the beginning.
+
+ if (FoldSS) {
+ // We need to give the new vreg the same stack slot as the
+ // spilled interval.
+ vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
+ }
+
+ HasUse = false;
+ HasDef = false;
+ CanFold = false;
+ if (isNotInMIMap(MI))
+ break;
+ goto RestartInstruction;
+ }
+ } else {
+ // We'll try to fold it later if it's profitable.
+ CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
+ }
+ }
+
+ mop.setReg(NewVReg);
+ if (mop.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+
+ // Reuse NewVReg for other reads.
+ for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
+ MachineOperand &mopj = MI->getOperand(Ops[j]);
+ mopj.setReg(NewVReg);
+ if (mopj.isImplicit())
+ rewriteImplicitOps(li, MI, NewVReg, vrm);
+ }
+
+ if (CreatedNewVReg) {
+ if (DefIsReMat) {
+ vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/);
+ if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
+ // Each valnum may have its own remat id.
+ ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
+ } else {
+ vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
+ }
+ if (!CanDelete || (HasUse && HasDef)) {
+ // If this is a two-addr instruction then its use operands are
+ // rematerializable but its def is not. It should be assigned a
+ // stack slot.
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else {
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+ } else if (HasUse && HasDef &&
+ vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
+ // If this interval hasn't been assigned a stack slot (because earlier
+ // def is a deleted remat def), do it now.
+ assert(Slot != VirtRegMap::NO_STACK_SLOT);
+ vrm.assignVirt2StackSlot(NewVReg, Slot);
+ }
+
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI.
+ if (DefIsReMat && ImpUse)
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+
+ // Create a new register interval for this spill / remat.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (CreatedNewVReg) {
+ NewLIs.push_back(&nI);
+ MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
+ if (TrySplit)
+ vrm.setIsSplitFromReg(NewVReg, li.reg);
+ }
+
+ if (HasUse) {
+ if (CreatedNewVReg) {
+ LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
+ nI.getNextValue(~0U, 0, VNInfoAllocator));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ } else {
+ // Extend the split live interval to this def / use.
+ unsigned End = getUseIndex(index)+1;
+ LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
+ nI.getValNumInfo(nI.getNumValNums()-1));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ }
+ }
+ if (HasDef) {
+ LiveRange LR(getDefIndex(index), getStoreIndex(index),
+ nI.getNextValue(~0U, 0, VNInfoAllocator));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ }
+
+ DOUT << "\t\t\t\tAdded new interval: ";
+ nI.print(DOUT, tri_);
+ DOUT << '\n';
+ }
+ return CanFold;
+}
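+
+/// anyKillInMBBAfterIdx - Return true if the given val# has a kill inside
+/// MBB at an index strictly greater than Idx.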
+bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
+ const VNInfo *VNI,
+ MachineBasicBlock *MBB, unsigned Idx) const {
+ unsigned End = getMBBEndIdx(MBB);
+ for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
+ unsigned KillIdx = VNI->kills[j];
+ if (KillIdx > Idx && KillIdx < End)
+ return true;
+ }
+ return false;
+}
+
+/// RewriteInfo - Keep track of machine instrs that will be rewritten
+/// during spilling.
+namespace {
+ struct RewriteInfo {
+ unsigned Index;
+ MachineInstr *MI;
+ bool HasUse;
+ bool HasDef;
+ RewriteInfo(unsigned i, MachineInstr *mi, bool u, bool d)
+ : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+ };
+
+ struct RewriteInfoCompare {
+ bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+ return LHS.Index < RHS.Index;
+ }
+ };
+}
+
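+/// rewriteInstructionsForSpills - Rewrite all defs and uses of li.reg that
+/// fall within the live range I, creating new spill intervals and recording
+/// the per-block spill and restore points.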
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+ LiveInterval::Ranges::const_iterator &I,
+ MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+ unsigned Slot, int LdSlot,
+ bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+ VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ SmallVector<int, 4> &ReMatIds,
+ const MachineLoopInfo *loopInfo,
+ BitVector &SpillMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+ DenseMap<unsigned,unsigned> &MBBVRegsMap,
+ std::vector<LiveInterval*> &NewLIs) {
+ bool AllCanFold = true;
+ unsigned NewVReg = 0;
+ unsigned start = getBaseIndex(I->start);
+ unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM;
+
+ // First collect all the def / use in this live range that will be rewritten.
+ // Make sure they are sorted according to instruction index.
+ std::vector<RewriteInfo> RewriteMIs;
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineInstr *MI = &*ri;
+ MachineOperand &O = ri.getOperand();
+ ++ri;
+ assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
+ unsigned index = getInstructionIndex(MI);
+ if (index < start || index >= end)
+ continue;
+ if (O.isUse() && !li.liveAt(getUseIndex(index)))
+ // Must be defined by an implicit def. It should not be spilled. Note,
+ // this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and the two target registers would overlap.
+ continue;
+ RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+ }
+ std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+ unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
+ // Now rewrite the defs and uses.
+ for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+ RewriteInfo &rwi = RewriteMIs[i];
+ ++i;
+ unsigned index = rwi.Index;
+ bool MIHasUse = rwi.HasUse;
+ bool MIHasDef = rwi.HasDef;
+ MachineInstr *MI = rwi.MI;
+ // If MI defs and/or uses the same register multiple times, then there
+ // are multiple entries.
+ unsigned NumUses = MIHasUse;
+ while (i != e && RewriteMIs[i].MI == MI) {
+ assert(RewriteMIs[i].Index == index);
+ bool isUse = RewriteMIs[i].HasUse;
+ if (isUse) ++NumUses;
+ MIHasUse |= isUse;
+ MIHasDef |= RewriteMIs[i].HasDef;
+ ++i;
+ }
+ MachineBasicBlock *MBB = MI->getParent();
+
+ if (ImpUse && MI != ReMatDefMI) {
+ // Re-matting an instruction with virtual register use. Update the
+ // register interval's spill weight to HUGE_VALF to prevent it from
+ // being spilled.
+ LiveInterval &ImpLi = getInterval(ImpUse);
+ ImpLi.weight = HUGE_VALF;
+ }
+
+ unsigned MBBId = MBB->getNumber();
+ unsigned ThisVReg = 0;
+ if (TrySplit) {
+ DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+ if (NVI != MBBVRegsMap.end()) {
+ ThisVReg = NVI->second;
+ // One common case:
+ // x = use
+ // ...
+ // ...
+ // def = ...
+ // = use
+ // It's better to start a new interval to avoid artificially
+ // extending the new interval.
+ if (MIHasDef && !MIHasUse) {
+ MBBVRegsMap.erase(MBB->getNumber());
+ ThisVReg = 0;
+ }
+ }
+ }
+
+ bool IsNew = ThisVReg == 0;
+ if (IsNew) {
+ // This ends the previous live interval. If all of its def / use
+ // can be folded, give it a low spill weight.
+ if (NewVReg && TrySplit && AllCanFold) {
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+ AllCanFold = true;
+ }
+ NewVReg = ThisVReg;
+
+ bool HasDef = false;
+ bool HasUse = false;
+ bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+ index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+ ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+ if (!HasDef && !HasUse)
+ continue;
+
+ AllCanFold &= CanFold;
+
+ // Update weight of spill interval.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ if (!TrySplit) {
+ // The spill weight is now infinity as it cannot be spilled again.
+ nI.weight = HUGE_VALF;
+ continue;
+ }
+
+ // Keep track of the last def and first use in each MBB.
+ if (HasDef) {
+ if (MI != ReMatOrigDefMI || !CanDelete) {
+ bool HasKill = false;
+ if (!HasUse)
+ HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index));
+ else {
+ // If this is two-address code, then this index starts a new VNInfo.
+ const VNInfo *VNI = li.findDefinedVNInfo(getDefIndex(index));
+ if (VNI)
+ HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index));
+ }
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (!HasKill) {
+ if (SII == SpillIdxes.end()) {
+ std::vector<SRInfo> S;
+ S.push_back(SRInfo(index, NewVReg, true));
+ SpillIdxes.insert(std::make_pair(MBBId, S));
+ } else if (SII->second.back().vreg != NewVReg) {
+ SII->second.push_back(SRInfo(index, NewVReg, true));
+ } else if ((int)index > SII->second.back().index) {
+ // If there is an earlier def and this is a two-address
+ // instruction, then it's not possible to fold the store (which
+ // would also fold the load).
+ SRInfo &Info = SII->second.back();
+ Info.index = index;
+ Info.canFold = !HasUse;
+ }
+ SpillMBBs.set(MBBId);
+ } else if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ (int)index > SII->second.back().index) {
+ // There is an earlier def that's not killed (must be two-address).
+ // The spill is no longer needed.
+ SII->second.pop_back();
+ if (SII->second.empty()) {
+ SpillIdxes.erase(MBBId);
+ SpillMBBs.reset(MBBId);
+ }
+ }
+ }
+ }
+
+ if (HasUse) {
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+ SpillIdxes.find(MBBId);
+ if (SII != SpillIdxes.end() &&
+ SII->second.back().vreg == NewVReg &&
+ (int)index > SII->second.back().index)
+ // There are use(s) following the last def; it's not safe to fold the spill.
+ SII->second.back().canFold = false;
+ DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+ RestoreIdxes.find(MBBId);
+ if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+ // If we are splitting live intervals, only fold if it's the first
+ // use and there isn't another use later in the MBB.
+ RII->second.back().canFold = false;
+ else if (IsNew) {
+ // Only need a reload if there isn't an earlier def / use.
+ if (RII == RestoreIdxes.end()) {
+ std::vector<SRInfo> Infos;
+ Infos.push_back(SRInfo(index, NewVReg, true));
+ RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+ } else {
+ RII->second.push_back(SRInfo(index, NewVReg, true));
+ }
+ RestoreMBBs.set(MBBId);
+ }
+ }
+
+ // Update spill weight.
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+ }
+
+ if (NewVReg && TrySplit && AllCanFold) {
+ // If all of its def / use can be folded, give it a low spill weight.
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.weight /= 10.0F;
+ }
+}
+
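+/// alsoFoldARestore - Return true if block Id records a restore of vr at the
+/// given index that is still marked as foldable.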
+bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return false;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index &&
+ Restores[i].vreg == vr &&
+ Restores[i].canFold)
+ return true;
+ return false;
+}
+
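+/// eraseRestoreInfo - Invalidate the restore of vr recorded at the given
+/// index in block Id so that no reload is issued for it.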
+void LiveIntervals::eraseRestoreInfo(int Id, int index, unsigned vr,
+ BitVector &RestoreMBBs,
+ DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+ if (!RestoreMBBs[Id])
+ return;
+ std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+ if (Restores[i].index == index && Restores[i].vreg)
+ Restores[i].index = -1;
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+ const TargetRegisterClass* rc,
+ std::vector<LiveInterval*> &NewLIs) {
+ for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+ re = mri_->reg_end(); ri != re; ) {
+ MachineOperand &O = ri.getOperand();
+ MachineInstr *MI = &*ri;
+ ++ri;
+ if (O.isDef()) {
+ assert(MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF &&
+ "Register def was not rewritten?");
+ RemoveMachineInstrFromMaps(MI);
+ vrm.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ } else {
+ // This must be a use of an implicit_def, so it's not part of the live
+ // interval. Create a new empty live interval for it.
+ // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+ unsigned NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ vrm.setIsImplicitlyDefined(NewVReg);
+ NewLIs.push_back(&getOrCreateInterval(NewVReg));
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == li.reg)
+ MO.setReg(NewVReg);
+ }
+ }
+ }
+}
+
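+/// addIntervalsForSpillsFast - Spill the given interval with only simple,
+/// per-instruction folding: each instruction touching li.reg either has the
+/// stack slot folded in or is rewritten to a fresh vreg with spill / restore
+/// points added around it.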
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpillsFast(const LiveInterval &li,
+ const MachineLoopInfo *loopInfo,
+ VirtRegMap &vrm) {
+ unsigned slot = vrm.assignVirt2StackSlot(li.reg);
+
+ std::vector<LiveInterval*> added;
+
+ assert(li.weight != HUGE_VALF &&
+ "attempt to spill already spilled interval!");
+
+ DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+ DEBUG(li.dump());
+ DOUT << '\n';
+
+ const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+ MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg);
+ while (RI != mri_->reg_end()) {
+ MachineInstr* MI = &*RI;
+
+ SmallVector<unsigned, 2> Indices;
+ bool HasUse = false;
+ bool HasDef = false;
+
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (!mop.isReg() || mop.getReg() != li.reg) continue;
+
+ HasUse |= MI->getOperand(i).isUse();
+ HasDef |= MI->getOperand(i).isDef();
+
+ Indices.push_back(i);
+ }
+
+ if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI),
+ Indices, true, slot, li.reg)) {
+ unsigned NewVReg = mri_->createVirtualRegister(rc);
+ vrm.grow();
+ vrm.assignVirt2StackSlot(NewVReg, slot);
+
+ // create a new register for this spill
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+
+ // the spill weight is now infinity as it
+ // cannot be spilled again
+ nI.weight = HUGE_VALF;
+
+ // Rewrite register operands to use the new vreg.
+ for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
+ E = Indices.end(); I != E; ++I) {
+ MI->getOperand(*I).setReg(NewVReg);
+
+ if (MI->getOperand(*I).isUse())
+ MI->getOperand(*I).setIsKill(true);
+ }
+
+ // Fill in the new live interval.
+ unsigned index = getInstructionIndex(MI);
+ if (HasUse) {
+ LiveRange LR(getLoadIndex(index), getUseIndex(index),
+ nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ vrm.addRestorePoint(NewVReg, MI);
+ }
+ if (HasDef) {
+ LiveRange LR(getDefIndex(index), getStoreIndex(index),
+ nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ vrm.addSpillPoint(NewVReg, true, MI);
+ }
+
+ added.push_back(&nI);
+
+ DOUT << "\t\t\t\tadded new interval: ";
+ DEBUG(nI.dump());
+ DOUT << '\n';
+ }
+
+
+ RI = mri_->reg_begin(li.reg);
+ }
+
+ return added;
+}
+
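+/// addIntervalsForSpills - Create the new live intervals that replace li when
+/// it is spilled: rewrite its defs and uses, record spill / restore points,
+/// and re-materialize values where it is safe to do so.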
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+ SmallVectorImpl<LiveInterval*> &SpillIs,
+ const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+
+ if (EnableFastSpilling)
+ return addIntervalsForSpillsFast(li, loopInfo, vrm);
+
+ assert(li.weight != HUGE_VALF &&
+ "attempt to spill already spilled interval!");
+
+ DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+ li.print(DOUT, tri_);
+ DOUT << '\n';
+
+ // Each bit specifies whether a spill is required in the corresponding MBB.
+ BitVector SpillMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+ BitVector RestoreMBBs(mf_->getNumBlockIDs());
+ DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+ DenseMap<unsigned,unsigned> MBBVRegsMap;
+ std::vector<LiveInterval*> NewLIs;
+ const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+ unsigned NumValNums = li.getNumValNums();
+ SmallVector<MachineInstr*, 4> ReMatDefs;
+ ReMatDefs.resize(NumValNums, NULL);
+ SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+ ReMatOrigDefs.resize(NumValNums, NULL);
+ SmallVector<int, 4> ReMatIds;
+ ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+ BitVector ReMatDelete(NumValNums);
+ unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
+
+ // Spilling a split live interval. It cannot be split any further. It is
+ // also guaranteed to be a single val# / range interval.
+ if (vrm.getPreSplitReg(li.reg)) {
+ vrm.setIsSplitFromReg(li.reg, 0);
+ // Unset the split kill marker on the last use.
+ unsigned KillIdx = vrm.getKillPoint(li.reg);
+ if (KillIdx) {
+ MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+ assert(KillMI && "Last use disappeared?");
+ int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+ assert(KillOp != -1 && "Last use disappeared?");
+ KillMI->getOperand(KillOp).setIsKill(false);
+ }
+ vrm.removeKillPoint(li.reg);
+ bool DefIsReMat = vrm.isReMaterialized(li.reg);
+ Slot = vrm.getStackSlot(li.reg);
+ assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+ MachineInstr *ReMatDefMI = DefIsReMat ?
+ vrm.getReMaterializedMI(li.reg) : NULL;
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+ bool IsFirstRange = true;
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ // If this is a split live interval with multiple ranges, it means there
+ // are two-address instructions that re-defined the value. Only the
+ // first def can be rematerialized!
+ if (IsFirstRange) {
+ // Note ReMatOrigDefMI has already been deleted.
+ rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ } else {
+ rewriteInstructionsForSpills(li, false, I, NULL, 0,
+ Slot, 0, false, false, false,
+ false, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+ IsFirstRange = false;
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ return NewLIs;
+ }
+
+ bool TrySplit = SplitAtBB && !intervalIsInOneMBB(li);
+ if (SplitLimit != -1 && (int)numSplits >= SplitLimit)
+ TrySplit = false;
+ if (TrySplit)
+ ++numSplits;
+ bool NeedStackSlot = false;
+ for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+ i != e; ++i) {
+ const VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ unsigned DefIdx = VNI->def;
+ if (DefIdx == ~1U)
+ continue; // Dead val#.
+ // Is the def for the val# rematerializable?
+ MachineInstr *ReMatDefMI = (DefIdx == ~0u)
+ ? 0 : getInstructionFromIndex(DefIdx);
+ bool dummy;
+ if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+ // Remember how to remat the def of this val#.
+ ReMatOrigDefs[VN] = ReMatDefMI;
+ // Original def may be modified so we have to make a copy here.
+ MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+ ClonedMIs.push_back(Clone);
+ ReMatDefs[VN] = Clone;
+
+ bool CanDelete = true;
+ if (VNI->hasPHIKill) {
+ // A kill is a phi node; not all of its uses can be rematerialized.
+ // It must not be deleted.
+ CanDelete = false;
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ if (CanDelete)
+ ReMatDelete.set(VN);
+ } else {
+ // Need a stack slot if there is any live range where uses cannot be
+ // rematerialized.
+ NeedStackSlot = true;
+ }
+ }
+
+ // One stack slot per live interval.
+ if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+ if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+ Slot = vrm.assignVirt2StackSlot(li.reg);
+
+ // This case only occurs when the prealloc splitter has already assigned
+ // a stack slot to this vreg.
+ else
+ Slot = vrm.getStackSlot(li.reg);
+ }
+
+ // Create new intervals and rewrite defs and uses.
+ for (LiveInterval::Ranges::const_iterator
+ I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+ MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
+ MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
+ bool DefIsReMat = ReMatDefMI != NULL;
+ bool CanDelete = ReMatDelete[I->valno->id];
+ int LdSlot = 0;
+ bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ bool isLoad = isLoadSS ||
+ (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
+ rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
+ Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+ CanDelete, vrm, rc, ReMatIds, loopInfo,
+ SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+ MBBVRegsMap, NewLIs);
+ }
+
+ // Insert spills / restores if we are splitting.
+ if (!TrySplit) {
+ handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ return NewLIs;
+ }
+
+ SmallPtrSet<LiveInterval*, 4> AddedKill;
+ SmallVector<unsigned, 2> Ops;
+ if (NeedStackSlot) {
+ int Id = SpillMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &spills = SpillIdxes[Id];
+ for (unsigned i = 0, e = spills.size(); i != e; ++i) {
+ int index = spills[i].index;
+ unsigned VReg = spills[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ bool FoundUse = false;
+ Ops.clear();
+ if (spills[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ Ops.push_back(j);
+ if (MO.isDef())
+ continue;
+ if (isReMat ||
+ (!FoundUse && !alsoFoldARestore(Id, index, VReg,
+ RestoreMBBs, RestoreIdxes))) {
+ // MI has two-address uses of the same register. If the use
+ // isn't the first and only use in the BB, then we can't fold
+ // it. FIXME: Move this to rewriteInstructionsForSpills.
+ CanFold = false;
+ break;
+ }
+ FoundUse = true;
+ }
+ }
+ // Fold the store into the def if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
+ Folded = true;
+ if (FoundUse) {
+ // Also folded uses, do not issue a load.
+ eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
+ nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+ }
+ nI.removeRange(getDefIndex(index), getStoreIndex(index));
+ }
+ }
+
+ // Otherwise tell the spiller to issue a spill.
+ if (!Folded) {
+ LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
+ bool isKill = LR->end == getStoreIndex(index);
+ if (!MI->registerDefIsDead(nI.reg))
+ // No need to spill a dead def.
+ vrm.addSpillPoint(VReg, isKill, MI);
+ if (isKill)
+ AddedKill.insert(&nI);
+ }
+ }
+ Id = SpillMBBs.find_next(Id);
+ }
+ }
+
+ int Id = RestoreMBBs.find_first();
+ while (Id != -1) {
+ std::vector<SRInfo> &restores = RestoreIdxes[Id];
+ for (unsigned i = 0, e = restores.size(); i != e; ++i) {
+ int index = restores[i].index;
+ if (index == -1)
+ continue;
+ unsigned VReg = restores[i].vreg;
+ LiveInterval &nI = getOrCreateInterval(VReg);
+ bool isReMat = vrm.isReMaterialized(VReg);
+ MachineInstr *MI = getInstructionFromIndex(index);
+ bool CanFold = false;
+ Ops.clear();
+ if (restores[i].canFold) {
+ CanFold = true;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != VReg)
+ continue;
+
+ if (MO.isDef()) {
+ // If this restore were to be folded, it would have been folded
+ // already.
+ CanFold = false;
+ break;
+ }
+ Ops.push_back(j);
+ }
+ }
+
+ // Fold the load into the use if possible.
+ bool Folded = false;
+ if (CanFold && !Ops.empty()) {
+ if (!isReMat)
+ Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
+ else {
+ MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
+ int LdSlot = 0;
+ bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+ // If the rematerializable def is a load, also try to fold it.
+ if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
+ Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+ Ops, isLoadSS, LdSlot, VReg);
+ if (!Folded) {
+ unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
+ if (ImpUse) {
+ // Re-matting an instruction with virtual register use. Add the
+ // register as an implicit use on the use MI and update the register
+ // interval's spill weight to HUGE_VALF to prevent it from being
+ // spilled.
+ LiveInterval &ImpLi = getInterval(ImpUse);
+ ImpLi.weight = HUGE_VALF;
+ MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+ }
+ }
+ }
+ }
+ // If folding is not possible / failed, then tell the spiller to issue a
+ // load / rematerialization for us.
+ if (Folded)
+ nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+ else
+ vrm.addRestorePoint(VReg, MI);
+ }
+ Id = RestoreMBBs.find_next(Id);
+ }
+
+ // Finalize intervals: add kills, finalize spill weights, and filter out
+ // dead intervals.
+ std::vector<LiveInterval*> RetNewLIs;
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
+ LiveInterval *LI = NewLIs[i];
+ if (!LI->empty()) {
+ LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI);
+ if (!AddedKill.count(LI)) {
+ LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
+ unsigned LastUseIdx = getBaseIndex(LR->end);
+ MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
+ int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
+ assert(UseIdx != -1);
+ if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
+ LastUse->getOperand(UseIdx).setIsKill();
+ vrm.addKillPoint(LI->reg, LastUseIdx);
+ }
+ }
+ RetNewLIs.push_back(LI);
+ }
+ }
+
+ handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+ return RetNewLIs;
+}
+
+/// hasAllocatableSuperReg - Return true if the specified physical register has
+/// any super register that's allocatable.
+bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
+ if (allocatableRegs_[*AS] && hasInterval(*AS))
+ return true;
+ return false;
+}
+
+/// getRepresentativeReg - Find the largest super register of the specified
+/// physical register.
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+ // Find the largest super-register that is allocatable.
+ unsigned BestReg = Reg;
+ for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+ unsigned SuperReg = *AS;
+ if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+ BestReg = SuperReg;
+ break;
+ }
+ }
+ return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflicts with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+ unsigned PhysReg) const {
+ unsigned NumConflicts = 0;
+ const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ unsigned Index = getInstructionIndex(MI);
+ if (pli.liveAt(Index))
+ ++NumConflicts;
+ }
+ return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut its interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+ unsigned PhysReg, VirtRegMap &vrm) {
+ unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+ for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+ // If there are registers which alias PhysReg but are not sub-registers
+ // of the chosen representative super register, assert, since we can't
+ // handle them yet.
+ assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+ tri_->isSuperRegister(*AS, SpillReg));
+
+ bool Cut = false;
+ LiveInterval &pli = getInterval(SpillReg);
+ SmallPtrSet<MachineInstr*, 8> SeenMIs;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = O.getParent();
+ if (SeenMIs.count(MI))
+ continue;
+ SeenMIs.insert(MI);
+ unsigned Index = getInstructionIndex(MI);
+ if (pli.liveAt(Index)) {
+ vrm.addEmergencySpill(SpillReg, MI);
+ unsigned StartIdx = getLoadIndex(Index);
+ unsigned EndIdx = getStoreIndex(Index)+1;
+ if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
+ pli.removeRange(StartIdx, EndIdx);
+ Cut = true;
+ } else {
+ cerr << "Ran out of registers during register allocation!\n";
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+ cerr << "Please check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(cerr.stream(), tm_);
+ }
+ exit(1);
+ }
+ for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) {
+ if (!hasInterval(*AS))
+ continue;
+ LiveInterval &spli = getInterval(*AS);
+ if (spli.liveAt(Index))
+ spli.removeRange(getLoadIndex(Index), getStoreIndex(Index)+1);
+ }
+ }
+ }
+ return Cut;
+}
+
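+/// addLiveRangeToEndOfBlock - Add a live range for reg extending from
+/// startInst to the end of its basic block, marked as having a PHI kill.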
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ getInstructionIndex(startInst) + InstrSlots::DEF,
+ startInst, getVNInfoAllocator());
+ VN->hasPHIKill = true;
+ VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
+ LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
+ getMBBEndIdx(startInst->getParent()) + 1, VN);
+ Interval.addRange(LR);
+
+ return LR;
+}
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 0000000..86f7ea2
--- /dev/null
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,66 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
+
+void LiveStacks::scaleNumbering(int factor) {
+ // Scale the intervals.
+ for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
+ LI->second.scaleNumbering(factor);
+ }
+}
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(std::ostream &O, const Module*) const {
+ O << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(O);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ O << " [" << RC->getName() << "]\n";
+ else
+ O << " [Unknown]\n";
+ }
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..c33d81e
--- /dev/null
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,695 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
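+//
+// As a rough illustration (hypothetical virtual registers, pseudo machine
+// code), for a block such as
+//
+//   %v1 = ADD %v0, %v0    ; last use of %v0: %v0 is killed here
+//   %v2 = MUL %v1, %v1    ; last use of %v1: %v1 is killed here
+//   %v3 = SUB %v2, %v2    ; %v3 is never read again: %v3 is dead here
+//
+// the pass records %v0 and %v1 as killed by their final uses, and %v3 as dead
+// immediately after its defining instruction.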
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/alloca.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+}
+
+void LiveVariables::VarInfo::dump() const {
+ cerr << " Alive in blocks: ";
+ for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+ E = AliveBlocks.end(); I != E; ++I)
+ cerr << *I << ", ";
+ cerr << "\n Killed by:";
+ if (Kills.empty())
+ cerr << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ cerr << "\n #" << i << ": " << *Kills[i];
+ cerr << "\n";
+ }
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
+ if (RegIdx >= VirtRegInfo.size()) {
+ if (RegIdx >= 2*VirtRegInfo.size())
+ VirtRegInfo.resize(RegIdx*2);
+ else
+ VirtRegInfo.resize(2*VirtRegInfo.size());
+ }
+ return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+ E = MBB->pred_rend(); PI != E; ++PI)
+ WorkList.push_back(*PI);
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo& VRInfo = getVarInfo(reg);
+ VRInfo.NumUses++;
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, so it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+ // If vr is not alive in any block, then it defaults to dead.
+ VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-register that's defined.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+ unsigned &PartDefReg) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+ PartDefReg = LastDefReg;
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ // If there was a previous use or a "full" def all is well.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... <imp-def EAX>, <imp-kill AH>
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ unsigned PartDefReg = 0;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefReg);
+ // If LastPartialDef is NULL, it must be using a livein register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (Processed.count(SubReg))
+ continue;
+ if (SubReg == PartDefReg || TRI->isSubRegister(PartDefReg, SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Processed.insert(*SS);
+ }
+ }
+ }
+
+ // There was an earlier def of a super-register. Add implicit def to that MI.
+ //
+ // A: EAX = ...
+ // B: ... = AX
+ //
+ // Add implicit def to A if there isn't a use of AX (or EAX) before B.
+ if (!PhysRegUse[Reg]) {
+ MachineInstr *Def = PhysRegDef[Reg];
+ if (Def && !Def->modifiesRegister(Reg))
+ Def->addOperand(MachineOperand::CreateReg(Reg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+ }
+
+ // Remember this use.
+ PhysRegUse[Reg] = MI;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ PhysRegUse[SubReg] = MI;
+}
+
+/// hasRegisterUseBelow - Return true if the specified register is used after
+/// the current instruction and before its next definition.
+bool LiveVariables::hasRegisterUseBelow(unsigned Reg,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock *MBB) {
+ if (I == MBB->end())
+ return false;
+
+ // First find out if there are any uses / defs below.
+ bool hasDistInfo = true;
+ unsigned CurDist = DistanceMap[I];
+ SmallVector<MachineInstr*, 4> Uses;
+ SmallVector<MachineInstr*, 4> Defs;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineOperand &UDO = RI.getOperand();
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ bool isBelow = false;
+ if (DI == DistanceMap.end()) {
+ // Must be below if it hasn't been assigned a distance yet.
+ isBelow = true;
+ hasDistInfo = false;
+ } else if (DI->second > CurDist)
+ isBelow = true;
+ if (isBelow) {
+ if (UDO.isUse())
+ Uses.push_back(UDMI);
+ if (UDO.isDef())
+ Defs.push_back(UDMI);
+ }
+ }
+
+ if (Uses.empty())
+ // No uses below.
+ return false;
+ if (Defs.empty())
+ // There are uses below but no defs below.
+ return true;
+ // There are both uses and defs below. We need to know which comes first.
+ if (!hasDistInfo) {
+ // Complete DistanceMap for this MBB. This information is computed only
+ // once per MBB.
+ ++I;
+ ++CurDist;
+ for (MachineBasicBlock::iterator E = MBB->end(); I != E; ++I, ++CurDist)
+ DistanceMap.insert(std::make_pair(I, CurDist));
+ }
+
+ unsigned EarliestUse = DistanceMap[Uses[0]];
+ for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+ unsigned Dist = DistanceMap[Uses[i]];
+ if (Dist < EarliestUse)
+ EarliestUse = Dist;
+ }
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Dist = DistanceMap[Defs[i]];
+ if (Dist < EarliestUse)
+ // The register is defined before its first use below.
+ return false;
+ }
+ return true;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+ if (!PhysRegUse[Reg] && !PhysRegDef[Reg])
+ return false;
+
+ MachineInstr *LastRefOrPartRef = PhysRegUse[Reg]
+ ? PhysRegUse[Reg] : PhysRegDef[Reg];
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, AX<imp-use, kill>
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // AX<dead> =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // AX<dead> = AL<imp-def>
+ // = AL<kill>
+ // AX =
+ SmallSet<unsigned, 8> PartUses;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ PartUses.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.insert(*SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI)
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+
+ /* Partial uses. Mark register def dead and add implicit def of
+ sub-registers which are used.
+ FIXME: LiveIntervalAnalysis can't handle this yet!
+ EAX<dead> = op AL<imp-def>
+ That is, EAX def is dead but AL def extends past it.
+ Enable this after live interval analysis is fixed to improve codegen!
+ else if (!PhysRegUse[Reg]) {
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (PartUses.count(SubReg)) {
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true, true));
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ PartUses.erase(*SS);
+ }
+ }
+ } */
+ else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ Live.insert(Reg);
+ for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ Live.insert(*SS);
+ } else {
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ // If a register isn't itself defined, but all of the parts that make it
+ // up are defined, then consider it also defined.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ Live.insert(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Live.insert(*SS);
+ }
+ }
+ }
+
+ // Start from the largest piece, find the last time any part of the register
+ // is referenced.
+ if (!HandlePhysRegKill(Reg, MI)) {
+ // Only some of the sub-registers are used.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ if (HandlePhysRegKill(SubReg, MI)) {
+ Live.erase(SubReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ Live.erase(*SS);
+ }
+ }
+ assert(Live.empty() && "Not all defined registers are killed / dead?");
+ }
+
+ if (MI) {
+ // Does this extend the live range of a super-register?
+ SmallSet<unsigned, 8> Processed;
+ for (const unsigned *SuperRegs = TRI->getSuperRegisters(Reg);
+ unsigned SuperReg = *SuperRegs; ++SuperRegs) {
+ if (Processed.count(SuperReg))
+ continue;
+ MachineInstr *LastRef = PhysRegUse[SuperReg]
+ ? PhysRegUse[SuperReg] : PhysRegDef[SuperReg];
+ if (LastRef && LastRef != MI) {
+ // The larger register is previously defined. Now a smaller part is
+ // being re-defined. Treat it as read/mod/write if there are uses
+ // below.
+ // EAX =
+ // AX = EAX<imp-use,kill>, EAX<imp-def>
+ // ...
+ // = EAX
+ if (hasRegisterUseBelow(SuperReg, MI, MI->getParent())) {
+ MI->addOperand(MachineOperand::CreateReg(SuperReg, false/*IsDef*/,
+ true/*IsImp*/,true/*IsKill*/));
+ MI->addOperand(MachineOperand::CreateReg(SuperReg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SuperReg] = MI;
+ PhysRegUse[SuperReg] = NULL;
+ Processed.insert(SuperReg);
+ for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) {
+ PhysRegDef[*SS] = MI;
+ PhysRegUse[*SS] = NULL;
+ Processed.insert(*SS);
+ }
+ } else {
+ // Otherwise, the super register is killed.
+ if (HandlePhysRegKill(SuperReg, MI)) {
+ PhysRegDef[SuperReg] = NULL;
+ PhysRegUse[SuperReg] = NULL;
+ for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) {
+ PhysRegDef[*SS] = NULL;
+ PhysRegUse[*SS] = NULL;
+ Processed.insert(*SS);
+ }
+ }
+ }
+ }
+ }
+
+ // Remember this def.
+ PhysRegDef[Reg] = MI;
+ PhysRegUse[Reg] = NULL;
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ PhysRegDef[SubReg] = MI;
+ PhysRegUse[SubReg] = NULL;
+ }
+ }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+
+ ReservedRegisters = TRI->getReservedRegs(mf);
+
+ unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef = new MachineInstr*[NumRegs];
+ PhysRegUse = new MachineInstr*[NumRegs];
+ PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+
+ /// Get some space for a respectable number of registers.
+ VirtRegInfo.resize(64);
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+
+ // Mark live-in registers as live-in.
+ for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, 0);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->getOpcode() == TargetInstrInfo::PHI)
+ NumOperandsToProcess = 1;
+
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse())
+ UseRegs.push_back(MOReg);
+ if (MO.isDef())
+ DefRegs.push_back(MOReg);
+ }
+
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!ReservedRegisters[MOReg])
+ HandlePhysRegDef(MOReg, MI);
+ }
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // Finally, if the last instruction in the block is a return, make sure to
+ // mark it as using all of the live-out values in the function.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register!");
+ HandlePhysRegUse(*I, Ret);
+
+ // Add live-out registers as implicit uses.
+ if (!Ret->readsRegister(*I))
+ Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (PhysRegDef[i] || PhysRegUse[i])
+ HandlePhysRegDef(i, 0);
+
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto MI's.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+ for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[i].Kills[j] ==
+ MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
+ VirtRegInfo[i]
+ .Kills[j]->addRegisterDead(i +
+ TargetRegisterInfo::FirstVirtualRegister,
+ TRI);
+ else
+ VirtRegInfo[i]
+ .Kills[j]->addRegisterKilled(i +
+ TargetRegisterInfo::FirstVirtualRegister,
+ TRI);
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If there are, it is due to a bug in the instruction selector or
+ // some other part of the code generator.
+#ifndef NDEBUG
+ for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ delete[] PhysRegDef;
+ delete[] PhysRegUse;
+ delete[] PHIVarInfo;
+
+ return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ (void)removed; // Mark it as used so builds with asserts disabled don't warn.
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function.
+/// In particular, we want to map the variable information of a virtual
+/// register that is used in a PHI node to the basic block the vreg is coming
+/// from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
+}
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
new file mode 100644
index 0000000..14acb71
--- /dev/null
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -0,0 +1,292 @@
+//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineFunction pass, run after register allocation,
+// that turns subreg insert/extract instructions into register
+// copies, as needed. This ensures correct codegen even if the coalescer
+// isn't able to remove all subreg instructions.
+//
+//===----------------------------------------------------------------------===//
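+//
+// As a rough illustration (x86-style registers, schematic machine-instruction
+// syntax, sub-register index N chosen arbitrarily), after register allocation
+//
+//   %EAX<def> = EXTRACT_SUBREG %RAX<kill>, N
+//
+// either needs no code at all, when sub-register N of %RAX is already %EAX,
+// or is rewritten into a single register-to-register copy into %EAX. The
+// INSERT_SUBREG and SUBREG_TO_REG forms are handled analogously below.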
+
+#define DEBUG_TYPE "lowersubregs"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN LowerSubregsInstructionPass
+ : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {}
+
+ const char *getPassName() const {
+ return "Subregister lowering instruction pass";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+ bool LowerExtract(MachineInstr *MI);
+ bool LowerInsert(MachineInstr *MI);
+ bool LowerSubregToReg(MachineInstr *MI);
+
+ void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
+ const TargetRegisterInfo &TRI);
+ void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
+ const TargetRegisterInfo &TRI);
+ };
+
+ char LowerSubregsInstructionPass::ID = 0;
+}
+
+FunctionPass *llvm::createLowerSubregsPass() {
+ return new LowerSubregsInstructionPass();
+}
+
+/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the dead flag.
+void
+LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
+ unsigned DstReg,
+ const TargetRegisterInfo &TRI) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterDead(DstReg, &TRI))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyRegToReg output doesn't reference destination register!");
+ }
+}
+
+/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed,
+/// and the lowered replacement instructions immediately precede it.
+/// Mark the replacement instructions with the kill flag.
+void
+LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
+ unsigned SrcReg,
+ const TargetRegisterInfo &TRI) {
+ for (MachineBasicBlock::iterator MII =
+ prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+ if (MII->addRegisterKilled(SrcReg, &TRI))
+ break;
+ assert(MII != MI->getParent()->begin() &&
+ "copyRegToReg output doesn't reference source register!");
+ }
+}
+
+bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
+ MI->getOperand(2).isImm() && "Malformed extract_subreg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SuperReg = MI->getOperand(1).getReg();
+ unsigned SubIdx = MI->getOperand(2).getImm();
+ unsigned SrcReg = TRI.getSubReg(SuperReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) &&
+ "Extract supperg source must be a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Extract destination must be in a physical register");
+
+ DOUT << "subreg: CONVERTING: " << *MI;
+
+ if (SrcReg == DstReg) {
+ // No need to insert an identity copy instruction.
+ DOUT << "subreg: eliminated!";
+ // Find the kill of the destination register's live range, and insert
+ // a kill of the source register at that point.
+ if (MI->getOperand(1).isKill() && !MI->getOperand(0).isDead())
+ for (MachineBasicBlock::iterator MII =
+ next(MachineBasicBlock::iterator(MI));
+ MII != MBB->end(); ++MII)
+ if (MII->killsRegister(DstReg, &TRI)) {
+ MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true);
+ break;
+ }
+ } else {
+ // Insert copy
+ const TargetRegisterClass *TRC = TRI.getPhysicalRegisterRegClass(DstReg);
+ assert(TRC == TRI.getPhysicalRegisterRegClass(SrcReg) &&
+ "Extract subreg and Dst must be of same register class");
+ TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRC, TRC);
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstReg, TRI);
+ if (MI->getOperand(1).isKill())
+ TransferKillFlag(MI, SrcReg, TRI);
+
+#ifndef NDEBUG
+ MachineBasicBlock::iterator dMI = MI;
+ DOUT << "subreg: " << *(--dMI);
+#endif
+ }
+
+ DOUT << "\n";
+ MBB->erase(MI);
+ return true;
+}
+
+bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ unsigned InsSIdx = MI->getOperand(2).getSubReg();
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DOUT << "subreg: CONVERTING: " << *MI;
+
+ if (DstSubReg == InsReg && InsSIdx == 0) {
+ // No need to insert an identity copy instruction.
+ // Watch out for case like this:
+ // %RAX<def> = ...
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
+ // The first def defines RAX, not EAX, so the top bits were not
+ // zero-extended.
+ DOUT << "subreg: eliminated!";
+ } else {
+ // Insert sub-register copy
+ const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
+ const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg);
+ TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ if (MI->getOperand(2).isKill())
+ TransferKillFlag(MI, InsReg, TRI);
+
+#ifndef NDEBUG
+ MachineBasicBlock::iterator dMI = MI;
+ DOUT << "subreg: " << *(--dMI);
+#endif
+ }
+
+ DOUT << "\n";
+ MBB->erase(MI);
+ return true;
+}
+
+bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid insert_subreg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+#ifndef NDEBUG
+ unsigned SrcReg = MI->getOperand(1).getReg();
+#endif
+ unsigned InsReg = MI->getOperand(2).getReg();
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Insert superreg source must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DOUT << "subreg: CONVERTING: " << *MI;
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+ DOUT << "subreg: eliminated!";
+ } else {
+ // Insert sub-register copy
+ const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg);
+ const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg);
+ TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ // Transfer the kill/dead flags, if needed.
+ if (MI->getOperand(0).isDead())
+ TransferDeadFlag(MI, DstSubReg, TRI);
+ if (MI->getOperand(1).isKill())
+ TransferKillFlag(MI, InsReg, TRI);
+
+#ifndef NDEBUG
+ MachineBasicBlock::iterator dMI = MI;
+ DOUT << "subreg: " << *(--dMI);
+#endif
+ }
+
+ DOUT << "\n";
+ MBB->erase(MI);
+ return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "Machine Function\n";
+
+ bool MadeChange = false;
+
+ DOUT << "********** LOWERING SUBREG INSTRS **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineInstr *MI = mi++;
+
+ if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ MadeChange |= LowerExtract(MI);
+ } else if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ MadeChange |= LowerInsert(MI);
+ } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ MadeChange |= LowerSubregToReg(MI);
+ }
+ }
+ }
+
+ return MadeChange;
+}
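+
+// Usage sketch (hypothetical driver code): the pass is added to the machine
+// code pipeline after register allocation through the factory defined above,
+// e.g.
+//
+//   PM.add(llvm::createLowerSubregsPass());
+//
+// where PM is the pass manager assembling the codegen pipeline.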
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp
new file mode 100644
index 0000000..4332627
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.cpp
@@ -0,0 +1,976 @@
+//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent Mach-O writer, which writes out
+// the Mach-O file in the following order:
+//
+// #1 FatHeader (universal-only)
+// #2 FatArch (universal-only, 1 per universal arch)
+// Per arch:
+// #3 Header
+// #4 Load Commands
+// #5 Sections
+// #6 Relocations
+// #7 Symbols
+// #8 Strings
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstring>
+using namespace llvm;
+
+/// AddMachOWriter - Concrete function to add the Mach-O writer to the function
+/// pass manager.
+MachineCodeEmitter *llvm::AddMachOWriter(PassManagerBase &PM,
+ raw_ostream &O,
+ TargetMachine &TM) {
+ MachOWriter *MOW = new MachOWriter(O, TM);
+ PM.add(MOW);
+ return &MOW->getMachineCodeEmitter();
+}
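+
+// Usage sketch (hypothetical caller code): a target's object-file emission
+// setup would register the writer and keep the returned emitter, e.g.
+//
+//   MachineCodeEmitter *Emitter = AddMachOWriter(PM, Out, TM);
+//
+// where PM, Out, and TM are the caller's pass manager, raw_ostream, and
+// TargetMachine.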
+
+//===----------------------------------------------------------------------===//
+// MachOCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code
+ /// for functions to the Mach-O file.
+ class MachOCodeEmitter : public MachineCodeEmitter {
+ MachOWriter &MOW;
+
+ /// Target machine description.
+ TargetMachine &TM;
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating what header values and flags to set.
+ bool is64Bit, isLittleEndian;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+ /// CPLocations - This is a map of constant pool indices to offsets from the
+ /// start of the section for that constant pool index.
+ std::vector<uintptr_t> CPLocations;
+
+ /// CPSections - This is a map of constant pool indices to the MachOSection
+ /// containing the constant pool entry for that index.
+ std::vector<unsigned> CPSections;
+
+ /// JTLocations - This is a map of jump table indices to offsets from the
+ /// start of the section for that jump table index.
+ std::vector<uintptr_t> JTLocations;
+
+ /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ public:
+ MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) {
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+ }
+
+ virtual void startFunction(MachineFunction &MF);
+ virtual bool finishFunction(MachineFunction &MF);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+ assert(CPLocations.size() > Index && "CP not emitted!");
+ return CPLocations[Index];
+ }
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+ assert(JTLocations.size() > Index && "JT not emitted!");
+ return JTLocations[Index];
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+ }
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ virtual uintptr_t getLabelAddress(uint64_t Label) const {
+ assert(0 && "get Label not implemented");
+ abort();
+ return 0;
+ }
+
+ virtual void emitLabel(uint64_t LabelID) {
+ assert(0 && "emit Label not implemented");
+ abort();
+ }
+
+
+ virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+ /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+ virtual void startGVStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment = 1) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ virtual void startGVStub(const GlobalValue* F, void *Buffer,
+ unsigned StubSize) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ virtual void *finishGVStub(const GlobalValue* F) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ return 0;
+ }
+ };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void MachOCodeEmitter::startFunction(MachineFunction &MF) {
+ const TargetData *TD = TM.getTargetData();
+ const Function *F = MF.getFunction();
+
+ // Align the output buffer to the appropriate alignment, power of 2.
+ unsigned FnAlign = F->getAlignment();
+ unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+ unsigned Align = Log2_32(std::max(FnAlign, TDAlign));
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+
+ // Get the Mach-O Section that this function belongs in.
+ MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+ // FIXME: better memory management
+ MOS->SectionData.reserve(4096);
+ BufferBegin = &MOS->SectionData[0];
+ BufferEnd = BufferBegin + MOS->SectionData.capacity();
+
+ // Upgrade the section alignment if required.
+ if (MOS->align < Align) MOS->align = Align;
+
+ // Round the size up to the correct alignment for starting the new function.
+ if ((MOS->size & ((1 << Align) - 1)) != 0) {
+ MOS->size += (1 << Align);
+ MOS->size &= ~((1 << Align) - 1);
+ }
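+ // For example, with Align == 2 (4-byte alignment) and a current size of 6,
+ // this computes (6 + 4) & ~3 == 8, the next 4-byte boundary.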
+
+ // FIXME: Using MOS->size directly here instead of calculating it from the
+ // output buffer size (impossible because the code emitter deals only in raw
+ // bytes) forces us to manually synchronize size and write padding zero bytes
+ // to the output buffer for all non-text sections. For text sections, we do
+ // not synchronize the output buffer, and we just blow up if anyone tries to
+ // write non-code to it. An assert should probably be added to
+ // AddSymbolToSection to prevent calling it on the text section.
+ CurBufferPtr = BufferBegin + MOS->size;
+
+ // Clear per-function data structures.
+ CPLocations.clear();
+ CPSections.clear();
+ JTLocations.clear();
+ MBBLocations.clear();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
+ // Get the Mach-O Section that this function belongs in.
+ MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+ // Get a symbol for the function to add to the symbol table
+ // FIXME: it seems like we should call something like AddSymbolToSection
+ // in startFunction rather than changing the section size and symbol n_value
+ // here.
+ const GlobalValue *FuncV = MF.getFunction();
+ MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TM);
+ FnSym.n_value = MOS->size;
+ MOS->size = CurBufferPtr - BufferBegin;
+
+ // Emit constant pool to appropriate section(s)
+ emitConstantPool(MF.getConstantPool());
+
+ // Emit jump tables to appropriate section
+ emitJumpTables(MF.getJumpTableInfo());
+
+ // If we have emitted any relocations to function-specific objects such as
+ // basic blocks, constant pools entries, or jump tables, record their
+ // addresses now so that we can rewrite them with the correct addresses
+ // later.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ intptr_t Addr;
+
+ if (MR.isBasicBlock()) {
+ Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setConstantVal(MOS->Index);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isJumpTableIndex()) {
+ Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+ MR.setConstantVal(MOW.getJumpTableSection()->Index);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isConstantPoolIndex()) {
+ Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isGlobalValue()) {
+ // FIXME: This should be a set or something that uniques
+ MOW.PendingGlobals.push_back(MR.getGlobalValue());
+ } else {
+ assert(0 && "Unhandled relocation type");
+ }
+ MOS->Relocations.push_back(MR);
+ }
+ Relocations.clear();
+
+ // Finally, add it to the symtab.
+ MOW.SymbolTable.push_back(FnSym);
+ return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in, allocate space for it, and emit it to the
+/// Section data buffer.
+void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // FIXME: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for mach-o jump tables!");
+
+ // Although there is no strict necessity that I am aware of, we will do what
+ // gcc for OS X does and put each constant pool entry in a section of constant
+ // objects of a certain size. That means that float constants go in the
+ // literal4 section, and double objects go in literal8, etc.
+ //
+ // FIXME: revisit this decision if we ever do the "stick everything into one
+ // "giant object for PIC" optimization.
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const Type *Ty = CP[i].getType();
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+
+ MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ CPLocations.push_back(Sec->SectionData.size());
+ CPSections.push_back(Sec->Index);
+
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size += Size;
+
+ // Allocate space in the section for the global.
+ // FIXME: need alignment?
+ // FIXME: share between here and AddSymbolToSection?
+ for (unsigned j = 0; j < Size; ++j)
+ SecDataOut.outbyte(0);
+
+ MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
+ TM.getTargetData(), Sec->Relocations);
+ }
+}
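+
+// For example, under the section-selection rule described above, a float
+// constant lands in the 4-byte literal section and a double in the 8-byte
+// one, while other read-only entries fall back to the regular constant
+// section; the choice itself is delegated to MOW.getConstSection().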
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // FIXME: handle PIC codegen
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "PIC codegen not yet handled for mach-o jump tables!");
+
+ MachOWriter::MachOSection *Sec = MOW.getJumpTableSection();
+ unsigned TextSecIndex = MOW.getTextSection()->Index;
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ // For each jump table, record its offset from the start of the section,
+ // reserve space for the relocations to the MBBs, and add the relocations.
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ JTLocations.push_back(Sec->SectionData.size());
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(),
+ MBBs[mi]));
+ MR.setResultPointer((void *)JTLocations[i]);
+ MR.setConstantVal(TextSecIndex);
+ Sec->Relocations.push_back(MR);
+ SecDataOut.outaddr(0);
+ }
+ }
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size = Sec->SectionData.size();
+}
+
+//===----------------------------------------------------------------------===//
+// MachOWriter Implementation
+//===----------------------------------------------------------------------===//
+
+char MachOWriter::ID = 0;
+MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm)
+ : MachineFunctionPass(&ID), O(o), TM(tm) {
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+ // Create the machine code emitter object for this target.
+ MCE = new MachOCodeEmitter(*this);
+}
+
+MachOWriter::~MachOWriter() {
+ delete MCE;
+}
+
+void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
+ const Type *Ty = GV->getType()->getElementType();
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align = TM.getTargetData()->getPreferredAlignment(GV);
+
+ // Reserve space in the .bss section for this symbol while maintaining the
+ // desired section alignment, which must be at least as much as required by
+ // this symbol.
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ if (Align) {
+ uint64_t OrigSize = Sec->size;
+ Align = Log2_32(Align);
+ Sec->align = std::max(unsigned(Sec->align), Align);
+ Sec->size = (Sec->size + Align - 1) & ~(Align-1);
+
+ // Add alignment padding to buffer as well.
+ // FIXME: remove when we have unified size + output buffer
+ unsigned AlignedSize = Sec->size - OrigSize;
+ for (unsigned i = 0; i < AlignedSize; ++i)
+ SecDataOut.outbyte(0);
+ }
+ // Globals without external linkage apparently do not go in the symbol table.
+ if (!GV->hasLocalLinkage()) {
+ MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM);
+ Sym.n_value = Sec->size;
+ SymbolTable.push_back(Sym);
+ }
+
+ // Record the offset of the symbol, and then allocate space for it.
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size += Size;
+
+ // Now that we know what section the GlobalVariable is going to be emitted
+ // into, update our mappings.
+ // FIXME: We may also need to update this when outputting non-GlobalVariable
+ // GlobalValues such as functions.
+ GVSection[GV] = Sec;
+ GVOffset[GV] = Sec->SectionData.size();
+
+ // Allocate space in the section for the global.
+ for (unsigned i = 0; i < Size; ++i)
+ SecDataOut.outbyte(0);
+}
+
+void MachOWriter::EmitGlobal(GlobalVariable *GV) {
+ const Type *Ty = GV->getType()->getElementType();
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+ bool NoInit = !GV->hasInitializer();
+
+ // If this global has a zero initializer, it is part of the .bss or common
+ // section.
+ if (NoInit || GV->getInitializer()->isNullValue()) {
+ // If this global is part of the common block, add it now. Variables are
+ // part of the common block if they are zero initialized and allowed to be
+ // merged with other symbols.
+ if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
+ GV->hasCommonLinkage()) {
+ MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM);
+ // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
+ // bytes of the symbol.
+ ExtOrCommonSym.n_value = Size;
+ SymbolTable.push_back(ExtOrCommonSym);
+ // Remember that we've seen this symbol
+ GVOffset[GV] = Size;
+ return;
+ }
+ // Otherwise, this symbol is part of the .bss section.
+ MachOSection *BSS = getBSSSection();
+ AddSymbolToSection(BSS, GV);
+ return;
+ }
+
+ // Scalar read-only data goes in a literal section if the scalar is 4, 8, or
+ // 16 bytes, or a cstring. Other read only data goes into a regular const
+ // section. Read-write data goes in the data section.
+ MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) :
+ getDataSection();
+ AddSymbolToSection(Sec, GV);
+ InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV],
+ TM.getTargetData(), Sec->Relocations);
+}
+
+
+bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the MCE object.
+ return false;
+}
+
+bool MachOWriter::doInitialization(Module &M) {
+ // Set the magic value, now that we know the pointer size and endianness
+ Header.setMagic(isLittleEndian, is64Bit);
+
+ // Set the file type
+ // FIXME: this only works for object files, we do not support the creation
+ // of dynamic libraries or executables at this time.
+ Header.filetype = MachOHeader::MH_OBJECT;
+
+ Mang = new Mangler(M);
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the Mach-O file to 'O'.
+bool MachOWriter::doFinalization(Module &M) {
+ // FIXME: we don't handle debug info yet, we should probably do that.
+
+ // Okay, the .text section has been completed; build the .data, .bss, and
+ // "common" sections next.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit the header and load commands.
+ EmitHeaderAndLoadCommands();
+
+ // Emit the various sections and their relocation info.
+ EmitSections();
+
+ // Write the symbol table and the string table to the end of the file.
+ O.write((char*)&SymT[0], SymT.size());
+ O.write((char*)&StrT[0], StrT.size());
+
+ // We are done with the abstract symbols.
+ SectionList.clear();
+ SymbolTable.clear();
+ DynamicSymbolTable.clear();
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+ return false;
+}
+
+void MachOWriter::EmitHeaderAndLoadCommands() {
+ // Step #0: Fill in the segment load command size, since we need it to figure
+ // out the rest of the header fields
+ MachOSegment SEG("", is64Bit);
+ SEG.nsects = SectionList.size();
+ SEG.cmdsize = SEG.cmdSize(is64Bit) +
+ SEG.nsects * SectionList[0]->cmdSize(is64Bit);
+
+ // Step #1: calculate the number of load commands. We always have at least
+ // one, for the LC_SEGMENT load command, plus two for the normal
+ // and dynamic symbol tables, if there are any symbols.
+ Header.ncmds = SymbolTable.empty() ? 1 : 3;
+
+ // Step #2: calculate the size of the load commands
+ Header.sizeofcmds = SEG.cmdsize;
+ if (!SymbolTable.empty())
+ Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize;
+
+ // Step #3: write the header to the file
+ // Local alias to shorten the code that follows.
+ DataBuffer &FH = Header.HeaderData;
+ OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+ FHOut.outword(Header.magic);
+ FHOut.outword(TM.getMachOWriterInfo()->getCPUType());
+ FHOut.outword(TM.getMachOWriterInfo()->getCPUSubType());
+ FHOut.outword(Header.filetype);
+ FHOut.outword(Header.ncmds);
+ FHOut.outword(Header.sizeofcmds);
+ FHOut.outword(Header.flags);
+ if (is64Bit)
+ FHOut.outword(Header.reserved);
+
+ // Step #4: Finish filling in the segment load command and write it out
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ SEG.filesize += (*I)->size;
+
+ SEG.vmsize = SEG.filesize;
+ SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds;
+
+ FHOut.outword(SEG.cmd);
+ FHOut.outword(SEG.cmdsize);
+ FHOut.outstring(SEG.segname, 16);
+ FHOut.outaddr(SEG.vmaddr);
+ FHOut.outaddr(SEG.vmsize);
+ FHOut.outaddr(SEG.fileoff);
+ FHOut.outaddr(SEG.filesize);
+ FHOut.outword(SEG.maxprot);
+ FHOut.outword(SEG.initprot);
+ FHOut.outword(SEG.nsects);
+ FHOut.outword(SEG.flags);
+
+ // Step #5: Finish filling in the fields of the MachOSections
+ uint64_t currentAddr = 0;
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ MachOSection *MOS = *I;
+ MOS->addr = currentAddr;
+ MOS->offset = currentAddr + SEG.fileoff;
+
+ // FIXME: do we need to do something with alignment here?
+ currentAddr += MOS->size;
+ }
+
+ // Step #6: Emit the symbol table to temporary buffers, so that we know the
+ // size of the string table when we write the next load command. This also
+ // sorts and assigns indices to each of the symbols, which is necessary for
+ // emitting relocations to externally-defined objects.
+ BufferSymbolAndStringTable();
+
+ // Step #7: Calculate the number of relocations for each section and write out
+ // the section commands for each section
+ currentAddr += SEG.fileoff;
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ MachOSection *MOS = *I;
+ // Convert the relocations to target-specific relocations, and fill in the
+ // relocation offset for this section.
+ CalculateRelocations(*MOS);
+ MOS->reloff = MOS->nreloc ? currentAddr : 0;
+ currentAddr += MOS->nreloc * 8;
+
+ // write the finalized section command to the output buffer
+ FHOut.outstring(MOS->sectname, 16);
+ FHOut.outstring(MOS->segname, 16);
+ FHOut.outaddr(MOS->addr);
+ FHOut.outaddr(MOS->size);
+ FHOut.outword(MOS->offset);
+ FHOut.outword(MOS->align);
+ FHOut.outword(MOS->reloff);
+ FHOut.outword(MOS->nreloc);
+ FHOut.outword(MOS->flags);
+ FHOut.outword(MOS->reserved1);
+ FHOut.outword(MOS->reserved2);
+ if (is64Bit)
+ FHOut.outword(MOS->reserved3);
+ }
+
+ // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands
+ SymTab.symoff = currentAddr;
+ SymTab.nsyms = SymbolTable.size();
+ SymTab.stroff = SymTab.symoff + SymT.size();
+ SymTab.strsize = StrT.size();
+ FHOut.outword(SymTab.cmd);
+ FHOut.outword(SymTab.cmdsize);
+ FHOut.outword(SymTab.symoff);
+ FHOut.outword(SymTab.nsyms);
+ FHOut.outword(SymTab.stroff);
+ FHOut.outword(SymTab.strsize);
+
+ // FIXME: set DySymTab fields appropriately
+ // We should probably just update these in BufferSymbolAndStringTable since
+ // that's where we're partitioning up the different kinds of symbols.
+ FHOut.outword(DySymTab.cmd);
+ FHOut.outword(DySymTab.cmdsize);
+ FHOut.outword(DySymTab.ilocalsym);
+ FHOut.outword(DySymTab.nlocalsym);
+ FHOut.outword(DySymTab.iextdefsym);
+ FHOut.outword(DySymTab.nextdefsym);
+ FHOut.outword(DySymTab.iundefsym);
+ FHOut.outword(DySymTab.nundefsym);
+ FHOut.outword(DySymTab.tocoff);
+ FHOut.outword(DySymTab.ntoc);
+ FHOut.outword(DySymTab.modtaboff);
+ FHOut.outword(DySymTab.nmodtab);
+ FHOut.outword(DySymTab.extrefsymoff);
+ FHOut.outword(DySymTab.nextrefsyms);
+ FHOut.outword(DySymTab.indirectsymoff);
+ FHOut.outword(DySymTab.nindirectsyms);
+ FHOut.outword(DySymTab.extreloff);
+ FHOut.outword(DySymTab.nextrel);
+ FHOut.outword(DySymTab.locreloff);
+ FHOut.outword(DySymTab.nlocrel);
+
+ O.write((char*)&FH[0], FH.size());
+}
+
+/// EmitSections - Now that we have constructed the file header and load
+/// commands, emit the data for each section to the file.
+void MachOWriter::EmitSections() {
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ // Emit the contents of each section
+ O.write((char*)&(*I)->SectionData[0], (*I)->size);
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ // Emit the relocation entry data for each section.
+ O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
+}
+
+/// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+/// a local symbol rather than an external symbol.
+bool MachOWriter::PartitionByLocal(const MachOSym &Sym) {
+ return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
+}
+
+/// PartitionByDefined - Simple boolean predicate that returns true if Sym is
+/// defined in this module.
+bool MachOWriter::PartitionByDefined(const MachOSym &Sym) {
+ // FIXME: Do N_ABS or N_INDR count as defined?
+ return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
+}
+
+/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them
+/// each a string table index so that they appear in the correct order in the
+/// output file.
+void MachOWriter::BufferSymbolAndStringTable() {
+ // The order of the symbol table is:
+ // 1. local symbols
+ // 2. defined external symbols (sorted by name)
+ // 3. undefined external symbols (sorted by name)
+
+ // Before sorting the symbols, check the PendingGlobals for any undefined
+ // globals that need to be put in the symbol table.
+ for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(),
+ E = PendingGlobals.end(); I != E; ++I) {
+ if (GVOffset[*I] == 0 && GVSection[*I] == 0) {
+ MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM);
+ SymbolTable.push_back(UndfSym);
+ GVOffset[*I] = -1;
+ }
+ }
+
+ // Sort the symbols by name, so that when we partition the symbols by scope
+ // of definition, we won't have to sort by name within each partition.
+ std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp());
+
+ // Partition the symbol table entries so that all local symbols come before
+ // all symbols with external linkage. { 1 | 2 3 }
+ std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal);
+
+ // Advance iterator to beginning of external symbols and partition so that
+ // all external symbols defined in this module come before all external
+ // symbols defined elsewhere. { 1 | 2 | 3 }
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I) {
+ if (!PartitionByLocal(*I)) {
+ std::partition(I, E, PartitionByDefined);
+ break;
+ }
+ }
+
+ // Calculate the starting index for each of the local, extern defined, and
+ // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
+ // load command.
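+ // Note that ilocalsym stays 0; iextdefsym and iundefsym are advanced past
+ // every symbol that precedes their group in the sorted table.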
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I) {
+ if (PartitionByLocal(*I)) {
+ ++DySymTab.nlocalsym;
+ ++DySymTab.iextdefsym;
+ ++DySymTab.iundefsym;
+ } else if (PartitionByDefined(*I)) {
+ ++DySymTab.nextdefsym;
+ ++DySymTab.iundefsym;
+ } else {
+ ++DySymTab.nundefsym;
+ }
+ }
+
+ // Write out a leading zero byte when emitting string table, for n_strx == 0
+ // which means an empty string.
+ OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
+ StrTOut.outbyte(0);
+
+ // The order of the string table is:
+ // 1. strings for external symbols
+ // 2. strings for local symbols
+ // Since this is the opposite order from the symbol table, which we have just
+ // sorted, we can walk the symbol table backwards to output the string table.
+ for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
+ E = SymbolTable.rend(); I != E; ++I) {
+ if (I->GVName == "") {
+ I->n_strx = 0;
+ } else {
+ I->n_strx = StrT.size();
+ StrTOut.outstring(I->GVName, I->GVName.length()+1);
+ }
+ }
+
+ OutputBuffer SymTOut(SymT, is64Bit, isLittleEndian);
+
+ unsigned index = 0;
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I, ++index) {
+ // Add the section base address to the section offset in the n_value field
+ // to calculate the full address.
+ // FIXME: handle symbols where the n_value field is not the address
+ GlobalValue *GV = const_cast<GlobalValue*>(I->GV);
+ if (GV && GVSection[GV])
+ I->n_value += GVSection[GV]->addr;
+ if (GV && (GVOffset[GV] == -1))
+ GVOffset[GV] = index;
+
+ // Emit nlist to buffer
+ SymTOut.outword(I->n_strx);
+ SymTOut.outbyte(I->n_type);
+ SymTOut.outbyte(I->n_sect);
+ SymTOut.outhalf(I->n_desc);
+ SymTOut.outaddr(I->n_value);
+ }
+}
+
+/// CalculateRelocations - For each MachineRelocation in the current section,
+/// calculate the index of the section containing the object to be relocated,
+/// and the offset into that section. From this information, create the
+/// appropriate target-specific MachORelocation type and buffer it to be
+/// written out after we are finished writing out sections.
+void MachOWriter::CalculateRelocations(MachOSection &MOS) {
+ for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = MOS.Relocations[i];
+ unsigned TargetSection = MR.getConstantVal();
+ unsigned TargetAddr = 0;
+ unsigned TargetIndex = 0;
+
+ // This is a scattered relocation entry if it points to a global value with
+ // a non-zero offset.
+ bool Scattered = false;
+ bool Extern = false;
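+ // When Extern is set, TargetIndex is a symbol table index (recorded in
+ // GVOffset by BufferSymbolAndStringTable) rather than a section index.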
+
+ // Since we may not have seen the GlobalValue we were interested in yet at
+ // the time we emitted the relocation for it, fix it up now so that it
+ // points to the offset into the correct section.
+ if (MR.isGlobalValue()) {
+ GlobalValue *GV = MR.getGlobalValue();
+ MachOSection *MOSPtr = GVSection[GV];
+ intptr_t Offset = GVOffset[GV];
+
+ // If we have never seen the global before, it must be to a symbol
+ // defined in another module (N_UNDF).
+ if (!MOSPtr) {
+ // FIXME: need to append stub suffix
+ Extern = true;
+ TargetAddr = 0;
+ TargetIndex = GVOffset[GV];
+ } else {
+ Scattered = TargetSection != 0;
+ TargetSection = MOSPtr->Index;
+ }
+ MR.setResultPointer((void*)Offset);
+ }
+
+ // If the symbol is locally defined, pass in the address of the section and
+ // the section index to the code which will generate the target relocation.
+ if (!Extern) {
+ MachOSection &To = *SectionList[TargetSection - 1];
+ TargetAddr = To.addr;
+ TargetIndex = To.Index;
+ }
+
+ OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian);
+ OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian);
+
+ MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex,
+ RelocOut, SecOut, Scattered, Extern);
+ }
+}
+
+// InitMem - Write the value of a Constant to the specified memory location,
+// converting it into bytes and relocations.
+void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
+ const TargetData *TD,
+ std::vector<MachineRelocation> &MRs) {
+ typedef std::pair<const Constant*, intptr_t> CPair;
+ std::vector<CPair> WorkList;
+
+ WorkList.push_back(CPair(C,(intptr_t)Addr + Offset));
+
+ intptr_t ScatteredOffset = 0;
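+ // ScatteredOffset carries a constant GetElementPtr offset forward so the
+ // resulting pointer relocation can be emitted against the base global
+ // plus that offset.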
+
+ while (!WorkList.empty()) {
+ const Constant *PC = WorkList.back().first;
+ intptr_t PA = WorkList.back().second;
+ WorkList.pop_back();
+
+ if (isa<UndefValue>(PC)) {
+ continue;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) {
+ unsigned ElementSize =
+ TD->getTypeAllocSize(CP->getType()->getElementType());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CP->getOperand(i), PA+i*ElementSize));
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(PC)) {
+ //
+ // FIXME: Handle ConstantExpression. See EE::getConstantValue()
+ //
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+ ScatteredOffset = TD->getIndexedOffset(CE->getOperand(0)->getType(),
+ &Indices[0], Indices.size());
+ WorkList.push_back(CPair(CE->getOperand(0), PA));
+ break;
+ }
+ case Instruction::Add:
+ default:
+ cerr << "ConstantExpr not handled as global var init: " << *CE << "\n";
+ abort();
+ break;
+ }
+ } else if (PC->getType()->isSingleValueType()) {
+ uint8_t *ptr = (uint8_t *)PA;
+ switch (PC->getType()->getTypeID()) {
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth();
+ uint64_t val = cast<ConstantInt>(PC)->getZExtValue();
+ if (NumBits <= 8)
+ ptr[0] = val;
+ else if (NumBits <= 16) {
+ if (TD->isBigEndian())
+ val = ByteSwap_16(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ } else if (NumBits <= 32) {
+ if (TD->isBigEndian())
+ val = ByteSwap_32(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ } else if (NumBits <= 64) {
+ if (TD->isBigEndian())
+ val = ByteSwap_64(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ ptr[4] = val >> 32;
+ ptr[5] = val >> 40;
+ ptr[6] = val >> 48;
+ ptr[7] = val >> 56;
+ } else {
+ assert(0 && "Not implemented: bit widths > 64");
+ }
+ break;
+ }
+ case Type::FloatTyID: {
+ uint32_t val = cast<ConstantFP>(PC)->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ if (TD->isBigEndian())
+ val = ByteSwap_32(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ break;
+ }
+ case Type::DoubleTyID: {
+ uint64_t val = cast<ConstantFP>(PC)->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ if (TD->isBigEndian())
+ val = ByteSwap_64(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ ptr[4] = val >> 32;
+ ptr[5] = val >> 40;
+ ptr[6] = val >> 48;
+ ptr[7] = val >> 56;
+ break;
+ }
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(PC))
+ memset(ptr, 0, TD->getPointerSize());
+ else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) {
+ // FIXME: what about function stubs?
+ MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr,
+ MachineRelocation::VANILLA,
+ const_cast<GlobalValue*>(GV),
+ ScatteredOffset));
+ ScatteredOffset = 0;
+ } else
+ assert(0 && "Unknown constant pointer type!");
+ break;
+ default:
+ cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n";
+ abort();
+ }
+ } else if (isa<ConstantAggregateZero>(PC)) {
+ memset((void*)PA, 0, (size_t)TD->getTypeAllocSize(PC->getType()));
+ } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(PC)) {
+ unsigned ElementSize =
+ TD->getTypeAllocSize(CPA->getType()->getElementType());
+ for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CPA->getOperand(i), PA+i*ElementSize));
+ } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(PC)) {
+ const StructLayout *SL =
+ TD->getStructLayout(cast<StructType>(CPS->getType()));
+ for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CPS->getOperand(i),
+ PA+SL->getElementOffset(i)));
+ } else {
+ cerr << "Bad Type: " << *PC->getType() << "\n";
+ assert(0 && "Unknown constant type to initialize memory with!");
+ }
+ }
+}
+
+MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+ TargetMachine &TM) :
+ GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect),
+ n_desc(0), n_value(0) {
+
+ const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
+
+ switch (GV->getLinkage()) {
+ default:
+ assert(0 && "Unexpected linkage type!");
+ break;
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::CommonLinkage:
+ assert(!isa<Function>(gv) && "Unexpected linkage type for Function!");
+ case GlobalValue::ExternalLinkage:
+ GVName = TAI->getGlobalPrefix() + name;
+ n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT;
+ break;
+ case GlobalValue::PrivateLinkage:
+ GVName = TAI->getPrivateGlobalPrefix() + name;
+ break;
+ case GlobalValue::InternalLinkage:
+ GVName = TAI->getGlobalPrefix() + name;
+ break;
+ }
+}
diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h
new file mode 100644
index 0000000..6ab66ee
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.h
@@ -0,0 +1,629 @@
+//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHOWRITER_H
+#define MACHOWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachOWriterInfo.h"
+#include <map>
+
+namespace llvm {
+ class GlobalVariable;
+ class Mangler;
+ class MachineCodeEmitter;
+ class MachOCodeEmitter;
+ class OutputBuffer;
+ class raw_ostream;
+
+ /// MachOSym - This struct contains information about each symbol that is
+ /// added to the logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct MachOSym {
+ const GlobalValue *GV; // The global value this corresponds to.
+ std::string GVName; // The mangled name of the global value.
+ uint32_t n_strx; // index into the string table
+ uint8_t n_type; // type flag
+ uint8_t n_sect; // section number or NO_SECT
+ int16_t n_desc; // see <mach-o/stab.h>
+ uint64_t n_value; // value for this symbol (or stab offset)
+
+ // Constants for the n_sect field
+ // see <mach-o/nlist.h>
+ enum { NO_SECT = 0 }; // symbol is not in any section
+
+ // Constants for the n_type field
+ // see <mach-o/nlist.h>
+ enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT
+ N_ABS = 0x2, // absolute, n_sect == NO_SECT
+ N_SECT = 0xe, // defined in section number n_sect
+ N_PBUD = 0xc, // prebound undefined (defined in a dylib)
+ N_INDR = 0xa // indirect
+ };
+ // The following bits are OR'd into the types above. For example, a type
+ // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
+ enum { N_EXT = 0x01, // external symbol bit
+ N_PEXT = 0x10 // private external symbol bit
+ };
+
+ // Constants for the n_desc field
+ // see <mach-o/loader.h>
+ enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
+ REFERENCE_FLAG_UNDEFINED_LAZY = 1,
+ REFERENCE_FLAG_DEFINED = 2,
+ REFERENCE_FLAG_PRIVATE_DEFINED = 3,
+ REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
+ REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
+ };
+ enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
+ N_WEAK_REF = 0x0040, // symbol is weak referenced
+ N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition
+ };
+
+ MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+ TargetMachine &TM);
+ };
+
+ /// MachOWriter - This class implements the common target-independent code for
+ /// writing Mach-O files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class MachOWriter : public MachineFunctionPass {
+ friend class MachOCodeEmitter;
+ public:
+ static char ID;
+ MachineCodeEmitter &getMachineCodeEmitter() const {
+ return *(MachineCodeEmitter*)MCE;
+ }
+
+ MachOWriter(raw_ostream &O, TargetMachine &TM);
+ virtual ~MachOWriter();
+
+ virtual const char *getPassName() const {
+ return "Mach-O Writer";
+ }
+
+ typedef std::vector<uint8_t> DataBuffer;
+ protected:
+ /// Output stream to send the resultant object file to.
+ ///
+ raw_ostream &O;
+
+ /// Target machine description.
+ ///
+ TargetMachine &TM;
+
+ /// Mang - The object used to perform name mangling for this module.
+ ///
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ MachOCodeEmitter *MCE;
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating what header values and flags to set.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the Mach-O file.
+ bool doInitialization(Module &M);
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the Mach-O file to 'O'.
+ bool doFinalization(Module &M);
+
+ /// MachOHeader - This struct contains the header information about a
+ /// specific architecture type/subtype pair that is emitted to the file.
+ struct MachOHeader {
+ uint32_t magic; // mach magic number identifier
+ uint32_t filetype; // type of file
+ uint32_t ncmds; // number of load commands
+ uint32_t sizeofcmds; // the size of all the load commands
+ uint32_t flags; // flags
+ uint32_t reserved; // 64-bit only
+
+ /// HeaderData - The actual data for the header which we are building
+ /// up for emission to the file.
+ DataBuffer HeaderData;
+
+ // Constants for the filetype field
+ // see <mach-o/loader.h> for additional info on the various types
+ enum { MH_OBJECT = 1, // relocatable object file
+ MH_EXECUTE = 2, // demand paged executable file
+ MH_FVMLIB = 3, // fixed VM shared library file
+ MH_CORE = 4, // core file
+ MH_PRELOAD = 5, // preloaded executable file
+ MH_DYLIB = 6, // dynamically bound shared library
+ MH_DYLINKER = 7, // dynamic link editor
+ MH_BUNDLE = 8, // dynamically bound bundle file
+ MH_DYLIB_STUB = 9, // shared library stub for static linking only
+ MH_DSYM = 10 // companion file with only debug sections
+ };
+
+ // Constants for the flags field
+ enum { MH_NOUNDEFS = 1 << 0,
+ // the object file has no undefined references
+ MH_INCRLINK = 1 << 1,
+ // the object file is the output of an incremental link against
+ // a base file and cannot be link edited again
+ MH_DYLDLINK = 1 << 2,
+ // the object file is input for the dynamic linker and cannot be
+ // statically link edited again.
+ MH_BINDATLOAD = 1 << 3,
+ // the object file's undefined references are bound by the
+ // dynamic linker when loaded.
+ MH_PREBOUND = 1 << 4,
+ // the file has its dynamic undefined references prebound
+ MH_SPLIT_SEGS = 1 << 5,
+ // the file has its read-only and read-write segments split
+ // see <mach/shared_memory_server.h>
+ MH_LAZY_INIT = 1 << 6,
+ // the shared library init routine is to be run lazily via
+ // catching memory faults to its writable segments (obsolete)
+ MH_TWOLEVEL = 1 << 7,
+ // the image is using two-level namespace bindings
+ MH_FORCE_FLAT = 1 << 8,
+ // the executable is forcing all images to use flat namespace
+ // bindings.
+ MH_NOMULTIDEFS = 1 << 9,
+ // this umbrella guarantees no multiple definitions of symbols
+ // in its sub-images so the two-level namespace hints can
+ // always be used.
+ MH_NOFIXPREBINDING = 1 << 10,
+ // do not have dyld notify the prebinding agent about this
+ // executable.
+ MH_PREBINDABLE = 1 << 11,
+ // the binary is not prebound but can have its prebinding
+ // redone. only used when MH_PREBOUND is not set.
+ MH_ALLMODSBOUND = 1 << 12,
+ // indicates that this binary binds to all two-level namespace
+ // modules of its dependent libraries. Only used when
+ // MH_PREBINDABLE and MH_TWOLEVEL are both set.
+ MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
+ // safe to divide up the sections into sub-sections via symbols
+ // for dead code stripping.
+ MH_CANONICAL = 1 << 14,
+ // the binary has been canonicalized via the unprebind operation
+ MH_WEAK_DEFINES = 1 << 15,
+ // the final linked image contains external weak symbols
+ MH_BINDS_TO_WEAK = 1 << 16,
+ // the final linked image uses weak symbols
+ MH_ALLOW_STACK_EXECUTION = 1 << 17
+ // When this bit is set, all stacks in the task will be given
+ // stack execution privilege. Only used in MH_EXECUTE filetype
+ };
+
+ MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
+ reserved(0) { }
+
+ /// cmdSize - This routine returns the size of the MachOHeader as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
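+ /// The 64-bit header is one 32-bit word larger because of the trailing
+ /// reserved field.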
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 8 * sizeof(uint32_t);
+ else
+ return 7 * sizeof(uint32_t);
+ }
+
+ /// setMagic - This routine sets the appropriate value for the 'magic'
+ /// field based on pointer size and endianness.
+ void setMagic(bool isLittleEndian, bool is64Bit) {
+ if (isLittleEndian)
+ if (is64Bit) magic = 0xcffaedfe;
+ else magic = 0xcefaedfe;
+ else
+ if (is64Bit) magic = 0xfeedfacf;
+ else magic = 0xfeedface;
+ }
+ };
+
+ /// Header - An instance of MachOHeader that we will update while we build
+ /// the file, and then emit during finalization.
+ MachOHeader Header;
+
+ /// MachOSegment - This struct contains the necessary information to
+ /// emit the load commands for each section in the file.
+ struct MachOSegment {
+ uint32_t cmd; // LC_SEGMENT or LC_SEGMENT_64
+ uint32_t cmdsize; // Total size of this struct and section commands
+ std::string segname; // segment name
+ uint64_t vmaddr; // address of this segment
+ uint64_t vmsize; // size of this segment, may be larger than filesize
+ uint64_t fileoff; // offset in file
+ uint64_t filesize; // amount to read from file
+ uint32_t maxprot; // maximum VM protection
+ uint32_t initprot; // initial VM protection
+ uint32_t nsects; // number of sections in this segment
+ uint32_t flags; // flags
+
+ // The following constants are getting pulled in by one of the
+ // system headers, which creates a neat clash with the enum.
+#if !defined(VM_PROT_NONE)
+#define VM_PROT_NONE 0x00
+#endif
+#if !defined(VM_PROT_READ)
+#define VM_PROT_READ 0x01
+#endif
+#if !defined(VM_PROT_WRITE)
+#define VM_PROT_WRITE 0x02
+#endif
+#if !defined(VM_PROT_EXECUTE)
+#define VM_PROT_EXECUTE 0x04
+#endif
+#if !defined(VM_PROT_ALL)
+#define VM_PROT_ALL 0x07
+#endif
+
+ // Constants for the vm protection fields
+ // see <mach-o/vm_prot.h>
+ enum { SEG_VM_PROT_NONE = VM_PROT_NONE,
+ SEG_VM_PROT_READ = VM_PROT_READ, // read permission
+ SEG_VM_PROT_WRITE = VM_PROT_WRITE, // write permission
+ SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
+ SEG_VM_PROT_ALL = VM_PROT_ALL
+ };
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_SEGMENT = 0x01, // segment of this file to be mapped
+ LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped
+ };
+
+ /// cmdSize - This routine returns the size of the MachOSegment as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
+ else
+ return 10 * sizeof(uint32_t) + 16; // addresses only 32 bits
+ }
+
+ MachOSegment(const std::string &seg, bool is64Bit)
+ : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
+ vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
+ initprot(VM_PROT_ALL), nsects(0), flags(0) { }
+ };
+
+ /// MachOSection - This struct contains information about each section in a
+ /// particular segment that is emitted to the file. This is eventually
+ /// turned into the SectionCommand in the load command for a particular
+ /// segment.
+ struct MachOSection {
+ std::string sectname; // name of this section,
+ std::string segname; // segment this section goes in
+ uint64_t addr; // memory address of this section
+ uint64_t size; // size in bytes of this section
+ uint32_t offset; // file offset of this section
+ uint32_t align; // section alignment (power of 2)
+ uint32_t reloff; // file offset of relocation entries
+ uint32_t nreloc; // number of relocation entries
+ uint32_t flags; // flags (section type and attributes)
+ uint32_t reserved1; // reserved (for offset or index)
+ uint32_t reserved2; // reserved (for count or sizeof)
+ uint32_t reserved3; // reserved (64 bit only)
+
+ /// A unique number for this section, which will be used to match symbols
+ /// to the correct section.
+ uint32_t Index;
+
+ /// SectionData - The actual data for this section which we are building
+ /// up for emission to the file.
+ DataBuffer SectionData;
+
+ /// RelocBuffer - A buffer to hold the mach-o relocations before we write
+ /// them out at the appropriate location in the file.
+ DataBuffer RelocBuffer;
+
+ /// Relocations - The relocations that we have encountered so far in this
+ /// section that we will need to convert to MachORelocation entries when
+ /// the file is written.
+ std::vector<MachineRelocation> Relocations;
+
+ // Constants for the section types (low 8 bits of flags field)
+ // see <mach-o/loader.h>
+ enum { S_REGULAR = 0,
+ // regular section
+ S_ZEROFILL = 1,
+ // zero fill on demand section
+ S_CSTRING_LITERALS = 2,
+ // section with only literal C strings
+ S_4BYTE_LITERALS = 3,
+ // section with only 4 byte literals
+ S_8BYTE_LITERALS = 4,
+ // section with only 8 byte literals
+ S_LITERAL_POINTERS = 5,
+ // section with only pointers to literals
+ S_NON_LAZY_SYMBOL_POINTERS = 6,
+ // section with only non-lazy symbol pointers
+ S_LAZY_SYMBOL_POINTERS = 7,
+ // section with only lazy symbol pointers
+ S_SYMBOL_STUBS = 8,
+ // section with only symbol stubs
+ // byte size of stub in the reserved2 field
+ S_MOD_INIT_FUNC_POINTERS = 9,
+ // section with only function pointers for initialization
+ S_MOD_TERM_FUNC_POINTERS = 10,
+ // section with only function pointers for termination
+ S_COALESCED = 11,
+ // section contains symbols that are coalesced
+ S_GB_ZEROFILL = 12,
+ // zero fill on demand section (that can be larger than 4GB)
+ S_INTERPOSING = 13,
+ // section with only pairs of function pointers for interposing
+ S_16BYTE_LITERALS = 14
+ // section with only 16 byte literals
+ };
+
+ // Constants for the section flags (high 24 bits of flags field)
+ // see <mach-o/loader.h>
+ enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31,
+ // section contains only true machine instructions
+ S_ATTR_NO_TOC = 1 << 30,
+ // section contains coalesced symbols that are not to be in a
+ // ranlib table of contents
+ S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
+ // ok to strip static symbols in this section in files with the
+ // MH_DYLDLINK flag
+ S_ATTR_NO_DEAD_STRIP = 1 << 28,
+ // no dead stripping
+ S_ATTR_LIVE_SUPPORT = 1 << 27,
+ // blocks are live if they reference live blocks
+ S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
+ // used with i386 code stubs written on by dyld
+ S_ATTR_DEBUG = 1 << 25,
+ // a debug section
+ S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
+ // section contains some machine instructions
+ S_ATTR_EXT_RELOC = 1 << 9,
+ // section has external relocation entries
+ S_ATTR_LOC_RELOC = 1 << 8
+ // section has local relocation entries
+ };
+
+ /// cmdSize - This routine returns the size of the MachOSection as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
+ else
+ return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits
+ }
+
+ MachOSection(const std::string &seg, const std::string &sect)
+ : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
+ reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+ reserved3(0) { }
+ };
+
+ private:
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the segment load command
+ /// SectionCommands are constructed from this info.
+ std::vector<MachOSection*> SectionList;
+
+ /// SectionLookup - This is a mapping from section name to SectionList entry
+ std::map<std::string, MachOSection*> SectionLookup;
+
+ /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
+ /// to aid in emitting relocations.
+ std::map<GlobalValue*, MachOSection*> GVSection;
+
+ /// GVOffset - This is a mapping from a GlobalValue to an offset from the
+ /// start of the section in which the GV resides, to aid in emitting
+ /// relocations.
+ std::map<GlobalValue*, intptr_t> GVOffset;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ MachOSection *getSection(const std::string &seg, const std::string &sect,
+ unsigned Flags = 0) {
+ MachOSection *MOS = SectionLookup[seg+sect];
+ if (MOS) return MOS;
+
+ MOS = new MachOSection(seg, sect);
+ SectionList.push_back(MOS);
+ MOS->Index = SectionList.size();
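+ // Section indices are 1-based, matching Mach-O's n_sect convention
+ // (0 is reserved for NO_SECT).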
+ MOS->flags = MachOSection::S_REGULAR | Flags;
+ SectionLookup[seg+sect] = MOS;
+ return MOS;
+ }
+ MachOSection *getTextSection(bool isCode = true) {
+ if (isCode)
+ return getSection("__TEXT", "__text",
+ MachOSection::S_ATTR_PURE_INSTRUCTIONS |
+ MachOSection::S_ATTR_SOME_INSTRUCTIONS);
+ else
+ return getSection("__TEXT", "__text");
+ }
+ MachOSection *getBSSSection() {
+ return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
+ }
+ MachOSection *getDataSection() {
+ return getSection("__DATA", "__data");
+ }
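+ /// getConstSection - Pick a section for a constant: C string literals go
+ /// in __cstring, 4/8/16-byte scalars go in the fixed-size literal
+ /// sections, and everything else goes in __TEXT,__const.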
+ MachOSection *getConstSection(Constant *C) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (CVA && CVA->isCString())
+ return getSection("__TEXT", "__cstring",
+ MachOSection::S_CSTRING_LITERALS);
+
+ const Type *Ty = C->getType();
+ if (Ty->isPrimitiveType() || Ty->isInteger()) {
+ unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+ switch(Size) {
+ default: break; // Fall through to __TEXT,__const
+ case 4:
+ return getSection("__TEXT", "__literal4",
+ MachOSection::S_4BYTE_LITERALS);
+ case 8:
+ return getSection("__TEXT", "__literal8",
+ MachOSection::S_8BYTE_LITERALS);
+ case 16:
+ return getSection("__TEXT", "__literal16",
+ MachOSection::S_16BYTE_LITERALS);
+ }
+ }
+ return getSection("__TEXT", "__const");
+ }
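+ /// getJumpTableSection - PIC jump tables live in the text section (without
+ /// the instruction attributes); otherwise they go in __TEXT,__const.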
+ MachOSection *getJumpTableSection() {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return getTextSection(false);
+ else
+ return getSection("__TEXT", "__const");
+ }
+
+ /// MachOSymTab - This struct contains information about the offsets and
+ /// size of the symbol table information.
+ struct MachOSymTab {
+ uint32_t cmd; // LC_SYMTAB
+ uint32_t cmdsize; // sizeof( MachOSymTab )
+ uint32_t symoff; // symbol table offset
+ uint32_t nsyms; // number of symbol table entries
+ uint32_t stroff; // string table offset
+ uint32_t strsize; // string table size in bytes
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info
+ };
+
+ MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
+ nsyms(0), stroff(0), strsize(0) { }
+ };
+
+ /// MachODySymTab - This struct contains information about the offsets and
+ /// size of the dynamic link-edit symbol table information.
+ struct MachODySymTab {
+ uint32_t cmd; // LC_DYSYMTAB
+ uint32_t cmdsize; // sizeof( MachODySymTab )
+ uint32_t ilocalsym; // index to local symbols
+ uint32_t nlocalsym; // number of local symbols
+ uint32_t iextdefsym; // index to externally defined symbols
+ uint32_t nextdefsym; // number of externally defined symbols
+ uint32_t iundefsym; // index to undefined symbols
+ uint32_t nundefsym; // number of undefined symbols
+ uint32_t tocoff; // file offset to table of contents
+ uint32_t ntoc; // number of entries in table of contents
+ uint32_t modtaboff; // file offset to module table
+ uint32_t nmodtab; // number of module table entries
+ uint32_t extrefsymoff; // offset to referenced symbol table
+ uint32_t nextrefsyms; // number of referenced symbol table entries
+ uint32_t indirectsymoff; // file offset to the indirect symbol table
+ uint32_t nindirectsyms; // number of indirect symbol table entries
+ uint32_t extreloff; // offset to external relocation entries
+ uint32_t nextrel; // number of external relocation entries
+ uint32_t locreloff; // offset to local relocation entries
+ uint32_t nlocrel; // number of local relocation entries
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info
+ };
+
+ MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+ ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+ iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+ nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+ nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
+ };
+
+ /// SymTab - The "stab" style symbol table information
+ MachOSymTab SymTab;
+ /// DySymTab - symbol table info for the dynamic link editor
+ MachODySymTab DySymTab;
+
+ struct MachOSymCmp {
+ // FIXME: this does not appear to be sorting 'f' after 'F'
+ bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
+ return LHS.GVName < RHS.GVName;
+ }
+ };
+
+ /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+ /// a local symbol rather than an external symbol.
+ static bool PartitionByLocal(const MachOSym &Sym);
+
+ /// PartitionByDefined - Simple boolean predicate that returns true if Sym
+ /// is defined in this module.
+ static bool PartitionByDefined(const MachOSym &Sym);
+
+ protected:
+
+ /// SymbolTable - This is the list of symbols we have emitted to the file.
+ /// This actually gets rearranged before emission to the file (to put the
+ /// local symbols first in the list).
+ std::vector<MachOSym> SymbolTable;
+
+ /// SymT - A buffer to hold the symbol table before we write it out at the
+ /// appropriate location in the file.
+ DataBuffer SymT;
+
+ /// StrT - A buffer to hold the string table before we write it out at the
+ /// appropriate location in the file.
+ DataBuffer StrT;
+
+ /// PendingGlobals - This is a list of externally defined symbols that we have
+ /// been asked to emit, but have not seen a reference to. When a reference
+ /// is seen, the symbol will move from this list to the SymbolTable.
+ std::vector<GlobalValue*> PendingGlobals;
+
+ /// DynamicSymbolTable - This is just a vector of indices into
+ /// SymbolTable to aid in emitting the DYSYMTAB load command.
+ std::vector<unsigned> DynamicSymbolTable;
+
+ static void InitMem(const Constant *C, void *Addr, intptr_t Offset,
+ const TargetData *TD,
+ std::vector<MachineRelocation> &MRs);
+
+ private:
+ void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV);
+ void EmitGlobal(GlobalVariable *GV);
+ void EmitHeaderAndLoadCommands();
+ void EmitSections();
+ void BufferSymbolAndStringTable();
+ void CalculateRelocations(MachOSection &MOS);
+
+ MachineRelocation GetJTRelocation(unsigned Offset,
+ MachineBasicBlock *MBB) const {
+ return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
+ }
+
+ /// GetTargetRelocation - Returns the number of relocations.
+ unsigned GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIndex,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered,
+ bool Extern) {
+ return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
+ ToIndex, RelocOut,
+ SecOut, Scattered,
+ Extern);
+ }
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 0000000..71e6b3e
--- /dev/null
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,372 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
+ : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false) {
+ Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// parent pointer of the MBB, the MBB numbering, and any instructions in the
+/// MBB to be on the right operand list for registers.
+///
+/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ I->AddRegOperandsToUseLists(RegInfo);
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+ LeakDetector::addGarbageObject(N);
+}
+
+
+/// addNodeToList (MI) - When we add an instruction to a basic block
+/// list, we update its parent pointer and add its operands to the reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) {
+ assert(N->getParent() == 0 && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->AddRegOperandsToUseLists(MF->getRegInfo());
+
+ LeakDetector::removeGarbageObject(N);
+}
+
+/// removeNodeFromList (MI) - When we remove an instruction from a basic block
+/// list, we update its parent pointer and remove its operands from reg use/def
+/// lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) {
+ assert(N->getParent() != 0 && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ N->RemoveRegOperandsFromUseLists();
+
+ N->setParent(0);
+
+ LeakDetector::addGarbageObject(N);
+}
+
+/// transferNodesFromList (MI) - When moving a range of instructions from one
+/// MBB list to another, we need to update the parent pointers and the use/def
+/// lists.
+void ilist_traits<MachineInstr>::transferNodesFromList(
+ ilist_traits<MachineInstr>& fromList,
+ MachineBasicBlock::iterator first,
+ MachineBasicBlock::iterator last) {
+ assert(Parent->getParent() == fromList.Parent->getParent() &&
+ "MachineInstr parent mismatch!");
+
+ // Splice within the same MBB -> no change.
+ if (Parent == fromList.Parent) return;
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; first != last; ++first)
+ first->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
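+ // Scan backwards over any trailing terminators; after the loop, step
+ // forward one if needed so the iterator points at the first terminator
+ // (or end() if the block has no terminators).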
+ iterator I = end();
+ while (I != begin() && (--I)->getDesc().isTerminator())
+ ; /*noop */
+ if (I != end() && !I->getDesc().isTerminator()) ++I;
+ return I;
+}
+
+bool
+MachineBasicBlock::isOnlyReachableByFallthrough() const {
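+ // The block is reachable only by fallthrough if it is not a landing pad,
+ // has exactly one predecessor, that predecessor is its layout predecessor,
+ // and the predecessor does not end with a barrier instruction.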
+ return !isLandingPad() &&
+ !pred_empty() &&
+ next(pred_begin()) == pred_end() &&
+ (*pred_begin())->isLayoutSuccessor(this) &&
+ ((*pred_begin())->empty() ||
+ !(*pred_begin())->back().getDesc().isBarrier());
+}
+
+void MachineBasicBlock::dump() const {
+ print(*cerr.stream());
+}
+
+static inline void OutputReg(std::ostream &os, unsigned RegNo,
+ const TargetRegisterInfo *TRI = 0) {
+ if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) {
+ if (TRI)
+ os << " %" << TRI->get(RegNo).Name;
+ else
+ os << " %mreg(" << RegNo << ")";
+ } else
+ os << " %reg" << RegNo;
+}
+
+void MachineBasicBlock::print(std::ostream &OS) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ const BasicBlock *LBB = getBasicBlock();
+ OS << "\n";
+ if (LBB) OS << LBB->getName() << ": ";
+ OS << (const void*)this
+ << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber();
+ if (Alignment) OS << ", Alignment " << Alignment;
+ if (isLandingPad()) OS << ", EH LANDING PAD";
+ OS << ":\n";
+
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!livein_empty()) {
+ OS << "Live Ins:";
+ for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ OutputReg(OS, *I, TRI);
+ OS << "\n";
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " " << *PI << " (#" << (*PI)->getNumber() << ")";
+ OS << "\n";
+ }
+
+ for (const_iterator I = begin(); I != end(); ++I) {
+ OS << "\t";
+ I->print(OS, &getParent()->getTarget());
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ OS << " " << *SI << " (#" << (*SI)->getNumber() << ")";
+ OS << "\n";
+ }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ assert(I != livein_end() && "Not a live in!");
+ LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+ const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ MachineFunction::iterator BBI = NewBefore;
+ getParent()->splice(++BBI, this);
+}
+
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+ succ->removePredecessor(this);
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ assert(I != Successors.end() && "Not a current successor!");
+ Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+ assert(I != Successors.end() && "Not a current successor!");
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+ Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+ std::vector<MachineBasicBlock *>::iterator I =
+ std::find(Predecessors.begin(), Predecessors.end(), pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB)
+{
+ if (this == fromMBB)
+ return;
+
+ for(MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(),
+ end = fromMBB->succ_end(); iter != end; ++iter) {
+ addSuccessor(*iter);
+ }
+ while(!fromMBB->succ_empty())
+ fromMBB->removeSuccessor(fromMBB->succ_begin());
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ std::vector<MachineBasicBlock *>::const_iterator I =
+ std::find(Successors.begin(), Successors.end(), MBB);
+ return I != Successors.end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return next(I) == MachineFunction::const_iterator(MBB);
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::iterator I = end();
+ while (I != begin()) {
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ removeSuccessor(Old);
+ addSuccessor(New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can
+/// be null.
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool isCond) {
+ bool MadeChange = false;
+ bool AddedFallThrough = false;
+
+ MachineFunction::iterator FallThru = next(MachineFunction::iterator(this));
+
+ // If this block ends with a conditional branch that falls through to its
+ // successor, set DestB as the successor.
+ if (isCond) {
+ if (DestB == 0 && FallThru != getParent()->end()) {
+ DestB = FallThru;
+ AddedFallThrough = true;
+ }
+ } else {
+ // If this is an unconditional branch with no explicit dest, it must just be
+ // a fallthrough into DestB.
+ if (DestA == 0 && FallThru != getParent()->end()) {
+ DestA = FallThru;
+ AddedFallThrough = true;
+ }
+ }
+
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
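+ // Walk the successor list, clearing DestA/DestB the first time each is
+ // seen; any other successor is removed as superfluous, except for landing
+ // pads distinct from the original DestA/DestB.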
+ while (SI != succ_end()) {
+ if (*SI == DestA && DestA == DestB) {
+ DestA = DestB = 0;
+ ++SI;
+ } else if (*SI == DestA) {
+ DestA = 0;
+ ++SI;
+ } else if (*SI == DestB) {
+ DestB = 0;
+ ++SI;
+ } else if ((*SI)->isLandingPad() &&
+ *SI!=OrigDestA && *SI!=OrigDestB) {
+ ++SI;
+ } else {
+ // Otherwise, this is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ MadeChange = true;
+ }
+ }
+ if (!AddedFallThrough) {
+ assert(DestA == 0 && DestB == 0 &&
+ "MachineCFG is missing edges!");
+ } else if (isCond) {
+ assert(DestA == 0 && "MachineCFG is missing edges!");
+ }
+ return MadeChange;
+}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 0000000..37c8601
--- /dev/null
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,53 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+
+char MachineDominatorTree::ID = 0;
+
+static RegisterPass<MachineDominatorTree>
+E("machinedomtree", "MachineDominator Tree Construction", true);
+
+const PassInfo *const llvm::MachineDominatorsID = &E;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+
+ return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(&ID) {
+ DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+ DT->releaseMemory();
+ delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+ DT->releaseMemory();
+}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 0000000..cacfed1
--- /dev/null
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,598 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+#include <sstream>
+using namespace llvm;
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ return runOnMachineFunction(MachineFunction::get(&F));
+}
+
+namespace {
+ struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass {
+ static char ID;
+
+ std::ostream *OS;
+ const std::string Banner;
+
+ Printer (std::ostream *os, const std::string &banner)
+ : MachineFunctionPass(&ID), OS(os), Banner(banner) {}
+
+ const char *getPassName() const { return "MachineFunction Printer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ (*OS) << Banner;
+ MF.print (*OS);
+ return false;
+ }
+ };
+ char Printer::ID = 0;
+}
+
+/// Returns a newly-created MachineFunction Printer pass. The default output
+/// stream is std::cerr; the default banner is empty.
+///
+FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS,
+ const std::string &Banner){
+ return new Printer(OS, Banner);
+}
+
+namespace {
+ struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass {
+ static char ID;
+ Deleter() : MachineFunctionPass(&ID) {}
+
+ const char *getPassName() const { return "Machine Code Deleter"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ // Delete the annotation from the function now.
+ MachineFunction::destruct(MF.getFunction());
+ return true;
+ }
+ };
+ char Deleter::ID = 0;
+}
+
+/// MachineCodeDeletion Pass - This pass deletes all of the machine code for
+/// the current function, which should happen after the function has been
+/// emitted to a .s file or to memory.
+FunctionPass *llvm::createMachineCodeDeleter() {
+ return new Deleter();
+}
+
+
+
+//===---------------------------------------------------------------------===//
+// MachineFunction implementation
+//===---------------------------------------------------------------------===//
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F,
+ const TargetMachine &TM)
+ : Annotation(AnnotationManager::getID("CodeGen::MachineCodeForFunction")),
+ Fn(F), Target(TM) {
+ if (TM.getRegisterInfo())
+ RegInfo = new (Allocator.Allocate<MachineRegisterInfo>())
+ MachineRegisterInfo(*TM.getRegisterInfo());
+ else
+ RegInfo = 0;
+ MFInfo = 0;
+ FrameInfo = new (Allocator.Allocate<MachineFrameInfo>())
+ MachineFrameInfo(*TM.getFrameInfo());
+ ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
+ MachineConstantPool(TM.getTargetData());
+
+ // Set up jump table.
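+ // With PIC relocation each jump table entry is 4 bytes, aligned like an
+ // i32; otherwise entries are pointer-sized and pointer-aligned.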
+ const TargetData &TD = *TM.getTargetData();
+ bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+ unsigned EntrySize = IsPic ? 4 : TD.getPointerSize();
+ unsigned Alignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
+ : TD.getPointerABIAlignment();
+ JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
+ MachineJumpTableInfo(EntrySize, Alignment);
+}
+
+MachineFunction::~MachineFunction() {
+ BasicBlocks.clear();
+ InstructionRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+ if (RegInfo)
+ RegInfo->~MachineRegisterInfo(); Allocator.Deallocate(RegInfo);
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo(); Allocator.Deallocate(MFInfo);
+ }
+ FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
+ ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
+ JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo);
+}
+
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == 0)
+ MBBI = begin();
+ else
+ MBBI = MBB;
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = prior(MBBI)->getNumber()+1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = 0;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+ DebugLoc DL, bool NoImp) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(TID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except that the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+ // Clear the instruction's memoperands. This must be done manually because
+ // the instruction's parent pointer is now null, so it can't properly
+ // deallocate them on its own.
+ MI->clearMemOperands(*this);
+
+ MI->~MachineInstr();
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+void MachineFunction::dump() const {
+ print(*cerr.stream());
+}
+
+void MachineFunction::print(std::ostream &OS) const {
+ OS << "# Machine code for " << Fn->getName () << "():\n";
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ {
+ raw_os_ostream OSS(OS);
+ ConstantPool->print(OSS);
+ }
+
+ const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Live Ins:";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ if (TRI)
+ OS << " " << TRI->getName(I->first);
+ else
+ OS << " Reg #" << I->first;
+
+ if (I->second)
+ OS << " in VR#" << I->second << " ";
+ }
+ OS << "\n";
+ }
+ if (RegInfo && !RegInfo->liveout_empty()) {
+ OS << "Live Outs:";
+ for (MachineRegisterInfo::liveout_iterator
+ I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
+ if (TRI)
+ OS << " " << TRI->getName(*I);
+ else
+ OS << " Reg #" << *I;
+ OS << "\n";
+ }
+
+ for (const_iterator BB = begin(); BB != end(); ++BB)
+ BB->print(OS);
+
+ OS << "\n# End machine code for " << Fn->getName () << "().\n\n";
+}
+
+/// CFGOnly flag - This is used to control whether or not the CFG graph printer
+/// prints out the contents of basic blocks or not. This is acceptable because
+/// this code is only really used for debugging purposes.
+///
+static bool CFGOnly = false;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+ static std::string getGraphName(const MachineFunction *F) {
+ return "CFG for '" + F->getFunction()->getName() + "' function";
+ }
+
+ static std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ if (CFGOnly && Node->getBasicBlock() &&
+ !Node->getBasicBlock()->getName().empty())
+ return Node->getBasicBlock()->getName() + ":";
+
+ std::ostringstream Out;
+ if (CFGOnly) {
+ Out << Node->getNumber() << ':';
+ return Out.str();
+ }
+
+ Node->print(Out);
+
+ std::string OutStr = Out.str();
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getName());
+#else
+ cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+ CFGOnly = true;
+ viewCFG();
+ CFGOnly = false;
+}
+
+// The next few methods are used to construct, destroy, and retrieve the
+// MachineCodeForFunction object for the given function.
+// construct() -- Allocates and initializes for a given function and target
+// destruct() -- Destroys the object for a given function
+// get() -- Returns a handle to the object.
+// This should not be called before "construct()"
+// for a given Function.
+//
+MachineFunction&
+MachineFunction::construct(const Function *Fn, const TargetMachine &Tar)
+{
+ AnnotationID MF_AID =
+ AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+ assert(Fn->getAnnotation(MF_AID) == 0 &&
+ "Object already exists for this function!");
+ MachineFunction* mcInfo = new MachineFunction(Fn, Tar);
+ Fn->addAnnotation(mcInfo);
+ return *mcInfo;
+}
+
+void MachineFunction::destruct(const Function *Fn) {
+ AnnotationID MF_AID =
+ AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+ bool Deleted = Fn->deleteAnnotation(MF_AID);
+ assert(Deleted && "Machine code did not exist for function!");
+ Deleted = Deleted; // silence warning when no assertions.
+}
+
+MachineFunction& MachineFunction::get(const Function *F)
+{
+ AnnotationID MF_AID =
+ AnnotationManager::getID("CodeGen::MachineCodeForFunction");
+ MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID);
+ assert(mc && "Call construct() method first to allocate the object");
+ return *mc;
+}
+
+/// addLiveIn - Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+ const TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = getRegInfo().createVirtualRegister(RC);
+ getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
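+
+// Illustrative usage sketch (not part of the original patch): a target's
+// argument lowering would register an incoming argument register like this;
+// 'ArgReg' and 'RC' are assumed to come from the target's calling convention.
+static unsigned ExampleAddLiveIn(MachineFunction &MF, unsigned ArgReg,
+ const TargetRegisterClass *RC) {
+ unsigned VReg = MF.addLiveIn(ArgReg, RC); // records the (ArgReg, VReg) pair
+ return VReg; // a fresh virtual register of class RC
+}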
+
+/// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given
+/// compile unit, line, and column. If none currently exists, create a new
+/// DebugLocTuple, and insert it into the DebugIdMap.
+unsigned MachineFunction::getOrCreateDebugLocID(GlobalVariable *CompileUnit,
+ unsigned Line, unsigned Col) {
+ DebugLocTuple Tuple(CompileUnit, Line, Col);
+ DenseMap<DebugLocTuple, unsigned>::iterator II
+ = DebugLocInfo.DebugIdMap.find(Tuple);
+ if (II != DebugLocInfo.DebugIdMap.end())
+ return II->second;
+ // Add a new tuple.
+ unsigned Id = DebugLocInfo.DebugLocations.size();
+ DebugLocInfo.DebugLocations.push_back(Tuple);
+ DebugLocInfo.DebugIdMap[Tuple] = Id;
+ return Id;
+}
+
+/// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object.
+DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const {
+ unsigned Idx = DL.getIndex();
+ assert(Idx < DebugLocInfo.DebugLocations.size() &&
+ "Invalid index into debug locations!");
+ return DebugLocInfo.DebugLocations[Idx];
+}
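+
+// Illustrative usage sketch (not part of the original patch): the returned id
+// is the index a DebugLoc wraps; DebugLoc::get is assumed to construct one
+// from it, and 'CU' is assumed to describe the compile unit.
+static DebugLoc ExampleDebugLoc(MachineFunction &MF, GlobalVariable *CU) {
+ unsigned Id = MF.getOrCreateDebugLocID(CU, /*Line=*/42, /*Col=*/7);
+ return DebugLoc::get(Id); // MF.getDebugLocTuple() recovers the tuple later
+}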
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// CreateFixedObject - Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency. By default, fixed objects are immutable. This returns an
+/// index with a negative value.
+///
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable));
+ return -++NumFixedObjects;
+}
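+
+// Illustrative usage sketch (not part of the original patch): incoming stack
+// arguments are typical fixed objects; note that the indices count down from -1.
+static void ExampleFixedObjects(MachineFrameInfo *MFI) {
+ int FI0 = MFI->CreateFixedObject(4, /*SPOffset=*/0, /*Immutable=*/true); // -1
+ int FI1 = MFI->CreateFixedObject(8, /*SPOffset=*/4, /*Immutable=*/true); // -2
+ (void)FI0; (void)FI1;
+}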
+
+
+void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{
+ const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " <fi#" << (int)(i-NumFixedObjects) << ">: ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size is " << SO.Size << " byte" << (SO.Size != 1 ? "s," : ",");
+ OS << " alignment is " << SO.Alignment << " byte"
+ << (SO.Alignment != 1 ? "s," : ",");
+
+ if (i < NumFixedObjects)
+ OS << " fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << " at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+
+ if (HasVarSizedObjects)
+ OS << " Stack frame contains variable sized objects\n";
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, *cerr.stream());
+}
+
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getJumpTableIndex - Create a new jump table entry in the jump table info
+/// or return an existing one.
+///
+unsigned MachineJumpTableInfo::getJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
+ if (JumpTables[i].MBBs == DestBBs)
+ return i;
+
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
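+
+// Illustrative usage sketch (not part of the original patch): identical
+// destination lists are uniqued, so rebuilding the same table reuses its index;
+// 'A' and 'B' are assumed to be blocks of the current function.
+static unsigned ExampleJumpTable(MachineJumpTableInfo *JTI,
+ MachineBasicBlock *A, MachineBasicBlock *B) {
+ std::vector<MachineBasicBlock*> Dests;
+ Dests.push_back(A);
+ Dests.push_back(B);
+ unsigned Idx = JTI->getJumpTableIndex(Dests);
+ assert(Idx == JTI->getJumpTableIndex(Dests) && "Same list, same index");
+ return Idx;
+}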
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool
+MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i) {
+ MachineJumpTableEntry &JTE = JumpTables[i];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(std::ostream &OS) const {
+ // FIXME: this is lame, maybe we could print out the MBB numbers or something
+ // like {1, 2, 4, 5, 3, 0}
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " <jt#" << i << "> has " << JumpTables[i].MBBs.size()
+ << " entries\n";
+ }
+}
+
+void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one. User must specify the minimum required alignment (in
+/// bytes, a power of two) for the object.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].Val.ConstVal == C &&
+ (Constants[i].getAlignment() & (Alignment - 1)) == 0)
+ return i;
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
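+
+// Illustrative usage sketch (not part of the original patch): 'C' is assumed
+// to be a Constant the target wants materialized from the pool.
+static unsigned ExampleConstantPool(MachineConstantPool *MCP, Constant *C) {
+ // The alignment is a byte alignment (a power of two), as the masking check
+ // above assumes; a later request for the same constant with a smaller or
+ // equal compatible alignment returns the existing slot.
+ return MCP->getConstantPoolIndex(C, /*Alignment=*/8);
+}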
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1)
+ return (unsigned)Idx;
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " <cp#" << i << "> is";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ OS << *(Value*)Constants[i].Val.ConstVal;
+ OS << " , alignment=" << Constants[i].getAlignment();
+ OS << "\n";
+ }
+}
+
+void MachineConstantPool::dump() const { print(errs()); }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 0000000..b8c8563
--- /dev/null
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1105 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/FoldingSet.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// AddRegOperandToRegInfo - Add this register operand to the specified
+/// MachineRegisterInfo. If it is null, then the next/prev fields should be
+/// explicitly nulled out.
+void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
+ assert(isReg() && "Can only add reg operand to use lists");
+
+ // If the reginfo pointer is null, just explicitly null out our next/prev
+ // pointers, to ensure they are not garbage.
+ if (RegInfo == 0) {
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+ return;
+ }
+
+ // Otherwise, add this operand to the head of the registers use/def list.
+ MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
+
+ // For SSA values, we prefer to keep the definition at the start of the list.
+ // We do this by skipping over the definition if it is at the head of the
+ // list.
+ if (*Head && (*Head)->isDef())
+ Head = &(*Head)->Contents.Reg.Next;
+
+ Contents.Reg.Next = *Head;
+ if (Contents.Reg.Next) {
+ assert(getReg() == Contents.Reg.Next->getReg() &&
+ "Different regs on the same list!");
+ Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
+ }
+
+ Contents.Reg.Prev = Head;
+ *Head = this;
+}
+
+/// RemoveRegOperandFromRegInfo - Remove this register operand from the
+/// MachineRegisterInfo it is linked with.
+void MachineOperand::RemoveRegOperandFromRegInfo() {
+ assert(isOnRegUseList() && "Reg operand is not on a use list");
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *NextOp = Contents.Reg.Next;
+ *Contents.Reg.Prev = NextOp;
+ if (NextOp) {
+ assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
+ NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
+ }
+ Contents.Reg.Prev = 0;
+ Contents.Reg.Next = 0;
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg) return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ RemoveRegOperandFromRegInfo();
+ Contents.Reg.RegNo = Reg;
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ Contents.Reg.RegNo = Reg;
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ // If this operand is currently a register operand, and if this is in a
+ // function, deregister the operand from the register's use/def list.
+ if (isReg() && getParent() && getParent()->getParent() &&
+ getParent()->getParent()->getParent())
+ RemoveRegOperandFromRegInfo();
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead) {
+ // If this operand is already a register operand, use setReg to update the
+ // register's use/def lists.
+ if (isReg()) {
+ assert(!isEarlyClobber());
+ setReg(Reg);
+ } else {
+ // Otherwise, change this to a register and set the reg#.
+ OpKind = MO_Register;
+ Contents.Reg.RegNo = Reg;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ AddRegOperandToRegInfo(&MF->getRegInfo());
+ }
+
+ IsDef = isDef;
+ IsImp = isImp;
+ IsKill = isKill;
+ IsDead = isDead;
+ IsEarlyClobber = false;
+ SubReg = 0;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType()) return false;
+
+ switch (getType()) {
+ default: assert(0 && "Unrecognized operand type");
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ }
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(std::ostream &OS, const TargetMachine *TM) const {
+ raw_os_ostream RawOS(OS);
+ print(RawOS, TM);
+}
+
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
+ OS << "%reg" << getReg();
+ } else {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (TM == 0)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+
+ if (TM)
+ OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
+ else
+ OS << "%mreg" << getReg();
+ }
+
+ if (getSubReg() != 0) {
+ OS << ":" << getSubReg();
+ }
+
+ if (isDef() || isKill() || isDead() || isImplicit() || isEarlyClobber()) {
+ OS << "<";
+ bool NeedComma = false;
+ if (isImplicit()) {
+ if (NeedComma) OS << ",";
+ OS << (isDef() ? "imp-def" : "imp-use");
+ NeedComma = true;
+ } else if (isDef()) {
+ if (NeedComma) OS << ",";
+ if (isEarlyClobber())
+ OS << "earlyclobber,";
+ OS << "def";
+ NeedComma = true;
+ }
+ if (isKill() || isDead()) {
+ if (NeedComma) OS << ",";
+ if (isKill()) OS << "kill";
+ if (isDead()) OS << "dead";
+ }
+ OS << ">";
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType() == Type::FloatTy) {
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ } else {
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ }
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "mbb<"
+ << ((Value*)getMBB()->getBasicBlock())->getName()
+ << "," << (void*)getMBB() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << getIndex() << ">";
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << ">";
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << getIndex() << ">";
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:" << ((Value*)getGlobal())->getName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << ">";
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << ">";
+ break;
+ default:
+ assert(0 && "Unrecognized operand type");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
+ int64_t o, uint64_t s, unsigned int a)
+ : Offset(o), Size(s), V(v),
+ Flags((f & 7) | ((Log2_32(a) + 1) << 3)) {
+ assert(isPowerOf2_32(a) && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Offset);
+ ID.AddInteger(Size);
+ ID.AddPointer(V);
+ ID.AddInteger(Flags);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// TID NULL and no operands.
+MachineInstr::MachineInstr()
+ : TID(0), NumImplicitOps(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+ addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified
+/// by the TargetInstrDesc, plus the implicit operands.
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
+ : TID(&tid), NumImplicitOps(0), Parent(0),
+ debugLoc(DebugLoc::getUnknownLoc()) {
+ if (!NoImp && TID->getImplicitDefs())
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (!NoImp && TID->getImplicitUses())
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+ bool NoImp)
+ : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) {
+ if (!NoImp && TID->getImplicitDefs())
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (!NoImp && TID->getImplicitUses())
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ if (!NoImp)
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor two above, except
+/// that the MachineInstr is created and added to the end of the specified
+/// basic block.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
+ : TID(&tid), NumImplicitOps(0), Parent(0),
+ debugLoc(DebugLoc::getUnknownLoc()) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
+ const TargetInstrDesc &tid)
+ : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : TID(&MI.getDesc()), NumImplicitOps(0), Parent(0),
+ debugLoc(MI.getDebugLoc()) {
+ Operands.reserve(MI.getNumOperands());
+
+ // Add operands
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+ addOperand(MI.getOperand(i));
+ NumImplicitOps = MI.NumImplicitOps;
+
+ // Add memory operands.
+ for (std::list<MachineMemOperand>::const_iterator i = MI.memoperands_begin(),
+ j = MI.memoperands_end(); i != j; ++i)
+ addMemOperand(MF, *i);
+
+ // Set parent to null.
+ Parent = 0;
+
+ LeakDetector::addGarbageObject(this);
+}
+
+MachineInstr::~MachineInstr() {
+ LeakDetector::removeGarbageObject(this);
+ assert(MemOperands.empty() &&
+ "MachineInstr being deleted with live memoperands!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
+ assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) &&
+ "Reg operand def/use list corrupted");
+ }
+#endif
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists() {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(&RegInfo);
+ }
+}
+
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ assert((isImpReg || !OperandsComplete()) &&
+ "Trying to add an operand to a machine instr that is already done!");
+
+ MachineRegisterInfo *RegInfo = getRegInfo();
+
+ // If we are adding the operand to the end of the list, our job is simpler.
+ // This is true most of the time, so this is a reasonable optimization.
+ if (isImpReg || NumImplicitOps == 0) {
+ // We can only do this optimization if we know that the operand list won't
+ // reallocate.
+ if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) {
+ Operands.push_back(Op);
+
+ // Set the parent of the operand.
+ Operands.back().ParentMI = this;
+
+ // If the operand is a register, update the operand's use list.
+ if (Op.isReg())
+ Operands.back().AddRegOperandToRegInfo(RegInfo);
+ return;
+ }
+ }
+
+ // Otherwise, we have to insert a real operand before any implicit ones.
+ unsigned OpNo = Operands.size()-NumImplicitOps;
+
+ // If this instruction isn't embedded into a function, then we don't need to
+ // update any operand lists.
+ if (RegInfo == 0) {
+ // Simple insertion, no reginfo update needed for other register operands.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Do explicitly set the reginfo for this operand though, to ensure the
+ // next/prev fields are properly nulled out.
+ if (Operands[OpNo].isReg())
+ Operands[OpNo].AddRegOperandToRegInfo(0);
+
+ } else if (Operands.size()+1 <= Operands.capacity()) {
+ // Otherwise, we have to remove register operands from their register use
+ // list, add the operand, then add the register operands back to their use
+ // list. This also must handle the case when the operand list reallocates
+ // to somewhere else.
+
+ // If insertion of this operand won't cause reallocation of the operand
+ // list, just remove the implicit operands, add the operand, then re-add all
+ // the rest of the operands.
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+
+ // Add the operand. If it is a register, add it to the reg list.
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ if (Operands[OpNo].isReg())
+ Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+
+ // Re-add all the implicit ops.
+ for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) {
+ assert(Operands[i].isReg() && "Should only be an implicit reg!");
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ } else {
+ // Otherwise, we will be reallocating the operand list. Remove all reg
+ // operands from their list, then readd them after the operand list is
+ // reallocated.
+ RemoveRegOperandsFromUseLists();
+
+ Operands.insert(Operands.begin()+OpNo, Op);
+ Operands[OpNo].ParentMI = this;
+
+ // Re-add all the operands.
+ AddRegOperandsToUseLists(*RegInfo);
+ }
+}
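+
+// Illustrative usage sketch (not part of the original patch): adding an
+// explicit use to an instruction that already carries implicit operands;
+// 'VReg' is assumed to be a virtual register.
+static void ExampleAddOperand(MachineInstr *MI, unsigned VReg) {
+ MI->addOperand(MachineOperand::CreateReg(VReg, /*isDef=*/false, /*isImp=*/false));
+ // The new operand is inserted before the trailing implicit defs/uses, keeping
+ // the explicit operand list contiguous.
+}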
+
+/// RemoveOperand - Erase an operand from an instruction, leaving it with one
+/// fewer operand than it started with.
+///
+void MachineInstr::RemoveOperand(unsigned OpNo) {
+ assert(OpNo < Operands.size() && "Invalid operand number");
+
+ // Special case removing the last one.
+ if (OpNo == Operands.size()-1) {
+ // If needed, remove from the reg def/use list.
+ if (Operands.back().isReg() && Operands.back().isOnRegUseList())
+ Operands.back().RemoveRegOperandFromRegInfo();
+
+ Operands.pop_back();
+ return;
+ }
+
+ // Otherwise, we are removing an interior operand. If we have reginfo to
+ // update, remove all operands that will be shifted down from their reg lists,
+ // move everything down, then re-add them.
+ MachineRegisterInfo *RegInfo = getRegInfo();
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].RemoveRegOperandFromRegInfo();
+ }
+ }
+
+ Operands.erase(Operands.begin()+OpNo);
+
+ if (RegInfo) {
+ for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
+ if (Operands[i].isReg())
+ Operands[i].AddRegOperandToRegInfo(RegInfo);
+ }
+ }
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction,
+/// referencing arbitrary storage.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ const MachineMemOperand &MO) {
+ MemOperands.push_back(MO);
+}
+
+/// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands.
+void MachineInstr::clearMemOperands(MachineFunction &MF) {
+ MemOperands.clear();
+}
+
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing basic
+/// block, and deletes it.
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand
+///
+bool MachineInstr::OperandsComplete() const {
+ unsigned short NumOperands = TID->getNumOperands();
+ if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+ return true; // Broken: we have all the operands of this instruction!
+ return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = TID->getNumOperands();
+ if (!TID->isVariadic())
+ return NumOperands;
+
+ for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit())
+ NumOperands++;
+ }
+ return NumOperands;
+}
+
+
+/// isLabel - Returns true if the MachineInstr represents a label.
+///
+bool MachineInstr::isLabel() const {
+ return getOpcode() == TargetInstrInfo::DBG_LABEL ||
+ getOpcode() == TargetInstrInfo::EH_LABEL ||
+ getOpcode() == TargetInstrInfo::GC_LABEL;
+}
+
+/// isDebugLabel - Returns true if the MachineInstr represents a debug label.
+///
+bool MachineInstr::isDebugLabel() const {
+ return getOpcode() == TargetInstrInfo::DBG_LABEL;
+}
+
+/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
+/// the specified register or -1 if it is not found. If isKill is true, the
+/// search is further restricted to uses that kill the register.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isDead || MO.isDead())
+ return i;
+ }
+ return -1;
+}
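+
+// Illustrative usage sketch (not part of the original patch): checking whether
+// an instruction kills a given register.
+static bool ExampleKillsReg(const MachineInstr *MI, unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ int OpIdx = MI->findRegisterUseOperandIdx(Reg, /*isKill=*/true, TRI);
+ return OpIdx != -1; // OpIdx would index the killing use operand
+}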
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ const TargetInstrDesc &TID = getDesc();
+ if (TID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (TID.OpInfo[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+/// isRegTiedToUseOperand - Given the index of a register def operand,
+/// check if the register def is tied to a source operand, due to either
+/// two-address elimination or inline assembly constraints. Returns the
+/// first tied use operand index by reference if UseOpIdx is not null.
+bool MachineInstr::
+isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
+ if (getOpcode() == TargetInstrInfo::INLINEASM) {
+ assert(DefOpIdx >= 2);
+ const MachineOperand &MO = getOperand(DefOpIdx);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ return false;
+ // Determine the actual operand no corresponding to this index.
+ unsigned DefNo = 0;
+ for (unsigned i = 1, e = getNumOperands(); i < e; ) {
+ const MachineOperand &FMO = getOperand(i);
+ assert(FMO.isImm());
+ // Skip over this def.
+ i += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+ if (i > DefOpIdx)
+ break;
+ ++DefNo;
+ }
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &FMO = getOperand(i);
+ if (!FMO.isImm())
+ continue;
+ if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+ continue;
+ unsigned Idx;
+ if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
+ Idx == DefNo) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i + 1;
+ return true;
+ }
+ }
+ }
+
+ assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
+ const TargetInstrDesc &TID = getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+ if (UseOpIdx)
+ *UseOpIdx = (unsigned)i;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isRegTiedToDefOperand - Return true if the operand of the specified index
+/// is a register use and it is tied to a def operand. It also returns the def
+/// operand index by reference.
+bool MachineInstr::
+isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
+ if (getOpcode() == TargetInstrInfo::INLINEASM) {
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
+ return false;
+ assert(UseOpIdx > 0);
+ const MachineOperand &UFMO = getOperand(UseOpIdx-1);
+ if (!UFMO.isImm())
+ return false; // Must be physreg uses.
+ unsigned DefNo;
+ if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+ if (!DefOpIdx)
+ return true;
+
+ unsigned DefIdx = 1;
+ // Remember to adjust the index. The first operand is the asm string, and
+ // each operand group is preceded by a flag immediate.
+ while (DefNo) {
+ const MachineOperand &FMO = getOperand(DefIdx);
+ assert(FMO.isImm());
+ // Skip over this def.
+ DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+ --DefNo;
+ }
+ *DefOpIdx = DefIdx+1;
+ return true;
+ }
+ return false;
+ }
+
+ const TargetInstrDesc &TID = getDesc();
+ if (UseOpIdx >= TID.getNumOperands())
+ return false;
+ const MachineOperand &MO = getOperand(UseOpIdx);
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+ if (DefIdx == -1)
+ return false;
+ if (DefOpIdx)
+ *DefOpIdx = (unsigned)DefIdx;
+ return true;
+}
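+
+// Illustrative usage sketch (not part of the original patch): for an ordinary
+// two-address instruction (not inline asm) whose operand 0 is a def, e.g.
+// "r1 = add r1, r2", the two queries are symmetric.
+static bool ExampleTiedOperands(const MachineInstr *MI) {
+ unsigned UseIdx = 0, DefIdx = 0;
+ if (!MI->isRegTiedToUseOperand(0, &UseIdx))
+ return false; // def 0 is not tied to any use
+ return MI->isRegTiedToDefOperand(UseIdx, &DefIdx) && DefIdx == 0;
+}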
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+ continue;
+ for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+ MachineOperand &MOp = getOperand(j);
+ if (!MOp.isIdenticalTo(MO))
+ continue;
+ if (MO.isKill())
+ MOp.setIsKill();
+ else
+ MOp.setIsDead();
+ break;
+ }
+ }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (TID.OpInfo[i].isPredicate()) {
+ // Predicate operands must be the last operands.
+ addOperand(MI->getOperand(i));
+ }
+ }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+ bool &SawStore) const {
+ // Ignore stuff that we obviously can't move.
+ if (TID->mayStore() || TID->isCall()) {
+ SawStore = true;
+ return false;
+ }
+ if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+ // loaded value doesn't change between the load and its intended destination.
+ // The check for isInvariantLoad gives the target the chance to classify the
+ // load as always returning a constant, e.g. a constant pool load.
+ if (TID->mayLoad() && !TII->isInvariantLoad(this))
+ // Otherwise, this is a real load. If there is a store between the load and
+ // end of block, or if the load is volatile, we can't move it.
+ return !SawStore && !hasVolatileMemoryRef();
+
+ return true;
+}
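+
+// Illustrative usage sketch (not part of the original patch): a typical caller
+// (e.g. a sinking pass) walks a block in order and threads SawStore through,
+// so loads that follow a store are conservatively rejected.
+static unsigned ExampleCountMovable(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII) {
+ bool SawStore = false;
+ unsigned NumMovable = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I)
+ if (I->isSafeToMove(TII, SawStore))
+ ++NumMovable; // candidate for being moved out of line
+ return NumMovable;
+}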
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+ unsigned DstReg) const {
+ bool SawStore = false;
+ if (!getDesc().isRematerializable() ||
+ !TII->isTriviallyReMaterializable(this) ||
+ !isSafeToMove(TII, SawStore))
+ return false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg())
+ continue;
+ // FIXME: For now, do not remat any instruction with register operands.
+ // Later on, we can loosen the restriction if the register operands have
+ // not been modified between the def and use. Note, this is different from
+ // MachineSink because the code is no longer in two-address form (at least
+ // partially).
+ if (MO.isUse())
+ return false;
+ else if (!MO.isDead() && MO.getReg() != DstReg)
+ return false;
+ }
+ return true;
+}
+
+/// hasVolatileMemoryRef - Return true if this instruction may have a
+/// volatile memory reference, or if the information describing the
+/// memory reference is not available. Return false if it is known to
+/// have no volatile memory references.
+bool MachineInstr::hasVolatileMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!TID->mayStore() &&
+ !TID->mayLoad() &&
+ !TID->isCall() &&
+ !TID->hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check the memory reference information for volatile references.
+ for (std::list<MachineMemOperand>::const_iterator I = memoperands_begin(),
+ E = memoperands_end(); I != E; ++I)
+ if (I->isVolatile())
+ return true;
+
+ return false;
+}
+
+void MachineInstr::dump() const {
+ cerr << " " << *this;
+}
+
+void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const {
+ raw_os_ostream RawOS(OS);
+ print(RawOS, TM);
+}
+
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // Specialize printing if op#0 is definition
+ unsigned StartOp = 0;
+ if (getNumOperands() && getOperand(0).isReg() && getOperand(0).isDef()) {
+ getOperand(0).print(OS, TM);
+ OS << " = ";
+ ++StartOp; // Don't print this operand again!
+ }
+
+ OS << getDesc().getName();
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ if (i != StartOp)
+ OS << ",";
+ OS << " ";
+ getOperand(i).print(OS, TM);
+ }
+
+ if (!memoperands_empty()) {
+ OS << ", Mem:";
+ for (std::list<MachineMemOperand>::const_iterator i = memoperands_begin(),
+ e = memoperands_end(); i != e; ++i) {
+ const MachineMemOperand &MRO = *i;
+ const Value *V = MRO.getValue();
+
+ assert((MRO.isLoad() || MRO.isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (MRO.isVolatile())
+ OS << "Volatile ";
+
+ if (MRO.isLoad())
+ OS << "LD";
+ if (MRO.isStore())
+ OS << "ST";
+
+ OS << "(" << MRO.getSize() << "," << MRO.getAlignment() << ") [";
+
+ if (!V)
+ OS << "<unknown>";
+ else if (!V->getName().empty())
+ OS << V->getName();
+ else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ PSV->print(OS);
+ } else
+ OS << V;
+
+ OS << " + " << MRO.getOffset() << "]";
+ }
+ }
+
+ if (!debugLoc.isUnknown()) {
+ const MachineFunction *MF = getParent()->getParent();
+ DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
+ DICompileUnit CU(DLT.CompileUnit);
+ std::string Dir, Fn;
+ OS << " [dbg: "
+ << CU.getDirectory(Dir) << '/' << CU.getFilename(Fn) << ","
+ << DLT.Line << ","
+ << DLT.Col << "]";
+ }
+
+ OS << "\n";
+}
+
+bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
+
+bool MachineInstr::addRegisterDead(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isDead())
+ // The register is already marked dead.
+ return true;
+ MO.setIsDead();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->getSubRegisters(IncomingReg) &&
+ RegInfo->getSuperRegisters(Reg) &&
+ RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+ }
+ return Found;
+}
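+
+// Illustrative usage sketch (not part of the original patch): liveness passes
+// use these helpers to annotate the last use and a dead def of a physical
+// register, adding an implicit operand when the register is not already listed.
+static void ExampleMarkKillAndDead(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, const TargetRegisterInfo *TRI) {
+ UseMI->addRegisterKilled(Reg, TRI, /*AddIfNotFound=*/true);
+ DefMI->addRegisterDead(Reg, TRI, /*AddIfNotFound=*/true);
+}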
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 0000000..aaa4de4
--- /dev/null
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,406 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass does not attempt to throttle itself to limit register pressure.
+// The register allocation phases are expected to perform rematerialization
+// to recover when register pressure is high.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
+STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
+
+namespace {
+ class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+
+ // Various analyses that we use...
+ MachineLoopInfo *LI; // Current MachineLoopInfo
+ MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+ MachineRegisterInfo *RegInfo; // Machine register information
+
+ // State that is updated as we process loops
+ bool Changed; // True if a loop is changed.
+ MachineLoop *CurLoop; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+ // For each BB and opcode pair, keep a list of hoisted instructions.
+ DenseMap<std::pair<unsigned, unsigned>,
+ std::vector<const MachineInstr*> > CSEMap;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineLICM() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Machine Instruction LICM"; }
+
+ // FIXME: Loop preheaders?
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ CSEMap.clear();
+ }
+
+ private:
+ /// IsLoopInvariantInst - Returns true if the instruction is loop
+ /// invariant. I.e., all virtual register operands are defined outside of
+ /// the loop, physical registers aren't accessed (explicitly or implicitly),
+ /// and the instruction is hoistable.
+ ///
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ /// IsProfitableToHoist - Return true if it is potentially profitable to
+ /// hoist the given loop invariant.
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ /// HoistRegion - Walk the specified region of the CFG (defined by all
+ /// blocks dominated by the specified block, and that are in the current
+ /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+ /// visit definitions before uses, allowing us to hoist a loop body in one
+ /// pass without iteration.
+ ///
+ void HoistRegion(MachineDomTreeNode *N);
+
+ /// Hoist - When an instruction is found to use only loop invariant operands
+ /// and is safe to hoist, this function is called to do the dirty work.
+ ///
+ void Hoist(MachineInstr &MI);
+ };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+static RegisterPass<MachineLICM>
+X("machinelicm", "Machine Loop Invariant Code Motion");
+
+FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); }
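+
+// Illustrative usage sketch (not part of the original patch): how a codegen
+// pipeline would schedule this pass, assuming PassManagerBase from
+// llvm/PassManager.h is visible; this is roughly what the target machine does
+// before register allocation.
+static void ExampleAddMachineLICM(PassManagerBase &PM) {
+ PM.add(createMachineLICMPass());
+}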
+
+/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most
+/// loop that has a preheader.
+static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPreheader())
+ return false;
+ return true;
+}
+
+/// Hoist expressions out of the specified loop. Note that alias info for inner
+/// loops is not preserved, so it is not a good idea to run LICM multiple times
+/// on one loop.
+///
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ return false;
+
+ DOUT << "******** Machine LICM ********\n";
+
+ Changed = false;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ RegInfo = &MF.getRegInfo();
+
+ // Get our Loop information...
+ LI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+
+ for (MachineLoopInfo::iterator
+ I = LI->begin(), E = LI->end(); I != E; ++I) {
+ CurLoop = *I;
+
+ // Only visit outer-most preheader-sporting loops.
+ if (!LoopIsOuterMostWithPreheader(CurLoop))
+ continue;
+
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop preheader, we can't do any hoisting.
+ //
+ // FIXME: We are only hoisting if the basic block coming into this loop
+ // has only one successor. This isn't the case in general because we haven't
+ // broken critical edges or added preheaders.
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader)
+ continue;
+
+ HoistRegion(DT->getNode(CurLoop->getHeader()));
+ }
+
+ return Changed;
+}
+
+/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
+/// dominated by the specified block, and that are in the current loop) in depth
+/// first order w.r.t the DominatorTree. This allows us to visit definitions
+/// before uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
+ assert(N != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = N->getBlock();
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB)) return;
+
+ for (MachineBasicBlock::iterator
+ MII = BB->begin(), E = BB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr &MI = *MII;
+
+ Hoist(MI);
+
+ MII = NextMII;
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
+
+ for (unsigned I = 0, E = Children.size(); I != E; ++I)
+ HoistRegion(Children[I]);
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ const TargetInstrDesc &TID = I.getDesc();
+
+ // Ignore stuff that we obviously can't hoist.
+ if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.hasUnmodeledSideEffects())
+ return false;
+
+ if (TID.mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, the
+ // load can safely be hoisted.
+ if (!TII->isInvariantLoad(&I))
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there is nothing that can change memory from here until the end of the
+ // block. This is a trivial form of alias analysis.
+ return false;
+ }
+
+ DEBUG({
+ DOUT << "--- Checking if we can hoist " << I;
+ if (I.getDesc().getImplicitUses()) {
+ DOUT << " * Instruction has implicit uses:\n";
+
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
+ *ImpUses; ++ImpUses)
+ DOUT << " -> " << TRI->getName(*ImpUses) << "\n";
+ }
+
+ if (I.getDesc().getImplicitDefs()) {
+ DOUT << " * Instruction has implicit defines:\n";
+
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
+ *ImpDefs; ++ImpDefs)
+ DOUT << " -> " << TRI->getName(*ImpDefs) << "\n";
+ }
+ });
+
+ if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
+ DOUT << "Cannot hoist with implicit defines or uses\n";
+ return false;
+ }
+
+ // The instruction is loop invariant if all of its operands are.
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I.getOperand(i);
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // Don't hoist an instruction that uses or defines a physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ if (!MO.isUse())
+ continue;
+
+ assert(RegInfo->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (CurLoop->contains(RegInfo->getVRegDef(Reg)->getParent()))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+
+/// HasPHIUses - Return true if the specified register has any PHI use.
+static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
+ for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
+ UE = RegInfo->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->getOpcode() == TargetInstrInfo::PHI)
+ return true;
+ }
+ return false;
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ return false;
+
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // FIXME: For now, only hoist re-materializable instructions. LICM will
+ // increase register pressure. We want to make sure it doesn't increase
+ // spilling.
+ if (!TID.mayLoad() && (!TID.isRematerializable() ||
+ !TII->isTriviallyReMaterializable(&MI)))
+ return false;
+
+ // If the results of this instruction are used by PHIs, don't hoist it.
+ // The presence of joins makes it difficult for the current register
+ // allocator implementation to perform rematerialization.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ if (HasPHIUses(MO.getReg(), RegInfo))
+ return false;
+ }
+
+ return true;
+}
+
+static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs,
+ MachineRegisterInfo *RegInfo) {
+ unsigned NumOps = MI->getNumOperands();
+ for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+ const MachineInstr *PrevMI = PrevMIs[i];
+ unsigned NumOps2 = PrevMI->getNumOperands();
+ if (NumOps != NumOps2)
+ continue;
+ bool IsSame = true;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (MO.isReg() && MO.isDef()) {
+ if (RegInfo->getRegClass(MO.getReg()) !=
+ RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) {
+ IsSame = false;
+ break;
+ }
+ continue;
+ }
+ if (!MO.isIdenticalTo(PrevMI->getOperand(j))) {
+ IsSame = false;
+ break;
+ }
+ }
+ if (IsSame)
+ return PrevMI;
+ }
+ return 0;
+}
+
+/// Hoist - When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this routine is called to do the dirty work.
+///
+void MachineLICM::Hoist(MachineInstr &MI) {
+ if (!IsLoopInvariantInst(MI)) return;
+ if (!IsProfitableToHoist(MI)) return;
+
+ // Now move the instruction to the preheader, inserting it before any
+ // terminator instructions.
+ DEBUG({
+ DOUT << "Hoisting " << MI;
+ if (CurPreheader->getBasicBlock())
+ DOUT << " to MachineBasicBlock "
+ << CurPreheader->getBasicBlock()->getName();
+ if (MI.getParent()->getBasicBlock())
+ DOUT << " from MachineBasicBlock "
+ << MI.getParent()->getBasicBlock()->getName();
+ DOUT << "\n";
+ });
+
+ // Look for opportunity to CSE the hoisted instruction.
+ std::pair<unsigned, unsigned> BBOpcPair =
+ std::make_pair(CurPreheader->getNumber(), MI.getOpcode());
+ DenseMap<std::pair<unsigned, unsigned>,
+ std::vector<const MachineInstr*> >::iterator CI = CSEMap.find(BBOpcPair);
+ bool DoneCSE = false;
+ if (CI != CSEMap.end()) {
+ const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo);
+ if (Dup) {
+ DOUT << "CSEing " << MI;
+ DOUT << " with " << *Dup;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ }
+ MI.eraseFromParent();
+ DoneCSE = true;
+ ++NumCSEed;
+ }
+ }
+
+ // Otherwise, splice the instruction to the preheader.
+ if (!DoneCSE) {
+ CurPreheader->splice(CurPreheader->getFirstTerminator(),
+ MI.getParent(), &MI);
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(&MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(&MI);
+ CSEMap.insert(std::make_pair(BBOpcPair, CSEMIs));
+ }
+ }
+
+ ++NumHoisted;
+ Changed = true;
+}
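[Editor's note] MachineLICM is created through the createMachineLICMPass() factory at the top of this file and scheduled like any other machine function pass; the MachineLoopInfo and MachineDominatorTree it queries via getAnalysis<> in runOnMachineFunction() are satisfied by the pass manager. A minimal wiring sketch, assuming the PassManagerBase::add interface used by the target-independent code generator (illustrative, not part of this patch):

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/PassManager.h"
    using namespace llvm;

    static void addMachineLoopOpts(PassManagerBase &PM) {
      // Hoist loop-invariant machine instructions into loop preheaders.
      // The pass itself bails out on functions marked OptimizeForSize.
      PM.add(createMachineLICMPass());
    }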
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 0000000..68ddb7b
--- /dev/null
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,40 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+using namespace llvm;
+
+TEMPLATE_INSTANTIATION(class LoopBase<MachineBasicBlock>);
+TEMPLATE_INSTANTIATION(class LoopInfoBase<MachineBasicBlock>);
+
+char MachineLoopInfo::ID = 0;
+static RegisterPass<MachineLoopInfo>
+X("machine-loops", "Machine Natural Loop Construction", true);
+
+const PassInfo *const llvm::MachineLoopInfoID = &X;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ LI->Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update
+ return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+}
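[Editor's note] The analysis above is consumed the same way MachineLICM does earlier in this patch (getAnalysis<MachineLoopInfo>() after requiring it). A hedged sketch of a client machine pass follows; the pass name and body are purely illustrative, and the getLoopDepth() helper is assumed to forward to LoopInfoBase:

    #include "llvm/CodeGen/MachineFunctionPass.h"
    #include "llvm/CodeGen/MachineLoopInfo.h"
    using namespace llvm;

    namespace {
      struct LoopDepthCounter : public MachineFunctionPass {
        static char ID;
        LoopDepthCounter() : MachineFunctionPass(&ID) {}

        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.addRequired<MachineLoopInfo>();  // ask the pass manager for loops
          AU.setPreservesAll();
          MachineFunctionPass::getAnalysisUsage(AU);
        }

        virtual bool runOnMachineFunction(MachineFunction &MF) {
          MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
          unsigned InLoopBlocks = 0;
          for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
               BB != E; ++BB)
            if (MLI.getLoopDepth(&*BB) > 0)   // depth 0 means "not in a loop"
              ++InLoopBlocks;
          (void)InLoopBlocks;                 // a real pass would act on this
          return false;                       // analysis only, nothing changed
        }
      };
      char LoopDepthCounter::ID = 0;
    }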
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 0000000..1d8109e
--- /dev/null
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,368 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the Pass registration stuff necessary to use MachineModuleInfo.
+static RegisterPass<MachineModuleInfo>
+X("machinemoduleinfo", "Module Information");
+char MachineModuleInfo::ID = 0;
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass(&ID)
+, LabelIDList()
+, FrameMoves()
+, LandingPads()
+, Personalities()
+, CallsEHReturn(0)
+, CallsUnwindInit(0)
+, DbgInfoAvailable(false)
+{
+ // Always emit "no personality" info
+ Personalities.push_back(NULL);
+}
+
+MachineModuleInfo::~MachineModuleInfo() {}
+
+/// doInitialization - Initialize the state for a new module.
+///
+bool MachineModuleInfo::doInitialization() {
+ return false;
+}
+
+/// doFinalization - Tear down the state after completion of a module.
+///
+bool MachineModuleInfo::doFinalization() {
+ return false;
+}
+
+/// BeginFunction - Begin gathering function meta information.
+///
+void MachineModuleInfo::BeginFunction(MachineFunction *MF) {
+ // Coming soon.
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up frame info.
+ FrameMoves.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(Module &M) {
+ // Insert functions in the llvm.used array into UsedFunctions.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InitList->getOperand(i)))
+ if (CE->getOpcode() == Instruction::BitCast)
+ if (Function *F = dyn_cast<Function>(CE->getOperand(0)))
+ UsedFunctions.insert(F);
+ }
+}
+
+//===-EH-------------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ unsigned BeginLabel, unsigned EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ unsigned LandingPadLabel = NextLabelID();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads() {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel);
+
+ // Special case: we *should* emit LPs with null LP MBB. This indicates the
+ // "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) {
+ unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]);
+ unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]);
+
+ if (!BeginLabel || !EndLabel) {
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ continue;
+ }
+
+ LandingPad.BeginLabels[j] = BeginLabel;
+ LandingPad.EndLabels[j] = EndLabel;
+ ++j;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+
+ ++i;
+ }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+ FilterIds.push_back(TyIds[I]);
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+Function *MachineModuleInfo::getPersonality() const {
+ // FIXME: Until PR1414 is fixed, we're using one personality function per
+ // function.
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return a unique index for the current personality
+/// function. The NULL personality function should always get index zero.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan landing pads. If there is at least one non-NULL personality, use it.
+ for (unsigned i = 0; i != LandingPads.size(); ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
+ }
+
+ for (unsigned i = 0; i < Personalities.size(); ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This should never happen
+ assert(0 && "Personality function should be set!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+/// DebugLabelFolding pass - This pass prunes out redundant labels. This allows
+/// a debug info consumer to determine whether the range between two labels is
+/// empty, by checking whether the labels map to the same reduced label.
+
+namespace llvm {
+
+struct DebugLabelFolder : public MachineFunctionPass {
+ static char ID;
+ DebugLabelFolder() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Label Folder"; }
+};
+
+char DebugLabelFolder::ID = 0;
+
+bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
+ // Get machine module info.
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ if (!MMI) return false;
+
+ // Track if change is made.
+ bool MadeChange = false;
+ // No prior label to begin.
+ unsigned PriorLabel = 0;
+
+ // Iterate through basic blocks.
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ // Iterate through instructions.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Is it an unused debug label?
+ if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())) {
+ // The label ID # is always operand #0, an immediate.
+ unsigned NextLabel = I->getOperand(0).getImm();
+
+ // If there was an immediately preceding label.
+ if (PriorLabel) {
+ // Remap the current label to prior label.
+ MMI->RemapLabel(NextLabel, PriorLabel);
+ // Delete the current label.
+ I = BB->erase(I);
+ // Indicate a change has been made.
+ MadeChange = true;
+ continue;
+ } else {
+ // Start a new round.
+ PriorLabel = NextLabel;
+ }
+ } else {
+ // No consecutive labels.
+ PriorLabel = 0;
+ }
+
+ ++I;
+ }
+ }
+
+ return MadeChange;
+}
+
+FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
+
+}
+
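[Editor's note] On the exception-table encodings above (illustrative, not part of the patch): getTypeIDFor hands out 1-based indices into TypeInfos, while getFilterIDFor returns the negative value -(1 + Start), where Start is the offset of the filter's first entry in FilterIds and the filter runs until its 0 terminator. A hypothetical decoder makes the layout concrete:

    #include <cassert>
    #include <vector>

    // Recover the 1-based type ids selected by a filter id that was produced
    // by MachineModuleInfo::getFilterIDFor. Purely illustrative helper.
    static std::vector<unsigned>
    decodeFilter(const std::vector<unsigned> &FilterIds, int FilterID) {
      assert(FilterID < 0 && "filter ids are negative");
      unsigned Start = unsigned(-FilterID - 1);        // FilterID == -(1 + Start)
      std::vector<unsigned> TyIds;
      for (unsigned i = Start; FilterIds[i] != 0; ++i) // 0 terminates a filter
        TyIds.push_back(FilterIds[i]);
      return TyIds;
    }

The tail-sharing case in getFilterIDFor returns an id that points into the middle of an existing filter, so the decoder above naturally yields the shared suffix.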
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..9f4ef12
--- /dev/null
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 0000000..4f5ab1f
--- /dev/null
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,125 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
+ VRegInfo.reserve(256);
+ RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
+ UsedPhysRegs.resize(TRI.getNumRegs());
+
+ // Create the physreg use/def lists.
+ PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
+ memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
+}
+
+MachineRegisterInfo::~MachineRegisterInfo() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i)
+ assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+ assert(!PhysRegUseDefLists[i] &&
+ "PhysRegUseDefLists has entries after all instructions are deleted");
+#endif
+ delete [] PhysRegUseDefLists;
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ unsigned VR = Reg;
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ assert(Reg < VRegInfo.size() && "Invalid vreg!");
+ const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
+ VRegInfo[Reg].first = RC;
+
+ // Remove from old register class's vregs list. This may be slow but
+ // fortunately this operation is rarely needed.
+ std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
+ std::vector<unsigned>::iterator I=std::find(VRegs.begin(), VRegs.end(), VR);
+ VRegs.erase(I);
+
+ // Add to new register class's vregs list.
+ RegClass2VRegMap[RC->getID()].push_back(VR);
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+ assert(RegClass && "Cannot create register without RegClass!");
+ // Add a reg, but keep track of whether the vector reallocated or not.
+ void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
+ VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
+
+ if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
+ // The vector reallocated, handle this now.
+ HandleVRegListReallocation();
+ unsigned VR = getLastVirtReg();
+ RegClass2VRegMap[RegClass->getID()].push_back(VR);
+ return VR;
+}
+
+/// HandleVRegListReallocation - We just added a virtual register to the
+/// VRegInfo info list and it reallocated. Update the use/def lists info
+/// pointers.
+void MachineRegisterInfo::HandleVRegListReallocation() {
+ // The back pointers for the vreg lists point into the previous vector.
+ // Update them to point to their correct slots.
+ for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) {
+ MachineOperand *List = VRegInfo[i].second;
+ if (!List) continue;
+ // Update the back-pointer to be accurate once more.
+ List->Contents.Reg.Prev = &VRegInfo[i].second;
+ }
+}
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register.
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ ++I;
+ O.setReg(ToReg);
+ }
+}
+
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+ assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
+ "Invalid vreg!");
+ for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) {
+ // Since we are in SSA form, we can stop at the first definition.
+ if (I.getOperand().isDef())
+ return &*I;
+ }
+ return 0;
+}
+
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+ for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+ I.getOperand().getParent()->dump();
+}
+#endif
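[Editor's note] As a usage note (hypothetical helper, not part of the patch), the entry points above are typically combined when a transformation rewrites a value: pick up the register class, create a fresh virtual register, and redirect every operand. getVRegDef relies on the SSA invariant stated in its comment:

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include <cassert>
    using namespace llvm;

    // Replace every use and def of OldReg with a newly created register of
    // the same class. OldReg must be a virtual register in SSA form.
    static unsigned replaceWithFreshVReg(MachineRegisterInfo &MRI,
                                         unsigned OldReg) {
      const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
      unsigned NewReg = MRI.createVirtualRegister(RC);
      assert(MRI.getVRegDef(OldReg) && "expected a single SSA definition");
      MRI.replaceRegWith(OldReg, NewReg);   // updates uses and defs alike
      return NewReg;
    }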
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 0000000..0e18fa7
--- /dev/null
+++ b/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,257 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+
+namespace {
+ class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineFunction *CurMF; // Current MachineFunction
+ MachineRegisterInfo *RegInfo; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree for the current function
+
+ public:
+ static char ID; // Pass identification
+ MachineSinking() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
+ };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+static RegisterPass<MachineSinking>
+X("machine-sink", "Machine code sinking");
+
+FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block.
+bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Only makes sense for vregs");
+ for (MachineRegisterInfo::reg_iterator I = RegInfo->reg_begin(Reg),
+ E = RegInfo->reg_end(); I != E; ++I) {
+ if (I.getOperand().isDef()) continue; // ignore def.
+
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->getOpcode() == TargetInstrInfo::PHI) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ }
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+ return true;
+}
+
+
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "******** Machine Sinking ********\n";
+
+ CurMF = &MF;
+ TM = &CurMF->getTarget();
+ TII = TM->getInstrInfo();
+ RegInfo = &CurMF->getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ for (MachineFunction::iterator I = CurMF->begin(), E = CurMF->end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+ // Can't sink anything out of a block that has fewer than two successors
+ // or is empty.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr *MI = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (SinkInstruction(MI, SawStore))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, SawStore))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ MachineBasicBlock *ParentBlock = MI->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = 0;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // If this is a physical register use, we can't move it. If it is a def,
+ // we can move it, but only if the def is dead.
+ if (MO.isUse() || !MO.isDead())
+ return false;
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
+ return false;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ // x = computation
+ // if () {} else {}
+ // use x
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+ return false;
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to.
+ for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
+ E = ParentBlock->succ_end(); SI != E; ++SI) {
+ if (AllUsesDominatedByBlock(Reg, *SI)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+ }
+ }
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ // It's not safe to sink instructions to an EH landing pad. Control flow
+ // into a landing pad is implicitly defined.
+ if (SuccToSinkTo->isLandingPad())
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MI->getParent() == SuccToSinkTo)
+ return false;
+
+ DEBUG(cerr << "Sink instr " << *MI);
+ DEBUG(cerr << "to block " << *SuccToSinkTo);
+
+ // If the block has multiple predecessors, this would introduce computation on
+ // a path where it doesn't already exist. We could split the critical edge,
+ // but for now we just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->pred_size() > 1) {
+ DEBUG(cerr << " *** PUNTING: Critical edge found\n");
+ return false;
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() &&
+ InsertPos->getOpcode() == TargetInstrInfo::PHI)
+ ++InsertPos;
+
+ // Move the instruction.
+ SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+ return true;
+}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 0000000..be1396c
--- /dev/null
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,690 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <fstream>
+
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN MachineVerifier : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+
+ MachineVerifier(bool allowDoubleDefs = false) :
+ MachineFunctionPass(&ID),
+ allowVirtDoubleDefs(allowDoubleDefs),
+ allowPhysDoubleDefs(allowDoubleDefs),
+ OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const bool allowVirtDoubleDefs;
+ const bool allowPhysDoubleDefs;
+
+ const char *const OutFileName;
+ std::ostream *OS;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsImpDefined, regsDead, regsKilled;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ RV.push_back(*R);
+ }
+
+ // Does RS contain any super-registers of Reg?
+ bool anySuperRegisters(const RegSet &RS, unsigned Reg) {
+ for (const unsigned *R = TRI->getSuperRegisters(Reg); *R; R++)
+ if (RS.count(*R))
+ return true;
+ return false;
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Vregs that must be dead in because they are defined without being
+ // killed first. Map value is the defining instruction.
+ RegMap vregsDeadIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved[Reg];
+ }
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcMaxRegsPassed();
+ void calcMinRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+ };
+}
+
+char MachineVerifier::ID = 0;
+static RegisterPass<MachineVerifier>
+MachineVer("machineverifier", "Verify generated machine code");
+static const PassInfo *const MachineVerifyID = &MachineVer;
+
+FunctionPass *
+llvm::createMachineVerifierPass(bool allowPhysDoubleDefs)
+{
+ return new MachineVerifier(allowPhysDoubleDefs);
+}
+
+bool
+MachineVerifier::runOnMachineFunction(MachineFunction &MF)
+{
+ std::ofstream OutFile;
+ if (OutFileName) {
+ OutFile.open(OutFileName, std::ios::out | std::ios::app);
+ OS = &OutFile;
+ } else {
+ OS = cerr.stream();
+ }
+
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TRI = TM->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(MFI);
+ for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
+ MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ visitMachineInstrBefore(MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+ visitMachineOperand(&MBBI->getOperand(I), I);
+ visitMachineInstrAfter(MBBI);
+ }
+ visitMachineBasicBlockAfter(MFI);
+ }
+ visitMachineFunctionAfter();
+
+ if (OutFileName)
+ OutFile.close();
+
+ if (foundErrors) {
+ cerr << "\nStopping with " << foundErrors << " machine code errors.\n";
+ exit(1);
+ }
+
+ return false; // no changes
+}
+
+void
+MachineVerifier::report(const char *msg, const MachineFunction *MF)
+{
+ assert(MF);
+ *OS << "\n";
+ if (!foundErrors++)
+ MF->print(OS);
+ *OS << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getFunction()->getName() << "\n";
+}
+
+void
+MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB)
+{
+ assert(MBB);
+ report(msg, MBB->getParent());
+ *OS << "- basic block: " << MBB->getBasicBlock()->getName()
+ << " " << (void*)MBB
+ << " (#" << MBB->getNumber() << ")\n";
+}
+
+void
+MachineVerifier::report(const char *msg, const MachineInstr *MI)
+{
+ assert(MI);
+ report(msg, MI->getParent());
+ *OS << "- instruction: ";
+ MI->print(OS, TM);
+}
+
+void
+MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum)
+{
+ assert(MO);
+ report(msg, MO->getParent());
+ *OS << "- operand " << MONum << ": ";
+ MO->print(*OS, TM);
+ *OS << "\n";
+}
+
+void
+MachineVerifier::markReachable(const MachineBasicBlock *MBB)
+{
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void
+MachineVerifier::visitMachineFunctionBefore()
+{
+ regsReserved = TRI->getReservedRegs(*MF);
+ markReachable(&MF->front());
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
+{
+ regsLive.clear();
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ regsLive.insert(*I);
+ for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ regsLive.insert(*R);
+ }
+ regsKilled.clear();
+ regsDefined.clear();
+ regsImpDefined.clear();
+}
+
+void
+MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI)
+{
+ const TargetInstrDesc &TI = MI->getDesc();
+ if (MI->getNumExplicitOperands() < TI.getNumOperands()) {
+ report("Too few operands", MI);
+ *OS << TI.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+ if (!TI.isVariadic()) {
+ if (MI->getNumExplicitOperands() > TI.getNumOperands()) {
+ report("Too many operands", MI);
+ *OS << TI.getNumOperands() << " operands expected, but "
+ << MI->getNumExplicitOperands() << " given.\n";
+ }
+ }
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
+{
+ const MachineInstr *MI = MO->getParent();
+ const TargetInstrDesc &TI = MI->getDesc();
+
+ // The first TI.NumDefs operands must be explicit register defines
+ if (MONum < TI.getNumDefs()) {
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+
+ // Check Live Variables.
+ if (MO->isUse()) {
+ if (MO->isKill()) {
+ addRegWithSubRegs(regsKilled, Reg);
+ } else {
+ // A two-address instr modifying a reg is treated as kill+def.
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
+ MI->getOperand(defIdx).getReg() == Reg)
+ addRegWithSubRegs(regsKilled, Reg);
+ }
+ // Explicit use of a dead register.
+ if (!MO->isImplicit() && !regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (MI->getOpcode() != TargetInstrInfo::PHI)
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ } else {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isImplicit())
+ addRegWithSubRegs(regsImpDefined, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ }
+
+ // Check register classes.
+ if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
+ const TargetOperandInfo &TOI = TI.OpInfo[MONum];
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ unsigned sr = Reg;
+ if (SubIdx) {
+ unsigned s = TRI->getSubReg(Reg, SubIdx);
+ if (!s) {
+ report("Invalid subregister index for physical register",
+ MO, MONum);
+ return;
+ }
+ sr = s;
+ }
+ if (TOI.RegClass) {
+ const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass);
+ if (!DRC->contains(sr)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ *OS << TRI->getName(sr) << " is not a "
+ << DRC->getName() << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (SubIdx) {
+ if (RC->subregclasses_begin()+SubIdx >= RC->subregclasses_end()) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ return;
+ }
+ RC = *(RC->subregclasses_begin()+SubIdx);
+ }
+ if (TOI.RegClass) {
+ const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass);
+ if (RC != DRC && !RC->hasSuperClass(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ *OS << "Expected a " << DRC->getName() << " register, but got a "
+ << RC->getName() << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+ // Can PHI instrs refer to MBBs not in the CFG? X86 and ARM do.
+ // case MachineOperand::MO_MachineBasicBlock:
+ // if (MI->getOpcode() == TargetInstrInfo::PHI) {
+ // if (!MO->getMBB()->isSuccessor(MI->getParent()))
+ // report("PHI operand is not in the CFG", MO, MONum);
+ // }
+ // break;
+ default:
+ break;
+ }
+}
+
+void
+MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI)
+{
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled);
+ regsKilled.clear();
+
+ for (RegVector::const_iterator I = regsDefined.begin(),
+ E = regsDefined.end(); I != E; ++I) {
+ if (regsLive.count(*I)) {
+ if (TargetRegisterInfo::isPhysicalRegister(*I)) {
+ // We allow double defines to physical registers with live
+ // super-registers.
+ if (!allowPhysDoubleDefs && !isReserved(*I) &&
+ !anySuperRegisters(regsLive, *I)) {
+ report("Redefining a live physical register", MI);
+ *OS << "Register " << TRI->getName(*I)
+ << " was defined but already live.\n";
+ }
+ } else {
+ if (!allowVirtDoubleDefs) {
+ report("Redefining a live virtual register", MI);
+ *OS << "Virtual register %reg" << *I
+ << " was defined but already live.\n";
+ }
+ }
+ } else if (TargetRegisterInfo::isVirtualRegister(*I) &&
+ !MInfo.regsKilled.count(*I)) {
+ // Virtual register defined without being killed first must be dead on
+ // entry.
+ MInfo.vregsDeadIn.insert(std::make_pair(*I, MI));
+ }
+ }
+
+ set_union(regsLive, regsDefined); regsDefined.clear();
+ set_union(regsLive, regsImpDefined); regsImpDefined.clear();
+ set_subtract(regsLive, regsDead); regsDead.clear();
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB)
+{
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
+void
+MachineVerifier::calcMaxRegsPassed()
+{
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock &MBB(*MFI);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
+
+// Calculate the minimum vregsPassed set. These are the registers that always
+// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has
+// been called earlier.
+void
+MachineVerifier::calcMinRegsPassed()
+{
+ DenseSet<const MachineBasicBlock*> todo;
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI)
+ todo.insert(MFI);
+
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+
+ // Remove entries from vRegsPassed that are not live out from all
+ // reachable predecessors.
+ RegSet dead;
+ for (RegSet::iterator I = MInfo.vregsPassed.begin(),
+ E = MInfo.vregsPassed.end(); I != E; ++I) {
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PrInfo = MBBInfoMap[*PrI];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(*I)) {
+ dead.insert(*I);
+ break;
+ }
+ }
+ }
+ // If any regs removed, we need to recheck successors.
+ if (!dead.empty()) {
+ set_subtract(MInfo.vregsPassed, dead);
+ todo.insert(MBB->succ_begin(), MBB->succ_end());
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid.
+void
+MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB)
+{
+ for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
+ DenseSet<const MachineBasicBlock*> seen;
+
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI->getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", BBI);
+ *OS << "MBB #" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
+
+void
+MachineVerifier::visitMachineFunctionAfter()
+{
+ calcMaxRegsPassed();
+
+ // With the maximal set of vregsPassed we can verify dead-in registers.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
+ PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PrInfo = MBBInfoMap[*PrI];
+ if (!PrInfo.reachable)
+ continue;
+
+ // Verify physical live-ins. EH landing pads have magic live-ins so we
+ // ignore them.
+ if (!MFI->isLandingPad()) {
+ for (MachineBasicBlock::const_livein_iterator I = MFI->livein_begin(),
+ E = MFI->livein_end(); I != E; ++I) {
+ if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ !isReserved (*I) && !PrInfo.isLiveOut(*I)) {
+ report("Live-in physical register is not live-out from predecessor",
+ MFI);
+ *OS << "Register " << TRI->getName(*I)
+ << " is not live-out from MBB #" << (*PrI)->getNumber()
+ << ".\n";
+ }
+ }
+ }
+
+
+ // Verify dead-in virtual registers.
+ if (!allowVirtDoubleDefs) {
+ for (RegMap::iterator I = MInfo.vregsDeadIn.begin(),
+ E = MInfo.vregsDeadIn.end(); I != E; ++I) {
+ // A dead-in register must be in neither regsLiveOut nor vregsPassed of
+ // any predecessor.
+ if (PrInfo.isLiveOut(I->first)) {
+ report("Live-in virtual register redefined", I->second);
+ *OS << "Register %reg" << I->first
+ << " was live-out from predecessor MBB #"
+ << (*PrI)->getNumber() << ".\n";
+ }
+ }
+ }
+ }
+ }
+
+ calcMinRegsPassed();
+
+ // With the minimal set of vregsPassed we can verify live-in virtual
+ // registers, including PHI instructions.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(MFI);
+
+ for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
+ PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PrInfo = MBBInfoMap[*PrI];
+ if (!PrInfo.reachable)
+ continue;
+
+ for (RegMap::iterator I = MInfo.vregsLiveIn.begin(),
+ E = MInfo.vregsLiveIn.end(); I != E; ++I) {
+ if (!PrInfo.isLiveOut(I->first)) {
+ report("Used virtual register is not live-in", I->second);
+ *OS << "Register %reg" << I->first
+ << " is not live-out from predecessor MBB #"
+ << (*PrI)->getNumber()
+ << ".\n";
+ }
+ }
+ }
+ }
+}
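[Editor's note] As the header comment says, the verifier is normally switched on with -verify-machineinstrs, or by pointing LLVM_VERIFY_MACHINEINSTRS at a file for the error output. It can also be scheduled explicitly; a hedged sketch, again assuming a PassManagerBase-style pipeline (illustrative only):

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/PassManager.h"
    using namespace llvm;

    static void addMachineVerification(PassManagerBase &PM) {
      // Run right after passes suspected of emitting malformed MachineInstrs.
      // Passing 'false' disallows double definitions of both virtual and
      // physical registers (see the MachineVerifier constructor above).
      PM.add(createMachineVerifierPass(false));
    }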
diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile
new file mode 100644
index 0000000..4ab3e3c
--- /dev/null
+++ b/lib/CodeGen/Makefile
@@ -0,0 +1,22 @@
+##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMCodeGen
+PARALLEL_DIRS = SelectionDAG AsmPrinter
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
+# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL
+# in this directory. Disable -pedantic for this broken compiler.
+ifneq ($(HUGE_VAL_SANITY),yes)
+CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts))
+endif
+
diff --git a/lib/CodeGen/OcamlGC.cpp b/lib/CodeGen/OcamlGC.cpp
new file mode 100644
index 0000000..f7bc9f3
--- /dev/null
+++ b/lib/CodeGen/OcamlGC.cpp
@@ -0,0 +1,38 @@
+//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+// The frametable emitter is in OcamlGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN OcamlGC : public GCStrategy {
+ public:
+ OcamlGC();
+ };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+}
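+
+// Note (illustrative): with this strategy linked in, a front end selects it
+// per function in LLVM IR via the gc attribute, e.g.
+//   define void @f() gc "ocaml" { ... }
+// where the name matches the string registered above.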
diff --git a/lib/CodeGen/PBQP.cpp b/lib/CodeGen/PBQP.cpp
new file mode 100644
index 0000000..562300f
--- /dev/null
+++ b/lib/CodeGen/PBQP.cpp
@@ -0,0 +1,1395 @@
+//===---------------- PBQP.cpp --------- PBQP Solver ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Developed by: Bernhard Scholz
+// The University of Sydney
+// http://www.it.usyd.edu.au/~scholz
+//===----------------------------------------------------------------------===//
+
+#include "PBQP.h"
+#include "llvm/Config/alloca.h"
+#include <limits>
+#include <cassert>
+#include <cstring>
+
+namespace llvm {
+
+/**************************************************************************
+ * Data Structures
+ **************************************************************************/
+
+/* edge of PBQP graph */
+typedef struct adjnode {
+ struct adjnode *prev, /* doubly chained list */
+ *succ,
+ *reverse; /* reverse edge */
+ int adj; /* adj. node */
+ PBQPMatrix *costs; /* cost matrix of edge */
+
+ bool tc_valid; /* flag whether following fields are valid */
+ int *tc_safe_regs; /* safe registers */
+ int tc_impact; /* impact */
+} adjnode;
+
+/* bucket node */
+typedef struct bucketnode {
+ struct bucketnode *prev; /* doubly chained list */
+ struct bucketnode *succ;
+ int u; /* node */
+} bucketnode;
+
+/* data structure of partitioned boolean quadratic problem */
+struct pbqp {
+ int num_nodes; /* number of nodes */
+ int max_deg; /* maximal degree of a node */
+ bool solved; /* flag that indicates whether PBQP has been solved yet */
+ bool optimal; /* flag that indicates whether PBQP is optimal */
+ PBQPNum min;
+ bool changed; /* flag whether graph has changed in simplification */
+
+ /* node fields */
+ PBQPVector **node_costs; /* cost vectors of nodes */
+ int *node_deg; /* node degree of nodes */
+ int *solution; /* solution for node */
+ adjnode **adj_list; /* adj. list */
+ bucketnode **bucket_ptr; /* bucket pointer of a node */
+
+ /* node stack */
+ int *stack; /* stack of nodes */
+ int stack_ptr; /* stack pointer */
+
+ /* bucket fields */
+ bucketnode **bucket_list; /* bucket list */
+
+ int num_r0; /* counters for number statistics */
+ int num_ri;
+ int num_rii;
+ int num_rn;
+ int num_rn_special;
+};
+
+bool isInf(PBQPNum n) { return n == std::numeric_limits<PBQPNum>::infinity(); }
+
+/*****************************************************************************
+ * allocation/de-allocation of pbqp problem
+ ****************************************************************************/
+
+/* allocate new partitioned boolean quadratic program problem */
+pbqp *alloc_pbqp(int num_nodes)
+{
+ pbqp *this_;
+ int u;
+
+ assert(num_nodes > 0);
+
+ /* allocate memory for pbqp data structure */
+ this_ = (pbqp *)malloc(sizeof(pbqp));
+
+ /* Initialize pbqp fields */
+ this_->num_nodes = num_nodes;
+ this_->solved = false;
+ this_->optimal = true;
+ this_->min = 0.0;
+ this_->max_deg = 0;
+ this_->changed = false;
+ this_->num_r0 = 0;
+ this_->num_ri = 0;
+ this_->num_rii = 0;
+ this_->num_rn = 0;
+ this_->num_rn_special = 0;
+
+ /* initialize/allocate stack fields of pbqp */
+ this_->stack = (int *) malloc(sizeof(int)*num_nodes);
+ this_->stack_ptr = 0;
+
+ /* initialize/allocate node fields of pbqp */
+ this_->adj_list = (adjnode **) malloc(sizeof(adjnode *)*num_nodes);
+ this_->node_deg = (int *) malloc(sizeof(int)*num_nodes);
+ this_->solution = (int *) malloc(sizeof(int)*num_nodes);
+ this_->bucket_ptr = (bucketnode **) malloc(sizeof(bucketnode **)*num_nodes);
+ this_->node_costs = (PBQPVector**) malloc(sizeof(PBQPVector*) * num_nodes);
+ for(u=0;u<num_nodes;u++) {
+ this_->solution[u]=-1;
+ this_->adj_list[u]=NULL;
+ this_->node_deg[u]=0;
+ this_->bucket_ptr[u]=NULL;
+ this_->node_costs[u]=NULL;
+ }
+
+ /* initialize bucket list */
+ this_->bucket_list = NULL;
+
+ return this_;
+}
+
+/* free pbqp problem */
+void free_pbqp(pbqp *this_)
+{
+ int u;
+ int deg;
+ adjnode *adj_ptr,*adj_next;
+ bucketnode *bucket,*bucket_next;
+
+ assert(this_ != NULL);
+
+ /* free node cost fields */
+ for(u=0;u < this_->num_nodes;u++) {
+ delete this_->node_costs[u];
+ }
+ free(this_->node_costs);
+
+ /* free bucket list */
+ for(deg=0;deg<=this_->max_deg;deg++) {
+ for(bucket=this_->bucket_list[deg];bucket!=NULL;bucket=bucket_next) {
+ this_->bucket_ptr[bucket->u] = NULL;
+ bucket_next = bucket-> succ;
+ free(bucket);
+ }
+ }
+ free(this_->bucket_list);
+
+ /* free adj. list */
+ assert(this_->adj_list != NULL);
+ for(u=0;u < this_->num_nodes; u++) {
+ for(adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = adj_next) {
+ adj_next = adj_ptr -> succ;
+ if (u < adj_ptr->adj) {
+ assert(adj_ptr != NULL);
+ delete adj_ptr->costs;
+ }
+ if (adj_ptr -> tc_safe_regs != NULL) {
+ free(adj_ptr -> tc_safe_regs);
+ }
+ free(adj_ptr);
+ }
+ }
+ free(this_->adj_list);
+
+ /* free other node fields */
+ free(this_->node_deg);
+ free(this_->solution);
+ free(this_->bucket_ptr);
+
+ /* free stack */
+ free(this_->stack);
+
+ /* free pbqp data structure itself */
+ free(this_);
+}
+
+
+/****************************************************************************
+ * adj. node routines
+ ****************************************************************************/
+
+/* find data structure of adj. node of a given node */
+static
+adjnode *find_adjnode(pbqp *this_,int u,int v)
+{
+ adjnode *adj_ptr;
+
+ assert (this_ != NULL);
+ assert (u >= 0 && u < this_->num_nodes);
+ assert (v >= 0 && v < this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ if (adj_ptr->adj == v) {
+ return adj_ptr;
+ }
+ }
+ return NULL;
+}
+
+/* allocate a new data structure for adj. node */
+static
+adjnode *alloc_adjnode(pbqp *this_,int u, PBQPMatrix *costs)
+{
+ adjnode *p;
+
+ assert(this_ != NULL);
+ assert(costs != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+
+ p = (adjnode *)malloc(sizeof(adjnode));
+ assert(p != NULL);
+
+ p->adj = u;
+ p->costs = costs;
+
+ p->tc_valid= false;
+ p->tc_safe_regs = NULL;
+ p->tc_impact = 0;
+
+ return p;
+}
+
+/* insert adjacency node into adj. list */
+static
+void insert_adjnode(pbqp *this_, int u, adjnode *adj_ptr)
+{
+
+ assert(this_ != NULL);
+ assert(adj_ptr != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+
+ /* if adjacency list of node is not empty -> update
+ first node of the list */
+ if (this_ -> adj_list[u] != NULL) {
+ assert(this_->adj_list[u]->prev == NULL);
+ this_->adj_list[u] -> prev = adj_ptr;
+ }
+
+ /* update doubly chained list pointers */
+ adj_ptr -> succ = this_->adj_list[u];
+ adj_ptr -> prev = NULL;
+
+ /* update adjacency list pointer of node u */
+ this_->adj_list[u] = adj_ptr;
+}
+
+/* remove entry in an adj. list */
+static
+void remove_adjnode(pbqp *this_, int u, adjnode *adj_ptr)
+{
+ assert(this_!= NULL);
+ assert(u >= 0 && u <= this_->num_nodes);
+ assert(this_->adj_list != NULL);
+ assert(adj_ptr != NULL);
+
+ if (adj_ptr -> prev == NULL) {
+ this_->adj_list[u] = adj_ptr -> succ;
+ } else {
+ adj_ptr -> prev -> succ = adj_ptr -> succ;
+ }
+
+ if (adj_ptr -> succ != NULL) {
+ adj_ptr -> succ -> prev = adj_ptr -> prev;
+ }
+
+ if(adj_ptr->reverse != NULL) {
+ adjnode *rev = adj_ptr->reverse;
+ rev->reverse = NULL;
+ }
+
+ if (adj_ptr -> tc_safe_regs != NULL) {
+ free(adj_ptr -> tc_safe_regs);
+ }
+
+ free(adj_ptr);
+}
+
+/*****************************************************************************
+ * node functions
+ ****************************************************************************/
+
+/* get degree of a node */
+static
+int get_deg(pbqp *this_,int u)
+{
+ adjnode *adj_ptr;
+ int deg = 0;
+
+ assert(this_ != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ deg ++;
+ }
+ return deg;
+}
+
+/* reinsert node */
+static
+void reinsert_node(pbqp *this_,int u)
+{
+ adjnode *adj_u,
+ *adj_v;
+
+ assert(this_!= NULL);
+ assert(u >= 0 && u <= this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_u = this_ -> adj_list[u]; adj_u != NULL; adj_u = adj_u -> succ) {
+ int v = adj_u -> adj;
+ adj_v = alloc_adjnode(this_,u,adj_u->costs);
+ insert_adjnode(this_,v,adj_v);
+ }
+}
+
+/* remove node */
+static
+void remove_node(pbqp *this_,int u)
+{
+ adjnode *adj_ptr;
+
+ assert(this_!= NULL);
+ assert(u >= 0 && u <= this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ remove_adjnode(this_,adj_ptr->adj,adj_ptr -> reverse);
+ }
+}
+
+/*****************************************************************************
+ * edge functions
+ ****************************************************************************/
+
+/* insert edge into graph */
+/* (does not check whether the edge already exists in the graph) */
+static
+void insert_edge(pbqp *this_, int u, int v, PBQPMatrix *costs)
+{
+ adjnode *adj_u,
+ *adj_v;
+
+ /* create adjacency entry for u */
+ adj_u = alloc_adjnode(this_,v,costs);
+ insert_adjnode(this_,u,adj_u);
+
+
+ /* create adjacency entry for v */
+ adj_v = alloc_adjnode(this_,u,costs);
+ insert_adjnode(this_,v,adj_v);
+
+ /* create link for reverse edge */
+ adj_u -> reverse = adj_v;
+ adj_v -> reverse = adj_u;
+}
+
+/* delete edge */
+static
+void delete_edge(pbqp *this_,int u,int v)
+{
+ adjnode *adj_ptr;
+ adjnode *rev;
+
+ assert(this_ != NULL);
+ assert( u >= 0 && u < this_->num_nodes);
+ assert( v >= 0 && v < this_->num_nodes);
+
+ adj_ptr=find_adjnode(this_,u,v);
+ assert(adj_ptr != NULL);
+ assert(adj_ptr->reverse != NULL);
+
+ delete adj_ptr -> costs;
+
+ rev = adj_ptr->reverse;
+ remove_adjnode(this_,u,adj_ptr);
+ remove_adjnode(this_,v,rev);
+}
+
+/*****************************************************************************
+ * cost functions
+ ****************************************************************************/
+
+/* Note: Since cost(u,v) = transpose(cost(v,u)), it would otherwise be
+ necessary to store two matrices, one for each of the edges (u,v) and (v,u).
+ However, we only store the matrix for the case u < v. For the other case we
+ transpose the stored matrix when required.
+*/
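+/* Illustrative example: if the stored matrix for edge (1,2) is
+     [ 0 3 ]
+     [ 5 0 ]
+   then pbqp_get_costmatrix(this_,1,2) returns a copy of it, while
+   pbqp_get_costmatrix(this_,2,1) returns a copy of its transpose
+     [ 0 5 ]
+     [ 3 0 ].
+*/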
+
+/* add costs to cost vector of a node */
+void add_pbqp_nodecosts(pbqp *this_,int u, PBQPVector *costs)
+{
+ assert(this_ != NULL);
+ assert(costs != NULL);
+ assert(u >= 0 && u <= this_->num_nodes);
+
+ if (!this_->node_costs[u]) {
+ this_->node_costs[u] = new PBQPVector(*costs);
+ } else {
+ *this_->node_costs[u] += *costs;
+ }
+}
+
+/* get cost matrix ptr */
+static
+PBQPMatrix *get_costmatrix_ptr(pbqp *this_, int u, int v)
+{
+ adjnode *adj_ptr;
+ PBQPMatrix *m = NULL;
+
+ assert (this_ != NULL);
+ assert (u >= 0 && u < this_->num_nodes);
+ assert (v >= 0 && v < this_->num_nodes);
+
+ adj_ptr = find_adjnode(this_,u,v);
+
+ if (adj_ptr != NULL) {
+ m = adj_ptr -> costs;
+ }
+
+ return m;
+}
+
+/* get a copy of the cost matrix of an edge */
+/* Note: a newly allocated matrix is returned;
+ for u > v the stored matrix is transposed first.
+*/
+static
+PBQPMatrix *pbqp_get_costmatrix(pbqp *this_, int u, int v)
+{
+ adjnode *adj_ptr = find_adjnode(this_,u,v);
+
+ if (adj_ptr != NULL) {
+ if ( u < v) {
+ return new PBQPMatrix(*adj_ptr->costs);
+ } else {
+ return new PBQPMatrix(adj_ptr->costs->transpose());
+ }
+ } else {
+ return NULL;
+ }
+}
+
+/* add costs to cost matrix of an edge */
+void add_pbqp_edgecosts(pbqp *this_,int u,int v, PBQPMatrix *costs)
+{
+ PBQPMatrix *adj_costs;
+
+ assert(this_!= NULL);
+ assert(costs != NULL);
+ assert(u >= 0 && u <= this_->num_nodes);
+ assert(v >= 0 && v <= this_->num_nodes);
+
+ /* does the edge u-v exist? */
+ if (u == v) {
+ PBQPVector *diag = new PBQPVector(costs->diagonalize());
+ add_pbqp_nodecosts(this_,v,diag);
+ delete diag;
+ } else if ((adj_costs = get_costmatrix_ptr(this_,u,v))!=NULL) {
+ if ( u < v) {
+ *adj_costs += *costs;
+ } else {
+ *adj_costs += costs->transpose();
+ }
+ } else {
+ adj_costs = new PBQPMatrix((u < v) ? *costs : costs->transpose());
+ insert_edge(this_,u,v,adj_costs);
+ }
+}
+
+/* remove bucket from bucket list */
+static
+void pbqp_remove_bucket(pbqp *this_, bucketnode *bucket)
+{
+ int u = bucket->u;
+
+ assert(this_ != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->bucket_list != NULL);
+ assert(this_->bucket_ptr[u] != NULL);
+
+ /* update predecessor node in bucket list
+ (if no preceding bucket exists, then
+ the bucket_list pointer needs to be
+ updated.)
+ */
+ if (bucket->prev != NULL) {
+ bucket->prev-> succ = bucket->succ;
+ } else {
+ this_->bucket_list[this_->node_deg[u]] = bucket -> succ;
+ }
+
+ /* update successor node in bucket list */
+ if (bucket->succ != NULL) {
+ bucket->succ-> prev = bucket->prev;
+ }
+}
+
+/**********************************************************************************
+ * pop functions
+ **********************************************************************************/
+
+/* pop node of given degree */
+static
+int pop_node(pbqp *this_,int deg)
+{
+ bucketnode *bucket;
+ int u;
+
+ assert(this_ != NULL);
+ assert(deg >= 0 && deg <= this_->max_deg);
+ assert(this_->bucket_list != NULL);
+
+ /* get first bucket of bucket list */
+ bucket = this_->bucket_list[deg];
+ assert(bucket != NULL);
+
+ /* remove bucket */
+ pbqp_remove_bucket(this_,bucket);
+ u = bucket->u;
+ free(bucket);
+ return u;
+}
+
+/**********************************************************************************
+ * reorder functions
+ **********************************************************************************/
+
+/* add bucket to bucketlist */
+static
+void add_to_bucketlist(pbqp *this_,bucketnode *bucket, int deg)
+{
+ bucketnode *old_head;
+
+ assert(bucket != NULL);
+ assert(this_ != NULL);
+ assert(deg >= 0 && deg <= this_->max_deg);
+ assert(this_->bucket_list != NULL);
+
+ /* store node degree (for re-ordering purposes)*/
+ this_->node_deg[bucket->u] = deg;
+
+ /* put bucket to front of doubly chained list */
+ old_head = this_->bucket_list[deg];
+ bucket -> prev = NULL;
+ bucket -> succ = old_head;
+ this_ -> bucket_list[deg] = bucket;
+ if (bucket -> succ != NULL ) {
+ assert ( old_head -> prev == NULL);
+ old_head -> prev = bucket;
+ }
+}
+
+
+/* reorder node in bucket list according to
+ current node degree */
+static
+void reorder_node(pbqp *this_, int u)
+{
+ int deg;
+
+ assert(this_ != NULL);
+ assert(u>= 0 && u < this_->num_nodes);
+ assert(this_->bucket_list != NULL);
+ assert(this_->bucket_ptr[u] != NULL);
+
+ /* get current node degree */
+ deg = get_deg(this_,u);
+
+ /* remove bucket from old bucket list only
+ if degree of node has changed. */
+ if (deg != this_->node_deg[u]) {
+ pbqp_remove_bucket(this_,this_->bucket_ptr[u]);
+ add_to_bucketlist(this_,this_->bucket_ptr[u],deg);
+ }
+}
+
+/* reorder adj. nodes of a node */
+static
+void reorder_adjnodes(pbqp *this_,int u)
+{
+ adjnode *adj_ptr;
+
+ assert(this_!= NULL);
+ assert(u >= 0 && u <= this_->num_nodes);
+ assert(this_->adj_list != NULL);
+
+ for(adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ reorder_node(this_,adj_ptr->adj);
+ }
+}
+
+/**********************************************************************************
+ * creation functions
+ **********************************************************************************/
+
+/* create new bucket entry */
+/* consistency of the bucket list is not checked! */
+static
+void create_bucket(pbqp *this_,int u,int deg)
+{
+ bucketnode *bucket;
+
+ assert(this_ != NULL);
+ assert(u >= 0 && u < this_->num_nodes);
+ assert(this_->bucket_list != NULL);
+
+ bucket = (bucketnode *)malloc(sizeof(bucketnode));
+ assert(bucket != NULL);
+
+ bucket -> u = u;
+ this_->bucket_ptr[u] = bucket;
+
+ add_to_bucketlist(this_,bucket,deg);
+}
+
+/* create bucket list */
+static
+void create_bucketlist(pbqp *this_)
+{
+ int u;
+ int max_deg;
+ int deg;
+
+ assert(this_ != NULL);
+ assert(this_->bucket_list == NULL);
+
+ /* determine max. degree of the nodes */
+ max_deg = 2; /* at least of degree two! */
+ for(u=0;u<this_->num_nodes;u++) {
+ deg = this_->node_deg[u] = get_deg(this_,u);
+ if (deg > max_deg) {
+ max_deg = deg;
+ }
+ }
+ this_->max_deg = max_deg;
+
+ /* allocate bucket list */
+ this_ -> bucket_list = (bucketnode **)malloc(sizeof(bucketnode *)*(max_deg + 1));
+ memset(this_->bucket_list,0,sizeof(bucketnode *)*(max_deg + 1));
+ assert(this_->bucket_list != NULL);
+
+ /* insert nodes to the list */
+ for(u=0;u<this_->num_nodes;u++) {
+ create_bucket(this_,u,this_->node_deg[u]);
+ }
+}
+
+/*****************************************************************************
+ * PBQP simplification for trivial nodes
+ ****************************************************************************/
+
+/* remove trivial node with cost vector length of one */
+static
+void disconnect_trivialnode(pbqp *this_,int u)
+{
+ int v;
+ adjnode *adj_ptr,
+ *next;
+ PBQPMatrix *c_uv;
+ PBQPVector *c_v;
+
+ assert(this_ != NULL);
+ assert(this_->node_costs != NULL);
+ assert(u >= 0 && u < this_ -> num_nodes);
+ assert(this_->node_costs[u]->getLength() == 1);
+
+ /* add edge costs to node costs of adj. nodes */
+ for(adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = next){
+ next = adj_ptr -> succ;
+ v = adj_ptr -> adj;
+ assert(v >= 0 && v < this_ -> num_nodes);
+
+ /* convert matrix to cost vector offset for adj. node */
+ c_uv = pbqp_get_costmatrix(this_,u,v);
+ c_v = new PBQPVector(c_uv->getRowAsVector(0));
+ *this_->node_costs[v] += *c_v;
+
+ /* delete edge & free vec/mat */
+ delete c_v;
+ delete c_uv;
+ delete_edge(this_,u,v);
+ }
+}
+
+/* find all trivial nodes and disconnect them */
+static
+void eliminate_trivial_nodes(pbqp *this_)
+{
+ int u;
+
+ assert(this_ != NULL);
+ assert(this_ -> node_costs != NULL);
+
+ for(u=0;u < this_ -> num_nodes; u++) {
+ if (this_->node_costs[u]->getLength() == 1) {
+ disconnect_trivialnode(this_,u);
+ }
+ }
+}
+
+/*****************************************************************************
+ * Normal form for PBQP
+ ****************************************************************************/
+
+/* normalize a cost matrix. If the matrix
+ is independent, then normalize_matrix
+ returns true - otherwise false. The
+ offset values of the decomposition are
+ accumulated in vectors u and v.
+*/
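+/* Worked example (illustrative): for m = [[1,2],[3,4]] the row minima (1,3)
+   are added to u, leaving [[0,1],[0,1]]; the column minima (0,1) of the
+   remainder are then added to v, leaving the zero matrix. The edge is
+   therefore independent and can be deleted by the caller. */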
+
+static
+bool normalize_matrix(PBQPMatrix *m, PBQPVector *u, PBQPVector *v)
+{
+ assert( m != NULL);
+ assert( u != NULL);
+ assert( v != NULL);
+ assert( u->getLength() > 0);
+ assert( v->getLength() > 0);
+
+ assert(m->getRows() == u->getLength());
+ assert(m->getCols() == v->getLength());
+
+ /* determine u vector */
+ for(unsigned r = 0; r < m->getRows(); ++r) {
+ PBQPNum min = m->getRowMin(r);
+ (*u)[r] += min;
+ if (!isInf(min)) {
+ m->subFromRow(r, min);
+ } else {
+ m->setRow(r, 0);
+ }
+ }
+
+ /* determine v vector */
+ for(unsigned c = 0; c < m->getCols(); ++c) {
+ PBQPNum min = m->getColMin(c);
+ (*v)[c] += min;
+ if (!isInf(min)) {
+ m->subFromCol(c, min);
+ } else {
+ m->setCol(c, 0);
+ }
+ }
+
+ /* determine whether matrix is
+ independent or not.
+ */
+ return m->isZero();
+}
+
+/* simplify single edge */
+static
+void simplify_edge(pbqp *this_,int u,int v)
+{
+ PBQPMatrix *costs;
+ bool is_zero;
+
+ assert (this_ != NULL);
+ assert (u >= 0 && u <this_->num_nodes);
+ assert (v >= 0 && v <this_->num_nodes);
+ assert (u != v);
+
+ /* swap u and v if u > v in order to avoid unnecessary
+ transpositions of the cost matrix */
+
+ if (u > v) {
+ int swap = u;
+ u = v;
+ v = swap;
+ }
+
+ /* get cost matrix and simplify it */
+ costs = get_costmatrix_ptr(this_,u,v);
+ is_zero=normalize_matrix(costs,this_->node_costs[u],this_->node_costs[v]);
+
+ /* delete edge */
+ if(is_zero){
+ delete_edge(this_,u,v);
+ this_->changed = true;
+ }
+}
+
+/* normalize cost matrices and remove
+ edges in PBQP if they are independent,
+ i.e. can be decomposed into two
+ cost vectors.
+*/
+static
+void eliminate_independent_edges(pbqp *this_)
+{
+ int u,v;
+ adjnode *adj_ptr,*next;
+
+ assert(this_ != NULL);
+ assert(this_ -> adj_list != NULL);
+
+ this_->changed = false;
+ for(u=0;u < this_->num_nodes;u++) {
+ for (adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = next) {
+ next = adj_ptr -> succ;
+ v = adj_ptr -> adj;
+ assert(v >= 0 && v < this_->num_nodes);
+ if (u < v) {
+ simplify_edge(this_,u,v);
+ }
+ }
+ }
+}
+
+
+/*****************************************************************************
+ * PBQP reduction rules
+ ****************************************************************************/
+
+/* RI reduction
+ This reduction rule is applied to nodes
+ of degree one. */
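+/* Summary of the code below (for orientation): x has a single neighbour y,
+   and x's choice is folded into y's cost vector via
+     delta[i] = min over j of ( c_yx[i][j] + c_x[j] ),   c_y += delta.     */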
+
+static
+void apply_RI(pbqp *this_,int x)
+{
+ int y;
+ unsigned xlen,
+ ylen;
+ PBQPMatrix *c_yx;
+ PBQPVector *c_x, *delta;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> adj_list[x] != NULL);
+ assert(this_ -> adj_list[x] -> succ == NULL);
+
+ /* get the adjacent node */
+ y = this_ -> adj_list[x] -> adj;
+ assert(y >= 0 && y < this_->num_nodes);
+
+ /* determine length of cost vectors for node x and y */
+ xlen = this_ -> node_costs[x]->getLength();
+ ylen = this_ -> node_costs[y]->getLength();
+
+ /* get cost vector c_x and matrix c_yx */
+ c_x = this_ -> node_costs[x];
+ c_yx = pbqp_get_costmatrix(this_,y,x);
+ assert (c_yx != NULL);
+
+
+ /* allocate delta vector */
+ delta = new PBQPVector(ylen);
+
+ /* compute delta vector */
+ for(unsigned i = 0; i < ylen; ++i) {
+ PBQPNum min = (*c_yx)[i][0] + (*c_x)[0];
+ for(unsigned j = 1; j < xlen; ++j) {
+ PBQPNum c = (*c_yx)[i][j] + (*c_x)[j];
+ if ( c < min )
+ min = c;
+ }
+ (*delta)[i] = min;
+ }
+
+ /* add delta vector */
+ *this_ -> node_costs[y] += *delta;
+
+ /* delete node x */
+ remove_node(this_,x);
+
+ /* reorder adj. nodes of node x */
+ reorder_adjnodes(this_,x);
+
+ /* push node x on stack */
+ assert(this_ -> stack_ptr < this_ -> num_nodes);
+ this_->stack[this_ -> stack_ptr++] = x;
+
+ /* free vec/mat */
+ delete c_yx;
+ delete delta;
+
+ /* increment counter for number statistic */
+ this_->num_ri++;
+}
+
+/* RII reduction
+ This reduction rule is applied to nodes
+ of degree two. */
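+/* Summary of the code below (for orientation): x has neighbours y and z, and
+   x's choice is folded into the edge (y,z) via
+     delta[i][j] = min over k of ( c_yx[i][k] + c_zx[j][k] + c_x[k] ),
+   which is then added to the edge costs c_yz.                             */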
+
+static
+void apply_RII(pbqp *this_,int x)
+{
+ int y,z;
+ unsigned xlen,ylen,zlen;
+ adjnode *adj_yz;
+
+ PBQPMatrix *c_yx, *c_zx;
+ PBQPVector *cx;
+ PBQPMatrix *delta;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> adj_list[x] != NULL);
+ assert(this_ -> adj_list[x] -> succ != NULL);
+ assert(this_ -> adj_list[x] -> succ -> succ == NULL);
+
+ /* get the adjacent nodes */
+ y = this_ -> adj_list[x] -> adj;
+ z = this_ -> adj_list[x] -> succ -> adj;
+ assert(y >= 0 && y < this_->num_nodes);
+ assert(z >= 0 && z < this_->num_nodes);
+
+ /* determine length of cost vectors for nodes x, y and z */
+ xlen = this_ -> node_costs[x]->getLength();
+ ylen = this_ -> node_costs[y]->getLength();
+ zlen = this_ -> node_costs[z]->getLength();
+
+ /* get cost vector c_x and matrices c_yx, c_zx */
+ cx = this_ -> node_costs[x];
+ c_yx = pbqp_get_costmatrix(this_,y,x);
+ c_zx = pbqp_get_costmatrix(this_,z,x);
+ assert(c_yx != NULL);
+ assert(c_zx != NULL);
+
+ /* Colour Heuristic */
+ if ( (adj_yz = find_adjnode(this_,y,z)) != NULL) {
+ adj_yz->tc_valid = false;
+ adj_yz->reverse->tc_valid = false;
+ }
+
+ /* allocate delta matrix */
+ delta = new PBQPMatrix(ylen, zlen);
+
+ /* compute delta matrix */
+ for(unsigned i=0;i<ylen;i++) {
+ for(unsigned j=0;j<zlen;j++) {
+ PBQPNum min = (*c_yx)[i][0] + (*c_zx)[j][0] + (*cx)[0];
+ for(unsigned k=1;k<xlen;k++) {
+ PBQPNum c = (*c_yx)[i][k] + (*c_zx)[j][k] + (*cx)[k];
+ if ( c < min ) {
+ min = c;
+ }
+ }
+ (*delta)[i][j] = min;
+ }
+ }
+
+ /* add delta matrix */
+ add_pbqp_edgecosts(this_,y,z,delta);
+
+ /* delete node x */
+ remove_node(this_,x);
+
+ /* simplify cost matrix c_yz */
+ simplify_edge(this_,y,z);
+
+ /* reorder adj. nodes */
+ reorder_adjnodes(this_,x);
+
+ /* push node x on stack */
+ assert(this_ -> stack_ptr < this_ -> num_nodes);
+ this_->stack[this_ -> stack_ptr++] = x;
+
+ /* free vec/mat */
+ delete c_yx;
+ delete c_zx;
+ delete delta;
+
+ /* increment counter for number statistic */
+ this_->num_rii++;
+
+}
+
+/* RN reduction */
+static
+void apply_RN(pbqp *this_,int x)
+{
+ unsigned xlen;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> node_costs[x] != NULL);
+
+ xlen = this_ -> node_costs[x] -> getLength();
+
+ /* after application of RN rule no optimality
+ can be guaranteed! */
+ this_ -> optimal = false;
+
+ /* push node x on stack */
+ assert(this_ -> stack_ptr < this_ -> num_nodes);
+ this_->stack[this_ -> stack_ptr++] = x;
+
+ /* delete node x */
+ remove_node(this_,x);
+
+ /* reorder adj. nodes of node x */
+ reorder_adjnodes(this_,x);
+
+ /* increment counter for number statistic */
+ this_->num_rn++;
+}
+
+
+static
+void compute_tc_info(pbqp *this_, adjnode *p)
+{
+ adjnode *r;
+ PBQPMatrix *m;
+ int x,y;
+ PBQPVector *c_x, *c_y;
+ int *row_inf_counts;
+
+ assert(p->reverse != NULL);
+
+ /* set flags */
+ r = p->reverse;
+ p->tc_valid = true;
+ r->tc_valid = true;
+
+ /* get edge */
+ x = r->adj;
+ y = p->adj;
+
+ /* get cost vectors */
+ c_x = this_ -> node_costs[x];
+ c_y = this_ -> node_costs[y];
+
+ /* get cost matrix */
+ m = pbqp_get_costmatrix(this_, x, y);
+
+
+ /* allocate allowed set for edge (x,y) and (y,x) */
+ if (p->tc_safe_regs == NULL) {
+ p->tc_safe_regs = (int *) malloc(sizeof(int) * c_x->getLength());
+ }
+
+ if (r->tc_safe_regs == NULL ) {
+ r->tc_safe_regs = (int *) malloc(sizeof(int) * c_y->getLength());
+ }
+
+ p->tc_impact = r->tc_impact = 0;
+
+ row_inf_counts = (int *) alloca(sizeof(int) * c_x->getLength());
+
+ /* init arrays */
+ p->tc_safe_regs[0] = 0;
+ row_inf_counts[0] = 0;
+ for(unsigned i = 1; i < c_x->getLength(); ++i){
+ p->tc_safe_regs[i] = 1;
+ row_inf_counts[i] = 0;
+ }
+
+ r->tc_safe_regs[0] = 0;
+ for(unsigned j = 1; j < c_y->getLength(); ++j){
+ r->tc_safe_regs[j] = 1;
+ }
+
+ for(unsigned j = 0; j < c_y->getLength(); ++j) {
+ int col_inf_counts = 0;
+ for (unsigned i = 0; i < c_x->getLength(); ++i) {
+ if (isInf((*m)[i][j])) {
+ ++col_inf_counts;
+ ++row_inf_counts[i];
+
+ p->tc_safe_regs[i] = 0;
+ r->tc_safe_regs[j] = 0;
+ }
+ }
+ if (col_inf_counts > p->tc_impact) {
+ p->tc_impact = col_inf_counts;
+ }
+ }
+
+ for(unsigned i = 0; i < c_x->getLength(); ++i){
+ if (row_inf_counts[i] > r->tc_impact)
+ {
+ r->tc_impact = row_inf_counts[i];
+ }
+ }
+
+ delete m;
+}
+
+/*
+ * Checks whether node x can be locally coloured.
+ */
+static
+int is_colorable(pbqp *this_,int x)
+{
+ adjnode *adj_ptr;
+ PBQPVector *c_x;
+ int result = 1;
+ int *allowed;
+ int num_allowed = 0;
+ unsigned total_impact = 0;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> node_costs[x] != NULL);
+
+ c_x = this_ -> node_costs[x];
+
+ /* allocate allowed set */
+ allowed = (int *)malloc(sizeof(int) * c_x->getLength());
+ for(unsigned i = 0; i < c_x->getLength(); ++i){
+ if (!isInf((*c_x)[i]) && i > 0) {
+ allowed[i] = 1;
+ ++num_allowed;
+ } else {
+ allowed[i] = 0;
+ }
+ }
+
+ /* determine local minimum */
+ for(adj_ptr=this_->adj_list[x] ;adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ if (!adj_ptr -> tc_valid) {
+ compute_tc_info(this_, adj_ptr);
+ }
+
+ total_impact += adj_ptr->tc_impact;
+
+ if (num_allowed > 0) {
+ for (unsigned i = 1; i < c_x->getLength(); ++i){
+ if (allowed[i]){
+ if (!adj_ptr->tc_safe_regs[i]){
+ allowed[i] = 0;
+ --num_allowed;
+ if (num_allowed == 0)
+ break;
+ }
+ }
+ }
+ }
+
+ if ( total_impact >= c_x->getLength() - 1 && num_allowed == 0 ) {
+ result = 0;
+ break;
+ }
+ }
+ free(allowed);
+
+ return result;
+}
+
+/* use the Briggs heuristic
+ note: this is not a general heuristic; it is only useful for
+ interference graphs.
+ */
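+/* Sketch of the selection order (numbers invented for illustration): first,
+   any node of degree > 2 that is still locally colourable is popped; failing
+   that, the node with the smallest ratio of first cost entry to degree is
+   chosen, e.g. cost 4 / degree 8 = 0.5 is preferred over cost 6 / degree 4
+   = 1.5. */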
+int pop_colorablenode(pbqp *this_)
+{
+ int deg;
+ bucketnode *min_bucket=NULL;
+ PBQPNum min = std::numeric_limits<PBQPNum>::infinity();
+
+ /* prefer a node of degree > 2 that is still locally colourable */
+ for(deg=this_->max_deg;deg > 2;deg--) {
+ bucketnode *bucket;
+ for(bucket=this_->bucket_list[deg];bucket!= NULL;bucket = bucket -> succ) {
+ int u = bucket->u;
+ if (is_colorable(this_,u)) {
+ pbqp_remove_bucket(this_,bucket);
+ this_->num_rn_special++;
+ free(bucket);
+ return u;
+ }
+ }
+ }
+
+ /* otherwise select the node with the minimal ratio between its first cost entry and its degree */
+ for(deg=this_->max_deg;deg >2; deg--) {
+ bucketnode *bucket;
+ for(bucket=this_->bucket_list[deg];bucket!= NULL;bucket = bucket -> succ) {
+ PBQPNum h;
+ int u;
+
+ u = bucket->u;
+ assert(u>=0 && u < this_->num_nodes);
+ h = (*this_->node_costs[u])[0] / (PBQPNum) deg;
+ if (h < min) {
+ min_bucket = bucket;
+ min = h;
+ }
+ }
+ }
+
+ /* return node and free bucket */
+ if (min_bucket != NULL) {
+ int u;
+
+ pbqp_remove_bucket(this_,min_bucket);
+ u = min_bucket->u;
+ free(min_bucket);
+ return u;
+ } else {
+ return -1;
+ }
+}
+
+
+/*****************************************************************************
+ * PBQP graph parsing
+ ****************************************************************************/
+
+/* reduce pbqp problem (first phase) */
+static
+void reduce_pbqp(pbqp *this_)
+{
+ int u;
+
+ assert(this_ != NULL);
+ assert(this_->bucket_list != NULL);
+
+ for(;;){
+
+ if (this_->bucket_list[1] != NULL) {
+ u = pop_node(this_,1);
+ apply_RI(this_,u);
+ } else if (this_->bucket_list[2] != NULL) {
+ u = pop_node(this_,2);
+ apply_RII(this_,u);
+ } else if ((u = pop_colorablenode(this_)) != -1) {
+ apply_RN(this_,u);
+ } else {
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ * PBQP back propagation
+ ****************************************************************************/
+
+/* determine the solution of a reduced node. One of
+ RI, RII or RN was applied to this node. */
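+/* Concretely (summary of the code below): with each remaining neighbour y
+   already assigned solution[y], node x receives
+     solution[x] = argmin over i of ( c_x[i] + sum over neighbours y of
+                                      c_yx[solution[y]][i] ).              */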
+static
+void determine_solution(pbqp *this_,int x)
+{
+ PBQPVector *v = new PBQPVector(*this_ -> node_costs[x]);
+ adjnode *adj_ptr;
+
+ assert(this_ != NULL);
+ assert(x >= 0 && x < this_->num_nodes);
+ assert(this_ -> adj_list != NULL);
+ assert(this_ -> solution != NULL);
+
+ for(adj_ptr=this_->adj_list[x] ;adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ int y = adj_ptr -> adj;
+ int y_sol = this_ -> solution[y];
+
+ PBQPMatrix *c_yx = pbqp_get_costmatrix(this_,y,x);
+ assert(y_sol >= 0 && y_sol < (int)this_->node_costs[y]->getLength());
+ (*v) += c_yx->getRowAsVector(y_sol);
+ delete c_yx;
+ }
+ this_ -> solution[x] = v->minIndex();
+
+ delete v;
+}
+
+/* back propagation phase of PBQP */
+static
+void back_propagate(pbqp *this_)
+{
+ int i;
+
+ assert(this_ != NULL);
+ assert(this_->stack != NULL);
+ assert(this_->stack_ptr < this_->num_nodes);
+
+ for(i=this_ -> stack_ptr-1;i>=0;i--) {
+ int x = this_ -> stack[i];
+ assert( x >= 0 && x < this_ -> num_nodes);
+ reinsert_node(this_,x);
+ determine_solution(this_,x);
+ }
+}
+
+/* solve trivial nodes of degree zero */
+static
+void determine_trivialsolution(pbqp *this_)
+{
+ int u;
+ PBQPNum delta;
+
+ assert( this_ != NULL);
+ assert( this_ -> bucket_list != NULL);
+
+ /* determine trivial solution */
+ while (this_->bucket_list[0] != NULL) {
+ u = pop_node(this_,0);
+
+ assert( u >= 0 && u < this_ -> num_nodes);
+
+ this_->solution[u] = this_->node_costs[u]->minIndex();
+ delta = (*this_->node_costs[u])[this_->solution[u]];
+ this_->min = this_->min + delta;
+
+ /* increment counter for number statistic */
+ this_->num_r0++;
+ }
+}
+
+/*****************************************************************************
+ * debug facilities
+ ****************************************************************************/
+static
+void check_pbqp(pbqp *this_)
+{
+ int u,v;
+ PBQPMatrix *costs;
+ adjnode *adj_ptr;
+
+ assert( this_ != NULL);
+
+ for(u=0;u< this_->num_nodes; u++) {
+ assert (this_ -> node_costs[u] != NULL);
+ for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) {
+ v = adj_ptr -> adj;
+ assert( v>= 0 && v < this_->num_nodes);
+ if (u < v ) {
+ costs = adj_ptr -> costs;
+ assert( costs->getRows() == this_->node_costs[u]->getLength() &&
+ costs->getCols() == this_->node_costs[v]->getLength());
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ * PBQP solve routines
+ ****************************************************************************/
+
+/* solve PBQP problem */
+void solve_pbqp(pbqp *this_)
+{
+ assert(this_ != NULL);
+ assert(!this_->solved);
+
+ /* check vector & matrix dimensions */
+ check_pbqp(this_);
+
+ /* simplify PBQP problem */
+
+ /* eliminate trivial nodes, i.e.
+ nodes with cost vectors of length one. */
+ eliminate_trivial_nodes(this_);
+
+ /* eliminate edges with independent
+ cost matrices and normalize matrices */
+ eliminate_independent_edges(this_);
+
+ /* create bucket list for graph parsing */
+ create_bucketlist(this_);
+
+ /* reduce phase */
+ reduce_pbqp(this_);
+
+ /* solve trivial nodes */
+ determine_trivialsolution(this_);
+
+ /* back propagation phase */
+ back_propagate(this_);
+
+ this_->solved = true;
+}
+
+/* get solution of a node */
+int get_pbqp_solution(pbqp *this_,int x)
+{
+ assert(this_ != NULL);
+ assert(this_->solution != NULL);
+ assert(this_ -> solved);
+
+ return this_->solution[x];
+}
+
+/* is solution optimal? */
+bool is_pbqp_optimal(pbqp *this_)
+{
+ assert(this_ -> solved);
+ return this_->optimal;
+}
+
+}
+
+/* end of PBQP.cpp */
diff --git a/lib/CodeGen/PBQP.h b/lib/CodeGen/PBQP.h
new file mode 100644
index 0000000..5fd2c06
--- /dev/null
+++ b/lib/CodeGen/PBQP.h
@@ -0,0 +1,284 @@
+//===------------------ PBQP.h --------- PBQP Solver ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Developed by: Bernhard Scholz
+// The University of Sydney
+// http://www.it.usyd.edu.au/~scholz
+//===----------------------------------------------------------------------===//
+
+// TODO:
+//
+// * Default to null costs on vector initialisation?
+// * C++-ify the rest of the solver.
+
+#ifndef LLVM_CODEGEN_PBQPSOLVER_H
+#define LLVM_CODEGEN_PBQPSOLVER_H
+
+#include <cassert>
+#include <algorithm>
+#include <functional>
+
+namespace llvm {
+
+//! \brief Floating point type to use in PBQP solver.
+typedef double PBQPNum;
+
+//! \brief PBQP Vector class.
+class PBQPVector {
+public:
+
+ //! \brief Construct a PBQP vector of the given size.
+ explicit PBQPVector(unsigned length) :
+ length(length), data(new PBQPNum[length]) {
+ std::fill(data, data + length, 0);
+ }
+
+ //! \brief Copy construct a PBQP vector.
+ PBQPVector(const PBQPVector &v) :
+ length(v.length), data(new PBQPNum[length]) {
+ std::copy(v.data, v.data + length, data);
+ }
+
+ ~PBQPVector() { delete[] data; }
+
+ //! \brief Assignment operator.
+ PBQPVector& operator=(const PBQPVector &v) {
+ delete[] data;
+ length = v.length;
+ data = new PBQPNum[length];
+ std::copy(v.data, v.data + length, data);
+ return *this;
+ }
+
+ //! \brief Return the length of the vector
+ unsigned getLength() const throw () {
+ return length;
+ }
+
+ //! \brief Element access.
+ PBQPNum& operator[](unsigned index) {
+ assert(index < length && "PBQPVector element access out of bounds.");
+ return data[index];
+ }
+
+ //! \brief Const element access.
+ const PBQPNum& operator[](unsigned index) const {
+ assert(index < length && "PBQPVector element access out of bounds.");
+ return data[index];
+ }
+
+ //! \brief Add another vector to this one.
+ PBQPVector& operator+=(const PBQPVector &v) {
+ assert(length == v.length && "PBQPVector length mismatch.");
+ std::transform(data, data + length, v.data, data, std::plus<PBQPNum>());
+ return *this;
+ }
+
+ //! \brief Subtract another vector from this one.
+ PBQPVector& operator-=(const PBQPVector &v) {
+ assert(length == v.length && "PBQPVector length mismatch.");
+ std::transform(data, data + length, v.data, data, std::minus<PBQPNum>());
+ return *this;
+ }
+
+ //! \brief Returns the index of the minimum value in this vector
+ unsigned minIndex() const {
+ return std::min_element(data, data + length) - data;
+ }
+
+private:
+ unsigned length;
+ PBQPNum *data;
+};
+
+
+//! \brief PBQP Matrix class
+class PBQPMatrix {
+public:
+
+ //! \brief Construct a PBQP Matrix with the given dimensions.
+ PBQPMatrix(unsigned rows, unsigned cols) :
+ rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+ std::fill(data, data + (rows * cols), 0);
+ }
+
+ //! \brief Copy construct a PBQP matrix.
+ PBQPMatrix(const PBQPMatrix &m) :
+ rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
+ std::copy(m.data, m.data + (rows * cols), data);
+ }
+
+ ~PBQPMatrix() { delete[] data; }
+
+ //! \brief Assignment operator.
+ PBQPMatrix& operator=(const PBQPMatrix &m) {
+ delete[] data;
+ rows = m.rows; cols = m.cols;
+ data = new PBQPNum[rows * cols];
+ std::copy(m.data, m.data + (rows * cols), data);
+ return *this;
+ }
+
+ //! \brief Return the number of rows in this matrix.
+ unsigned getRows() const throw () { return rows; }
+
+ //! \brief Return the number of cols in this matrix.
+ unsigned getCols() const throw () { return cols; }
+
+ //! \brief Matrix element access.
+ PBQPNum* operator[](unsigned r) {
+ assert(r < rows && "Row out of bounds.");
+ return data + (r * cols);
+ }
+
+ //! \brief Matrix element access.
+ const PBQPNum* operator[](unsigned r) const {
+ assert(r < rows && "Row out of bounds.");
+ return data + (r * cols);
+ }
+
+ //! \brief Returns the given row as a vector.
+ PBQPVector getRowAsVector(unsigned r) const {
+ PBQPVector v(cols);
+ for (unsigned c = 0; c < cols; ++c)
+ v[c] = (*this)[r][c];
+ return v;
+ }
+
+ //! \brief Reset the matrix to the given value.
+ PBQPMatrix& reset(PBQPNum val = 0) {
+ std::fill(data, data + (rows * cols), val);
+ return *this;
+ }
+
+ //! \brief Set a single row of this matrix to the given value.
+ PBQPMatrix& setRow(unsigned r, PBQPNum val) {
+ assert(r < rows && "Row out of bounds.");
+ std::fill(data + (r * cols), data + ((r + 1) * cols), val);
+ return *this;
+ }
+
+ //! \brief Set a single column of this matrix to the given value.
+ PBQPMatrix& setCol(unsigned c, PBQPNum val) {
+ assert(c < cols && "Column out of bounds.");
+ for (unsigned r = 0; r < rows; ++r)
+ (*this)[r][c] = val;
+ return *this;
+ }
+
+ //! \brief Matrix transpose.
+ PBQPMatrix transpose() const {
+ PBQPMatrix m(cols, rows);
+ for (unsigned r = 0; r < rows; ++r)
+ for (unsigned c = 0; c < cols; ++c)
+ m[c][r] = (*this)[r][c];
+ return m;
+ }
+
+ //! \brief Returns the diagonal of the matrix as a vector.
+ //!
+ //! Matrix must be square.
+ PBQPVector diagonalize() const {
+ assert(rows == cols && "Attempt to diagonalize non-square matrix.");
+
+ PBQPVector v(rows);
+ for (unsigned r = 0; r < rows; ++r)
+ v[r] = (*this)[r][r];
+ return v;
+ }
+
+ //! \brief Add the given matrix to this one.
+ PBQPMatrix& operator+=(const PBQPMatrix &m) {
+ assert(rows == m.rows && cols == m.cols &&
+ "Matrix dimensions mismatch.");
+ std::transform(data, data + (rows * cols), m.data, data,
+ std::plus<PBQPNum>());
+ return *this;
+ }
+
+ //! \brief Returns the minimum of the given row
+ PBQPNum getRowMin(unsigned r) const {
+ assert(r < rows && "Row out of bounds");
+ return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
+ }
+
+ //! \brief Returns the minimum of the given column
+ PBQPNum getColMin(unsigned c) const {
+ PBQPNum minElem = (*this)[0][c];
+ for (unsigned r = 1; r < rows; ++r)
+ if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
+ return minElem;
+ }
+
+ //! \brief Subtracts the given scalar from the elements of the given row.
+ PBQPMatrix& subFromRow(unsigned r, PBQPNum val) {
+ assert(r < rows && "Row out of bounds");
+ std::transform(data + (r * cols), data + ((r + 1) * cols),
+ data + (r * cols),
+ std::bind2nd(std::minus<PBQPNum>(), val));
+ return *this;
+ }
+
+ //! \brief Subtracts the given scalar from the elements of the given column.
+ PBQPMatrix& subFromCol(unsigned c, PBQPNum val) {
+ for (unsigned r = 0; r < rows; ++r)
+ (*this)[r][c] -= val;
+ return *this;
+ }
+
+ //! \brief Returns true if this is a zero matrix.
+ bool isZero() const {
+ return find_if(data, data + (rows * cols),
+ std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
+ data + (rows * cols);
+ }
+
+private:
+ unsigned rows, cols;
+ PBQPNum *data;
+};
+
+#define EPS (1E-8)
+
+#ifndef PBQP_TYPE
+#define PBQP_TYPE
+struct pbqp;
+typedef struct pbqp pbqp;
+#endif
+
+/*****************
+ * PBQP routines *
+ *****************/
+
+/* allocate pbqp problem */
+pbqp *alloc_pbqp(int num);
+
+/* add node costs */
+void add_pbqp_nodecosts(pbqp *this_,int u, PBQPVector *costs);
+
+/* add edge mat */
+void add_pbqp_edgecosts(pbqp *this_,int u,int v,PBQPMatrix *costs);
+
+/* solve PBQP problem */
+void solve_pbqp(pbqp *this_);
+
+/* get solution of a node */
+int get_pbqp_solution(pbqp *this_,int u);
+
+/* free PBQP */
+void free_pbqp(pbqp *this_);
+
+/* is optimal */
+bool is_pbqp_optimal(pbqp *this_);
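+
+/* Typical usage (illustrative sketch only; sizes and costs are invented):
+
+     pbqp *p = alloc_pbqp(2);              // two nodes
+     PBQPVector c0(3), c1(3);              // three alternatives per node
+     add_pbqp_nodecosts(p, 0, &c0);        // costs are copied internally
+     add_pbqp_nodecosts(p, 1, &c1);
+     PBQPMatrix m(3, 3);                   // edge costs between nodes 0 and 1
+     add_pbqp_edgecosts(p, 0, 1, &m);
+     solve_pbqp(p);
+     int s0 = get_pbqp_solution(p, 0);     // chosen alternative for node 0
+     bool opt = is_pbqp_optimal(p);
+     free_pbqp(p);
+*/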
+
+}
+#endif
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..c5c76fc
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,431 @@
+//===-- PHIElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+
+namespace {
+ class VISIBILITY_HIDDEN PNE : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PNE() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in the
+ /// function. In particular, for each (predecessor BB, vreg) pair we count
+ /// how many PHI operands use that vreg with a value coming from that BB.
+ /// This is used later to determine when the vreg is killed in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+ // SrcReg. This needs to be after any def or uses of SrcReg, but before
+ // any subsequent point where control flow might jump out of the basic
+ // block.
+ MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+ unsigned SrcReg);
+
+ // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
+ // also after any exception handling labels: in landing pads execution
+ // starts at the label, so any copies placed before it won't be executed!
+ MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ // Rather than assuming that EH labels come before other kinds of labels,
+ // just skip all labels.
+ while (I != MBB.end() &&
+ (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel()))
+ ++I;
+ return I;
+ }
+
+ typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair;
+ typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+ };
+}
+
+char PNE::ID = 0;
+static RegisterPass<PNE>
+X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
+
+const PassInfo *const llvm::PHIEliminationID = &X;
+
+bool PNE::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+
+ analyzePHINodes(Fn);
+
+ bool Changed = false;
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= EliminatePHINodes(Fn, *I);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (SmallPtrSet<MachineInstr*,4>::iterator I = ImpDefs.begin(),
+ E = ImpDefs.end(); I != E; ++I) {
+ MachineInstr *DefMI = *I;
+ unsigned DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_empty(DefReg))
+ DefMI->eraseFromParent();
+ }
+
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+ return Changed;
+}
+
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) {
+ if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin());
+
+ while (MBB.front().getOpcode() == TargetInstrInfo::PHI)
+ LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ unsigned SrcReg = MPhi->getOperand(i).getReg();
+ const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (!DefMI || DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+ return false;
+ }
+ return true;
+}
+
+// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
+// This needs to be after any def or uses of SrcReg, but before any subsequent
+// point where control flow might jump out of the basic block.
+MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB.empty())
+ return MBB.begin();
+
+ // If this basic block does not contain an invoke, then control flow always
+ // reaches the end of it, so place the copy there. The logic below works in
+ // this case too, but is more expensive.
+ if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
+ return MBB.getFirstTerminator();
+
+ // Discover any definition/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineInstr *DefUseMI = &*RI;
+ if (DefUseMI->getParent() == &MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No def/uses. Insert the copy at the start of the basic block.
+ InsertPoint = MBB.begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the definition/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last definition/use.
+ InsertPoint = MBB.end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes, however.
+ return SkipPHIsAndLabels(MBB, InsertPoint);
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+///
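+/// For example (illustrative only), a machine PHI of the form
+///   %d = PHI %a, <bb1>, %b, <bb2>
+/// becomes a register-to-register copy of %a into a fresh vreg near the end
+/// of bb1, a copy of %b into the same vreg near the end of bb2, and a copy of
+/// that vreg into %d just after any remaining PHIs of this block.
+///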
+void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ unsigned IncomingReg = 0;
+
+ // Insert a register to register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg);
+ else {
+ IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
+ }
+
+ // Update live variable information if there is any.
+ LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ if (IncomingReg) {
+ // Increment use count of the newly created virtual register.
+ LV->getVarInfo(IncomingReg).NumUses++;
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+ // each for each incoming block), the "def" block and instruction fields
+ // for the VarInfo is not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, MPhi);
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i + 1).getMBB(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // If source is defined by an implicit def, there is no need to insert a
+ // copy.
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ ImpDefs.insert(DefMI);
+ continue;
+ }
+
+ // Get the MachineBasicBlock equivalent of the BasicBlock that is the source
+ // of this PHI operand.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock))
+ continue; // If the copy has already been emitted, we're done.
+
+ // Find a safe location to insert the copy; this may be the first terminator
+ // in the block (or end()).
+ MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg);
+
+ // Insert the copy.
+ TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
+
+ // Now update live variable information if we have it. Otherwise we're done.
+ if (!LV) continue;
+
+ // We want to be able to insert a kill of the register if this PHI (aka, the
+ // copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value is
+ // live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+ //
+ // Check to see if the copy is the last use, and if so, update the live
+ // variables information so that it knows the copy source instruction kills
+ // the incoming value.
+ LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ // Also check to see if this register is in use by another PHI node which
+ // has not yet been eliminated. If so, it will be killed at an appropriate
+ // point later.
+
+ // Is it used by any PHI instructions in this block?
+ bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
+
+ std::vector<MachineBasicBlock*> OpSuccBlocks;
+
+ // Otherwise, scan successors, including the BB the PHI node lives in.
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+ E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (InRegVI.AliveBlocks.test(SuccIdx)) {
+ ValueIsLive = true;
+ break;
+ }
+
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ if (!ValueIsLive) {
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *MBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (InRegVI.Kills[i]->getParent() == MBB) {
+ ValueIsLive = true;
+ break;
+ }
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (InRegVI.Kills[i]->getParent() == MBB1 ||
+ InRegVI.Kills[i]->getParent() == MBB2) {
+ ValueIsLive = true;
+ break;
+ }
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ InRegVI.Kills[i]->getParent())) {
+ ValueIsLive = true;
+ break;
+ }
+ }
+ }
+
+ // Okay, if we now know that the value is not live out of the block, we can
+ // add a kill marker in this block saying that it kills the incoming value!
+ if (!ValueIsLive) {
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy; however, the first
+ // terminator instruction at the end of the block may also use the value.
+ // In this case, we should mark *it* as the killing instruction, not the
+ // copy.
+ MachineBasicBlock::iterator KillInst = prior(InsertPos);
+ MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
+ if (Term != opBlock.end()) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+
+        // Check that no other terminators read SrcReg.
+#ifndef NDEBUG
+ for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end();
+ ++TI) {
+ assert(!TI->readsRegister(SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless "
+ "they are the first terminator in a block!");
+ }
+#endif
+ }
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ InRegVI.AliveBlocks.reset(opBlockNum);
+ }
+ }
+
+ // Really delete the PHI instruction now!
+ MF.DeleteMachineInstr(MPhi);
+ ++NumAtomic;
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, count how many times each virtual register is used by a PHI
+/// node, keyed by the basic block the value is coming from. This is used
+/// later to determine when the vreg is killed in that BB.
+///
+void PNE::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(),
+ BBI->getOperand(i).getReg())];
+}
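The i/i+1 stride in analyzePHINodes (and in LowerAtomicPHINode above) relies on the machine-level PHI operand layout. A minimal sketch of walking a PHI's incoming values, using only the accessors already seen above (the helper name is hypothetical):

// Sketch only: operand 0 of a PHI is the def; incoming values follow as
// (register, predecessor MBB) pairs, which is why the loops step by 2.
static void visitPHIIncomingValues(const MachineInstr *Phi) {
  assert(Phi->getOpcode() == TargetInstrInfo::PHI && "Not a PHI!");
  for (unsigned i = 1, e = Phi->getNumOperands(); i != e; i += 2) {
    unsigned SrcReg = Phi->getOperand(i).getReg();             // incoming vreg
    MachineBasicBlock *Pred = Phi->getOperand(i + 1).getMBB(); // its source block
    (void)SrcReg; (void)Pred; // a real visitor would record the pair here
  }
}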
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
new file mode 100644
index 0000000..f67eb79
--- /dev/null
+++ b/lib/CodeGen/Passes.cpp
@@ -0,0 +1,54 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&createLinearScanRegisterAllocator),
+ cl::desc("Register allocator to use: (default = linearscan)"));
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+
+ return Ctor();
+}
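For context, createRegisterAllocator only falls back to the -regalloc option when no default has been installed programmatically via setDefault. A sketch of how an allocator typically makes itself selectable through this registry, modeled on the in-tree allocators; the "toy" name and factory are hypothetical:

// Hypothetical example of registering an allocator with RegisterRegAlloc so
// that -regalloc=toy (and getDefault/setDefault) can find it.
FunctionPass *createToyRegisterAllocator();   // assumed factory, not in-tree

static RegisterRegAlloc
  toyRegAlloc("toy", "toy register allocator (illustrative only)",
              createToyRegisterAllocator);

Because the registration is a static object, simply linking the allocator's object file into the tool makes the name available on the command line.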
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 0000000..de774685
--- /dev/null
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,941 @@
+//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static cl::opt<bool>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnablePostRAHazardAvoidance("avoid-hazards",
+ cl::desc("Enable simple hazard-avoidance"),
+ cl::init(true), cl::Hidden);
+
+namespace {
+ class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
+ public:
+ static char ID;
+ PostRAScheduler() : MachineFunctionPass(&ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Post RA top-down list latency scheduler";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char PostRAScheduler::ID = 0;
+
+ class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+    /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// Topo - A topological ordering for SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ /// AllocatableSet - The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// Classes - For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 casted to a
+ /// pointer.
+ const TargetRegisterClass *
+ Classes[TargetRegisterInfo::FirstVirtualRegister];
+
+    /// RegRefs - Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+
+    /// The index of the most recent kill (proceeding bottom-up), or ~0u if
+ /// the register is not live.
+ unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+    /// The index of the most recent complete def (proceeding bottom-up), or ~0u
+ /// if the register is live.
+ unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+ public:
+ SchedulePostRATDList(MachineFunction &MF,
+ const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ ScheduleHazardRecognizer *HR)
+ : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
+ AllocatableSet(TRI->getAllocatableSet(MF)),
+ HazardRec(HR) {}
+
+ ~SchedulePostRATDList() {
+ delete HazardRec;
+ }
+
+ /// StartBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void Schedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr *MI, unsigned Count);
+
+ /// FinishBlock - Clean up register live-range state.
+ ///
+ void FinishBlock();
+
+ private:
+ void PrescanInstruction(MachineInstr *MI);
+ void ScanInstruction(MachineInstr *MI, unsigned Count);
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+ bool BreakAntiDependencies();
+ };
+
+ /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
+  /// a coarse classification and attempts to avoid grouping instructions of
+  /// a given class too densely together.
+ class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
+ /// Class - A simple classification for SUnits.
+ enum Class {
+ Other, Load, Store
+ };
+
+ /// Window - The Class values of the most recently issued
+ /// instructions.
+ Class Window[8];
+
+ /// getClass - Classify the given SUnit.
+ Class getClass(const SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayLoad())
+ return Load;
+ if (TID.mayStore())
+ return Store;
+ return Other;
+ }
+
+ /// Step - Rotate the existing entries in Window and insert the
+    /// given class value as the most recent entry.
+ void Step(Class C) {
+ std::copy(Window+1, array_endof(Window), Window);
+ Window[array_lengthof(Window)-1] = C;
+ }
+
+ public:
+ SimpleHazardRecognizer() : Window() {}
+
+ virtual HazardType getHazardType(SUnit *SU) {
+ Class C = getClass(SU);
+ if (C == Other)
+ return NoHazard;
+ unsigned Score = 0;
+ for (unsigned i = 0; i != array_lengthof(Window); ++i)
+ if (Window[i] == C)
+ Score += i + 1;
+ if (Score > array_lengthof(Window) * 2)
+ return Hazard;
+ return NoHazard;
+ }
+
+ virtual void EmitInstruction(SUnit *SU) {
+ Step(getClass(SU));
+ }
+
+ virtual void AdvanceCycle() {
+ Step(Other);
+ }
+ };
+}
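To make the scoring in SimpleHazardRecognizer::getHazardType concrete: the window holds the classes of the eight most recently issued instructions, with the newest in slot 7, and a candidate of class C scores i+1 for every slot i already holding C, so the newest match contributes 8 and the oldest 1. The hazard threshold is 2 * 8 = 16. For example (a hypothetical history), if the three most recently issued instructions were loads, a new load scores 6 + 7 + 8 = 21 > 16 and is reported as a Hazard; with only the two most recent slots holding loads it scores 7 + 8 = 15 and is allowed. AdvanceCycle pushes an Other entry into the window, so stall cycles age the history out.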
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+///
+static bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineFunction &MF) {
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that modifies
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ return true;
+
+ return false;
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "PostRAScheduler\n";
+
+ const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
+ new SimpleHazardRecognizer :
+ new ScheduleHazardRecognizer();
+
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.StartBlock(MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB->end();
+ unsigned Count = MBB->size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+ MachineInstr *MI = prior(I);
+ if (isSchedulingBoundary(MI, Fn)) {
+ Scheduler.Run(MBB, I, Current, CurrentCount);
+ Scheduler.EmitSchedule();
+ Current = MI;
+ CurrentCount = Count - 1;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ --Count;
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB->begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.EmitSchedule();
+
+ // Clean up register live-range state.
+ Scheduler.FinishBlock();
+ }
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::StartBlock(BB);
+
+ // Clear out the register class data.
+ std::fill(Classes, array_endof(Classes),
+ static_cast<const TargetRegisterClass *>(0));
+
+ // Initialize the indices to indicate that no registers are live.
+ std::fill(KillIndices, array_endof(KillIndices), ~0u);
+ std::fill(DefIndices, array_endof(DefIndices), BB->size());
+
+ // Determine the live-out physregs for this block.
+ if (!BB->empty() && BB->back().getDesc().isReturn())
+ // In a return block, examine the function live-out regs.
+ for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+ E = MRI.liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ else
+ // In a non-return block, examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+
+ // Consider callee-saved registers as live-out, since we're running after
+ // prologue/epilogue insertion so there's no way to add additional
+ // saved registers.
+ //
+ // TODO: If the callee saves and restores these, then we can potentially
+ // use them between the save and the restore. To do that, we could scan
+ // the exit blocks to see which of these registers are defined.
+ // Alternatively, callee-saved registers that aren't saved and restored
+ // could be marked live-in in every block.
+ for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ // Build the scheduling graph.
+ BuildSchedGraph();
+
+ if (EnableAntiDepBreaking) {
+ if (BreakAntiDependencies()) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ SUnits.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+ BuildSchedGraph();
+ }
+ }
+
+ AvailableQueue.initNodes(SUnits);
+
+ ListScheduleTopDown();
+
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
+ if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
+ // Mark this register to be non-renamable.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::FinishBlock() {
+ RegRefs.clear();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::FinishBlock();
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SDep *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
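CriticalPathStep is consumed one hop at a time; BreakAntiDependencies below interleaves it with the bottom-up instruction walk. In isolation, the traversal it enables looks roughly like this (a sketch of the usage pattern, not additional functionality; "Max" is the deepest SUnit found in BreakAntiDependencies):

// Sketch: starting from the SUnit with the greatest depth+latency, follow the
// deepest predecessor edge until the critical path is exhausted, inspecting
// each edge for a breakable anti-dependence.
for (SUnit *SU = Max; SU != 0; ) {
  SDep *Edge = CriticalPathStep(SU);
  if (!Edge) break;                       // reached the top of the path
  if (Edge->getKind() == SDep::Anti) {
    // candidate anti-dependence on Edge->getReg()
  }
  SU = Edge->getSUnit();                  // step to the next node up the path
}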
+
+void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC =
+ getInstrOperandRegClass(TRI, MI->getDesc(), i);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *Alias;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+ }
+}
+
+void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
+ unsigned Count) {
+ // Update liveness.
+  // Proceeding upwards, registers that are def'd but not used in this
+ // instruction are now dead.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC =
+ getInstrOperandRegClass(TRI, MI->getDesc(), i);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+    // It wasn't previously live but now it is; this is a kill.
+ if (KillIndices[Reg] == ~0u) {
+ KillIndices[Reg] = Count;
+ DefIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ }
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
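As a concrete illustration of the bookkeeping in PrescanInstruction and ScanInstruction (the indices are hypothetical): suppose the region contains

    10:  R = ...        ; def of R
    17:  ... = R        ; last use of R

Scanning bottom-up, the use at index 17 makes R live, so KillIndices[R] = 17 and DefIndices[R] = ~0u. When the scan reaches the def at index 10, the range is closed off going upward: DefIndices[R] = 10 and KillIndices[R] = ~0u again. At every step exactly one of the two entries is ~0u, which is the invariant the asserts above keep re-checking.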
+
+/// BreakAntiDependencies - Identify anti-dependencies along the critical path
+/// of the ScheduleDAG and break them by renaming registers.
+///
+bool SchedulePostRATDList::BreakAntiDependencies() {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return false;
+
+ // Find the node at the bottom of the critical path.
+ SUnit *Max = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+ DOUT << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n";
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first register that
+ // isn't A which is free. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+  // register that each register was replaced with, and avoid
+ // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ bool Changed = false;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+
+ // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
+ // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
+ // is left behind appearing to clobber the super-register, while the
+ // subregister needs to remain live. So we just ignore them.
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ continue;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (MI == CriticalPathMI) {
+ if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ // Don't break anti-dependencies on non-allocatable registers.
+ if (!AllocatableSet.test(AntiDepReg))
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = 0;
+ CriticalPathMI = 0;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && AntiDepReg == Reg) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+ assert((AntiDepReg == 0 || RC != NULL) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+    // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+ RE = RC->allocation_order_end(MF); R != RE; ++R) {
+ unsigned NewReg = *R;
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg[AntiDepReg]) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] == ~0u &&
+ Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
+ KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
+ DOUT << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n";
+
+ // Update the references to the old register to refer to the new
+ // register.
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q)
+ Q->second->setReg(NewReg);
+
+ // We just went back in time and modified history; the
+          // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = 0;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ Changed = true;
+ LastNewReg[AntiDepReg] = NewReg;
+ break;
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Changed;
+}
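The safety test for a replacement register above reads KillIndices[NewReg] == ~0u (NewReg is not live at the current point) together with KillIndices[AntiDepReg] <= DefIndices[NewReg] (the range being renamed ends at or before NewReg's next def below). With hypothetical indices: if AntiDepReg's offending value is defined at index 12 and last used at index 17 (so KillIndices[AntiDepReg] == 17), and NewReg is currently dead with its next def at index 23 (DefIndices[NewReg] == 23), then 17 <= 23 holds and the renamed range [12, 17] cannot collide with NewReg's later live range. If NewReg were instead next defined at index 15, the test 17 <= 15 would fail and the loop moves on to the next candidate register.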
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+ --SuccSU->NumPredsLeft;
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ // Compute how many cycles it will be before this actually becomes
+ // available. This is the max of the start time of all predecessors plus
+ // their latencies.
+ SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ ReleaseSucc(SU, &*I);
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue.push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue.empty()) {
+ CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ DOUT << "*** Advancing cycle, no work to do\n";
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DOUT << "*** Emitting noop\n";
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler() {
+ return new PostRAScheduler();
+}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
new file mode 100644
index 0000000..97d4728
--- /dev/null
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -0,0 +1,1485 @@
+//===-- PreAllocSplitting.cpp - Pre-allocation Interval Splitting Pass. ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level pre-register allocation
+// live interval splitting pass. It finds live interval barriers, i.e.
+// instructions which will kill all physical registers in certain register
+// classes, and split all live intervals which cross the barrier.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-alloc-split"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumSplits, "Number of intervals split");
+STATISTIC(NumRemats, "Number of intervals split by rematerialization");
+STATISTIC(NumFolds, "Number of intervals split with spill folding");
+STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
+STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
+STATISTIC(NumDeadSpills, "Number of dead spills removed");
+
+namespace {
+ class VISIBILITY_HIDDEN PreAllocSplitting : public MachineFunctionPass {
+ MachineFunction *CurrMF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo* TRI;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIs;
+ LiveStacks *LSs;
+ VirtRegMap *VRM;
+
+ // Barrier - Current barrier being processed.
+ MachineInstr *Barrier;
+
+    // BarrierMBB - Basic block where the barrier resides.
+ MachineBasicBlock *BarrierMBB;
+
+    // BarrierIdx - Current barrier index.
+ unsigned BarrierIdx;
+
+ // CurrLI - Current live interval being split.
+ LiveInterval *CurrLI;
+
+ // CurrSLI - Current stack slot live interval.
+ LiveInterval *CurrSLI;
+
+ // CurrSValNo - Current val# for the stack slot live interval.
+ VNInfo *CurrSValNo;
+
+ // IntervalSSMap - A map from live interval to spill slots.
+ DenseMap<unsigned, int> IntervalSSMap;
+
+ // Def2SpillMap - A map from a def instruction index to spill index.
+ DenseMap<unsigned, unsigned> Def2SpillMap;
+
+ public:
+ static char ID;
+ PreAllocSplitting() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addPreserved<RegisterCoalescer>();
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ IntervalSSMap.clear();
+ Def2SpillMap.clear();
+ }
+
+ virtual const char *getPassName() const {
+      return "Pre-Register Allocation Live Interval Splitting";
+ }
+
+ /// print - Implement the dump method.
+ virtual void print(std::ostream &O, const Module* M = 0) const {
+ LIs->print(O, M);
+ }
+
+ void print(std::ostream *O, const Module* M = 0) const {
+ if (O) print(*O, M);
+ }
+
+ private:
+ MachineBasicBlock::iterator
+ findNextEmptySlot(MachineBasicBlock*, MachineInstr*,
+ unsigned&);
+
+ MachineBasicBlock::iterator
+ findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
+ SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+
+ MachineBasicBlock::iterator
+ findRestorePoint(MachineBasicBlock*, MachineInstr*, unsigned,
+ SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+
+ int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
+
+ bool IsAvailableInStack(MachineBasicBlock*, unsigned, unsigned, unsigned,
+ unsigned&, int&) const;
+
+ void UpdateSpillSlotInterval(VNInfo*, unsigned, unsigned);
+
+ bool SplitRegLiveInterval(LiveInterval*);
+
+ bool SplitRegLiveIntervals(const TargetRegisterClass **,
+ SmallPtrSet<LiveInterval*, 8>&);
+
+ bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB,
+ MachineBasicBlock* BarrierMBB);
+ bool Rematerialize(unsigned vreg, VNInfo* ValNo,
+ MachineInstr* DefMI,
+ MachineBasicBlock::iterator RestorePt,
+ unsigned RestoreIdx,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
+ MachineInstr* DefMI,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int& SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ MachineInstr* FoldRestore(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+ void RenumberValno(VNInfo* VN);
+ void ReconstructLiveInterval(LiveInterval* LI);
+ bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split);
+ unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs,
+ unsigned Reg, int FrameIndex, bool& TwoAddr);
+ VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock);
+ VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock);
+};
+} // end anonymous namespace
+
+char PreAllocSplitting::ID = 0;
+
+static RegisterPass<PreAllocSplitting>
+X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
+
+const PassInfo *const llvm::PreAllocSplittingID = &X;
+
+
+/// findNextEmptySlot - Find a gap after the given machine instruction in the
+/// instruction index map. If there isn't one, return end().
+MachineBasicBlock::iterator
+PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned &SpotIndex) {
+ MachineBasicBlock::iterator MII = MI;
+ if (++MII != MBB->end()) {
+ unsigned Index = LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
+ if (Index) {
+ SpotIndex = Index;
+ return MII;
+ }
+ }
+ return MBB->end();
+}
+
+/// findSpillPoint - Find a gap as far away from the given MI as possible that
+/// is suitable for spilling the current live interval. The index must be
+/// before any defs and uses of the live interval register in the mbb. Return
+/// begin() if none is found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
+ MachineInstr *DefMI,
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
+ unsigned &SpillIndex) {
+ MachineBasicBlock::iterator Pt = MBB->begin();
+
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock::iterator EndPt = DefMI
+ ? MachineBasicBlock::iterator(DefMI) : MBB->begin();
+
+ while (MII != EndPt && !RefsInMBB.count(MII) &&
+ MII->getOpcode() != TRI->getCallFrameSetupOpcode())
+ --MII;
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+
+ while (MII != EndPt && !RefsInMBB.count(MII)) {
+ unsigned Index = LIs->getInstructionIndex(MII);
+
+ // We can't insert the spill between the barrier (a call), and its
+ // corresponding call frame setup.
+ if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
+ while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) {
+ --MII;
+ if (MII == EndPt) {
+ return Pt;
+ }
+ }
+ continue;
+ } else if (LIs->hasGapBeforeInstr(Index)) {
+ Pt = MII;
+ SpillIndex = LIs->findGapBeforeInstr(Index, true);
+ }
+
+ if (RefsInMBB.count(MII))
+ return Pt;
+
+
+ --MII;
+ }
+
+ return Pt;
+}
+
+/// findRestorePoint - Find a gap in the instruction index map that's suitable
+/// for restoring the current live interval value. The index must be before any
+/// uses of the live interval register in the mbb. Return end() if none is
+/// found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned LastIdx,
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
+ unsigned &RestoreIndex) {
+ // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
+ // begin index accordingly.
+ MachineBasicBlock::iterator Pt = MBB->end();
+ MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator();
+
+ // We start at the call, so walk forward until we find the call frame teardown
+ // since we can't insert restores before that. Bail if we encounter a use
+ // during this time.
+ MachineBasicBlock::iterator MII = MI;
+ if (MII == EndPt) return Pt;
+
+ while (MII != EndPt && !RefsInMBB.count(MII) &&
+ MII->getOpcode() != TRI->getCallFrameDestroyOpcode())
+ ++MII;
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+ ++MII;
+
+ // FIXME: Limit the number of instructions to examine to reduce
+ // compile time?
+ while (MII != EndPt) {
+ unsigned Index = LIs->getInstructionIndex(MII);
+ if (Index > LastIdx)
+ break;
+ unsigned Gap = LIs->findGapBeforeInstr(Index);
+
+ // We can't insert a restore between the barrier (a call) and its
+ // corresponding call frame teardown.
+ if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+ do {
+ if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+ ++MII;
+ } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
+ } else if (Gap) {
+ Pt = MII;
+ RestoreIndex = Gap;
+ }
+
+ if (RefsInMBB.count(MII))
+ return Pt;
+
+ ++MII;
+ }
+
+ return Pt;
+}
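To make the call-frame constraints in findSpillPoint and findRestorePoint concrete, the code around a call barrier has roughly this shape (the pseudo-opcode names are illustrative; the real ones come from TRI->getCallFrameSetupOpcode() and TRI->getCallFrameDestroyOpcode() and vary by target):

    ...                   <- a spill gap must be at or above this point
    ADJCALLSTACKDOWN ...  <- call frame setup
    ...argument setup...
    CALL @f               <- the barrier
    ADJCALLSTACKUP ...    <- call frame destroy
    ...                   <- a restore gap must be at or below this point

findSpillPoint therefore backs up past a destroy/setup pair rather than placing a spill inside it, and findRestorePoint skips forward past the pair before considering gaps, bailing out early in either direction if a use of the register is encountered.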
+
+/// CreateSpillStackSlot - Create a stack slot for the live interval being
+/// split. If the live interval was previously split, just reuse the same
+/// slot.
+int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
+ const TargetRegisterClass *RC) {
+ int SS;
+ DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+ if (I != IntervalSSMap.end()) {
+ SS = I->second;
+ } else {
+ SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ IntervalSSMap[Reg] = SS;
+ }
+
+ // Create live interval for stack slot.
+ CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+ if (CurrSLI->hasAtLeastOneValue())
+ CurrSValNo = CurrSLI->getValNumInfo(0);
+ else
+ CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ return SS;
+}
+
+/// IsAvailableInStack - Return true if register is available in a split stack
+/// slot at the specified index.
+bool
+PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
+ unsigned Reg, unsigned DefIndex,
+ unsigned RestoreIndex, unsigned &SpillIndex,
+ int& SS) const {
+ if (!DefMBB)
+ return false;
+
+ DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+ if (I == IntervalSSMap.end())
+ return false;
+ DenseMap<unsigned, unsigned>::iterator II = Def2SpillMap.find(DefIndex);
+ if (II == Def2SpillMap.end())
+ return false;
+
+ // If last spill of def is in the same mbb as barrier mbb (where restore will
+ // be), make sure it's not below the intended restore index.
+ // FIXME: Undo the previous spill?
+ assert(LIs->getMBBFromIndex(II->second) == DefMBB);
+ if (DefMBB == BarrierMBB && II->second >= RestoreIndex)
+ return false;
+
+ SS = I->second;
+ SpillIndex = II->second;
+ return true;
+}
+
+/// UpdateSpillSlotInterval - Given the specified val# of the register live
+/// interval being split, and the spill and restore indices, update the live
+/// interval of the spill stack slot.
+void
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
+ unsigned RestoreIndex) {
+ assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
+ "Expect restore in the barrier mbb");
+
+ MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex);
+ if (MBB == BarrierMBB) {
+ // Intra-block spill + restore. We are done.
+ LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ return;
+ }
+
+ SmallPtrSet<MachineBasicBlock*, 4> Processed;
+ unsigned EndIdx = LIs->getMBBEndIdx(MBB);
+ LiveRange SLR(SpillIndex, EndIdx+1, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ Processed.insert(MBB);
+
+  // Start from the spill mbb, figure out the extent of the spill slot's
+ // live interval.
+ SmallVector<MachineBasicBlock*, 4> WorkList;
+ const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex);
+ if (LR->end > EndIdx)
+    // If live range extends beyond end of mbb, add successors to work list.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *MBB = WorkList.back();
+ WorkList.pop_back();
+ if (Processed.count(MBB))
+ continue;
+ unsigned Idx = LIs->getMBBStartIdx(MBB);
+ LR = CurrLI->getLiveRangeContaining(Idx);
+ if (LR && LR->valno == ValNo) {
+ EndIdx = LIs->getMBBEndIdx(MBB);
+ if (Idx <= RestoreIndex && RestoreIndex < EndIdx) {
+ // Spill slot live interval stops at the restore.
+ LiveRange SLR(Idx, RestoreIndex, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ } else if (LR->end > EndIdx) {
+ // Live range extends beyond end of mbb, process successors.
+ LiveRange SLR(Idx, EndIdx+1, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ WorkList.push_back(*SI);
+ } else {
+ LiveRange SLR(Idx, LR->end, CurrSValNo);
+ CurrSLI->addRange(SLR);
+ }
+ Processed.insert(MBB);
+ }
+ }
+}
+
+/// PerformPHIConstruction - From properly set up use and def lists, use a PHI
+/// construction algorithm to compute the ranges and valnos for an interval.
+VNInfo*
+PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock) {
+ // Return memoized result if it's available.
+ if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI))
+ return NewVNs[UseI];
+ else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI))
+ return NewVNs[UseI];
+ else if (!IsIntraBlock && LiveOut.count(MBB))
+ return LiveOut[MBB];
+
+ // Check if our block contains any uses or defs.
+ bool ContainsDefs = Defs.count(MBB);
+ bool ContainsUses = Uses.count(MBB);
+
+ VNInfo* RetVNI = 0;
+
+  // Enumerate the cases of use/def containing blocks.
+ if (!ContainsDefs && !ContainsUses) {
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+ } else if (ContainsDefs && !ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+
+ // Search for the def in this block. If we don't find it before the
+    // instruction we care about, go to the fallback case. Note that this
+    // should never happen: this case cannot be intrablock, so the use should
+    // always be an end() iterator.
+ assert(UseI == MBB->end() && "No use marked in intrablock");
+
+ MachineBasicBlock::iterator Walker = UseI;
+ --Walker;
+ while (Walker != MBB->begin()) {
+ if (BlockDefs.count(Walker))
+ break;
+ --Walker;
+ }
+
+ // Once we've found it, extend its VNInfo to our instruction.
+ unsigned DefIndex = LIs->getInstructionIndex(Walker);
+ DefIndex = LiveIntervals::getDefIndex(DefIndex);
+ unsigned EndIndex = LIs->getMBBEndIdx(MBB);
+
+ RetVNI = NewVNs[Walker];
+ LI->addRange(LiveRange(DefIndex, EndIndex+1, RetVNI));
+ } else if (!ContainsDefs && ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+ // Search for the use in this block that precedes the instruction we care
+ // about, going to the fallback case if we don't find it.
+ if (UseI == MBB->begin())
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+
+ MachineBasicBlock::iterator Walker = UseI;
+ --Walker;
+ bool found = false;
+ while (Walker != MBB->begin()) {
+ if (BlockUses.count(Walker)) {
+ found = true;
+ break;
+ }
+ --Walker;
+ }
+
+ // Must check begin() too.
+ if (!found) {
+ if (BlockUses.count(Walker))
+ found = true;
+ else
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+ }
+
+ unsigned UseIndex = LIs->getInstructionIndex(Walker);
+ UseIndex = LiveIntervals::getUseIndex(UseIndex);
+ unsigned EndIndex = 0;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI);
+ EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+
+ // Now, recursively phi construct the VNInfo for the use we found,
+ // and then extend it to include the instruction we care about
+ RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis, false, true);
+
+ LI->addRange(LiveRange(UseIndex, EndIndex+1, RetVNI));
+
+ // FIXME: Need to set kills properly for inter-block stuff.
+ if (LI->isKill(RetVNI, UseIndex)) LI->removeKill(RetVNI, UseIndex);
+ if (IsIntraBlock)
+ LI->addKill(RetVNI, EndIndex);
+ } else if (ContainsDefs && ContainsUses) {
+ SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+ SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+    // This case is basically a merging of the two preceding cases, with the
+ // special note that checking for defs must take precedence over checking
+ // for uses, because of two-address instructions.
+
+ if (UseI == MBB->begin())
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+
+ MachineBasicBlock::iterator Walker = UseI;
+ --Walker;
+ bool foundDef = false;
+ bool foundUse = false;
+ while (Walker != MBB->begin()) {
+ if (BlockDefs.count(Walker)) {
+ foundDef = true;
+ break;
+ } else if (BlockUses.count(Walker)) {
+ foundUse = true;
+ break;
+ }
+ --Walker;
+ }
+
+ // Must check begin() too.
+ if (!foundDef && !foundUse) {
+ if (BlockDefs.count(Walker))
+ foundDef = true;
+ else if (BlockUses.count(Walker))
+ foundUse = true;
+ else
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+ }
+
+ unsigned StartIndex = LIs->getInstructionIndex(Walker);
+ StartIndex = foundDef ? LiveIntervals::getDefIndex(StartIndex) :
+ LiveIntervals::getUseIndex(StartIndex);
+ unsigned EndIndex = 0;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI);
+ EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+
+ if (foundDef)
+ RetVNI = NewVNs[Walker];
+ else
+ RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+ NewVNs, LiveOut, Phis, false, true);
+
+ LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI));
+
+ if (foundUse && LI->isKill(RetVNI, StartIndex))
+ LI->removeKill(RetVNI, StartIndex);
+ if (IsIntraBlock) {
+ LI->addKill(RetVNI, EndIndex);
+ }
+ }
+
+ // Memoize results so we don't have to recompute them.
+ if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+ else {
+ if (!NewVNs.count(UseI))
+ NewVNs[UseI] = RetVNI;
+ Visited.insert(UseI);
+ }
+
+ return RetVNI;
+}
+
+/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path.
+///
+VNInfo*
+PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI,
+ MachineBasicBlock* MBB, LiveInterval* LI,
+ SmallPtrSet<MachineInstr*, 4>& Visited,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+ DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+ DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+ DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+ DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+ bool IsTopLevel, bool IsIntraBlock) {
+ // NOTE: Because this is the fallback case from other cases, we do NOT
+ // assume that we are not intrablock here.
+ if (Phis.count(MBB)) return Phis[MBB];
+
+ unsigned StartIndex = LIs->getMBBStartIdx(MBB);
+ VNInfo *RetVNI = Phis[MBB] = LI->getNextValue(~0U, /*FIXME*/ 0,
+ LIs->getVNInfoAllocator());
+ if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+
+ // If there are no uses or defs between our starting point and the
+ // beginning of the block, then recursively perform phi construction
+ // on our predecessors.
+ DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI,
+ Visited, Defs, Uses, NewVNs,
+ LiveOut, Phis, false, false);
+ if (Incoming != 0)
+ IncomingVNs[*PI] = Incoming;
+ }
+
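+ // If this block has a single predecessor and the value has no PHI kill,
+ // we don't need a separate phi value number: fold the incoming value
+ // into the placeholder created above, and update every map that might
+ // still refer to the old value number.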
+ if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill) {
+ VNInfo* OldVN = RetVNI;
+ VNInfo* NewVN = IncomingVNs.begin()->second;
+ VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
+ if (MergedVN == OldVN) std::swap(OldVN, NewVN);
+
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(),
+ LOE = LiveOut.end(); LOI != LOE; ++LOI)
+ if (LOI->second == OldVN)
+ LOI->second = MergedVN;
+ for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(),
+ NVE = NewVNs.end(); NVI != NVE; ++NVI)
+ if (NVI->second == OldVN)
+ NVI->second = MergedVN;
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(),
+ PE = Phis.end(); PI != PE; ++PI)
+ if (PI->second == OldVN)
+ PI->second = MergedVN;
+ RetVNI = MergedVN;
+ } else {
+ // Otherwise, treat the value created above as a phi join of the
+ // incoming VNInfos: mark each incoming value as having a PHI kill
+ // at the end of its block.
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
+ IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
+ I->second->hasPHIKill = true;
+ unsigned KillIndex = LIs->getMBBEndIdx(I->first);
+ if (!LiveInterval::isKill(I->second, KillIndex))
+ LI->addKill(I->second, KillIndex);
+ }
+ }
+
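+ // Finally, extend the new value from the start of the block up to either
+ // the querying instruction (intrablock) or the end of the block.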
+ unsigned EndIndex = 0;
+ if (IsIntraBlock) {
+ EndIndex = LIs->getInstructionIndex(UseI);
+ EndIndex = LiveIntervals::getUseIndex(EndIndex);
+ } else
+ EndIndex = LIs->getMBBEndIdx(MBB);
+ LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI));
+ if (IsIntraBlock)
+ LI->addKill(RetVNI, EndIndex);
+
+ // Memoize results so we don't have to recompute them.
+ if (!IsIntraBlock)
+ LiveOut[MBB] = RetVNI;
+ else {
+ if (!NewVNs.count(UseI))
+ NewVNs[UseI] = RetVNI;
+ Visited.insert(UseI);
+ }
+
+ return RetVNI;
+}
+
+/// ReconstructLiveInterval - Recompute a live interval from scratch.
+void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
+ BumpPtrAllocator& Alloc = LIs->getVNInfoAllocator();
+
+ // Clear the old ranges and valnos.
+ LI->clear();
+
+ // Cache the uses and defs of the register
+ typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap;
+ RegMap Defs, Uses;
+
+ // Keep track of the new VNs we're creating.
+ DenseMap<MachineInstr*, VNInfo*> NewVNs;
+ SmallPtrSet<VNInfo*, 2> PhiVNs;
+
+ // Cache defs, and create a new VNInfo for each def.
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ Defs[(*DI).getParent()].insert(&*DI);
+
+ unsigned DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = LiveIntervals::getDefIndex(DefIdx);
+
+ VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
+
+ // If the def is a move, set the copy field.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ if (DstReg == LI->reg)
+ NewVN->copy = &*DI;
+
+ NewVNs[&*DI] = NewVN;
+ }
+
+ // Cache uses as a separate pass from actually processing them.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
+ UE = MRI->use_end(); UI != UE; ++UI)
+ Uses[(*UI).getParent()].insert(&*UI);
+
+ // Now, actually process every use and use a phi construction algorithm
+ // to walk from it to its reaching definitions, building VNInfos along
+ // the way.
+ DenseMap<MachineBasicBlock*, VNInfo*> LiveOut;
+ DenseMap<MachineBasicBlock*, VNInfo*> Phis;
+ SmallPtrSet<MachineInstr*, 4> Visited;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis, true, true);
+ }
+
+ // Add ranges for dead defs
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ unsigned DefIdx = LIs->getInstructionIndex(&*DI);
+ DefIdx = LiveIntervals::getDefIndex(DefIdx);
+
+ if (LI->liveAt(DefIdx)) continue;
+
+ VNInfo* DeadVN = NewVNs[&*DI];
+ LI->addRange(LiveRange(DefIdx, DefIdx+1, DeadVN));
+ LI->addKill(DeadVN, DefIdx);
+ }
+}
+
+/// RenumberValno - Split the given valno out into a new vreg, allowing it to
+/// be allocated to a different register. This function creates a new vreg,
+/// copies the valno and its live ranges over to the new vreg's interval,
+/// removes them from the old interval, and rewrites all uses and defs of
+/// the original reg to the new vreg within those ranges.
+void PreAllocSplitting::RenumberValno(VNInfo* VN) {
+ SmallVector<VNInfo*, 4> Stack;
+ SmallVector<VNInfo*, 4> VNsToCopy;
+ Stack.push_back(VN);
+
+ // Walk through and copy the valno we care about, and any other valnos
+ // that are two-address redefinitions of the one we care about. These
+ // will need to be rewritten as well. We also check for safety of the
+ // renumbering here, by making sure that none of the valnos involved has
+ // phi kills.
+ while (!Stack.empty()) {
+ VNInfo* OldVN = Stack.back();
+ Stack.pop_back();
+
+ // Bail out if we ever encounter a valno that has a PHI kill. We can't
+ // renumber these.
+ if (OldVN->hasPHIKill) return;
+
+ VNsToCopy.push_back(OldVN);
+
+ // Locate two-address redefinitions
+ for (SmallVector<unsigned, 4>::iterator KI = OldVN->kills.begin(),
+ KE = OldVN->kills.end(); KI != KE; ++KI) {
+ MachineInstr* MI = LIs->getInstructionFromIndex(*KI);
+ unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg);
+ if (DefIdx == ~0U) continue;
+ if (MI->isRegTiedToUseOperand(DefIdx)) {
+ VNInfo* NextVN =
+ CurrLI->findDefinedVNInfo(LiveIntervals::getDefIndex(*KI));
+ if (NextVN == OldVN) continue;
+ Stack.push_back(NextVN);
+ }
+ }
+ }
+
+ // Create the new vreg
+ unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg));
+
+ // Create the new live interval
+ LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg);
+
+ for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE =
+ VNsToCopy.end(); OI != OE; ++OI) {
+ VNInfo* OldVN = *OI;
+
+ // Copy the valno over
+ VNInfo* NewVN = NewLI.getNextValue(OldVN->def, OldVN->copy,
+ LIs->getVNInfoAllocator());
+ NewLI.copyValNumInfo(NewVN, OldVN);
+ NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
+
+ // Remove the valno from the old interval
+ CurrLI->removeValNo(OldVN);
+ }
+
+ // Rewrite defs and uses. This is done in two stages to avoid invalidating
+ // the reg_iterator.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange;
+
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand& MO = I.getOperand();
+ unsigned InstrIdx = LIs->getInstructionIndex(&*I);
+
+ if ((MO.isUse() && NewLI.liveAt(LiveIntervals::getUseIndex(InstrIdx))) ||
+ (MO.isDef() && NewLI.liveAt(LiveIntervals::getDefIndex(InstrIdx))))
+ OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
+ }
+
+ for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I =
+ OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) {
+ MachineInstr* Inst = I->first;
+ unsigned OpIdx = I->second;
+ MachineOperand& MO = Inst->getOperand(OpIdx);
+ MO.setReg(NewVReg);
+ }
+
+ // Grow the VirtRegMap, since we've created a new vreg.
+ VRM->grow();
+
+ // The renumbered vreg shares a stack slot with the old register.
+ if (IntervalSSMap.count(CurrLI->reg))
+ IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
+
+ NumRenumbers++;
+}
+
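+/// Rematerialize - Instead of spilling and restoring across the barrier,
+/// re-emit the instruction that defines ValNo just before the restore point,
+/// then rebuild the live interval and split the rematerialized value out
+/// into its own vreg.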
+bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
+ MachineInstr* DefMI,
+ MachineBasicBlock::iterator RestorePt,
+ unsigned RestoreIdx,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ MachineBasicBlock& MBB = *RestorePt->getParent();
+
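+ // Make sure there is a legal point at which the original value could be
+ // killed: before the barrier for PHI-defined values or defs in the barrier
+ // block, otherwise in the first empty slot after the def. If there isn't
+ // one, give up on rematerialization.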
+ MachineBasicBlock::iterator KillPt = BarrierMBB->end();
+ unsigned KillIdx = 0;
+ if (ValNo->def == ~0U || DefMI->getParent() == BarrierMBB)
+ KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
+ else
+ KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
+
+ if (KillPt == DefMI->getParent()->end())
+ return false;
+
+ TII->reMaterialize(MBB, RestorePt, vreg, DefMI);
+ LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
+
+ ReconstructLiveInterval(CurrLI);
+ unsigned RematIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RematIdx = LiveIntervals::getDefIndex(RematIdx);
+ RenumberValno(CurrLI->findDefinedVNInfo(RematIdx));
+
+ ++NumSplits;
+ ++NumRemats;
+ return true;
+}
+
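+/// FoldSpill - Try to fold the spill of vreg away entirely by rewriting the
+/// instruction that defines it (found by scanning back from the barrier) so
+/// that it stores its result directly to the spill stack slot, creating the
+/// slot if necessary. Returns the folded instruction, or null on failure.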
+MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* DefMI,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int& SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ MachineBasicBlock::iterator Pt = MBB->begin();
+
+ // Bail out if there are no references to the register in this block.
+ if (RefsInMBB.empty())
+ return 0;
+
+ MachineBasicBlock::iterator FoldPt = Barrier;
+ while (&*FoldPt != DefMI && FoldPt != MBB->begin() &&
+ !RefsInMBB.count(FoldPt))
+ --FoldPt;
+
+ int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg, false);
+ if (OpIdx == -1)
+ return 0;
+
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+
+ if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+ return 0;
+
+ DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg);
+ if (I != IntervalSSMap.end()) {
+ SS = I->second;
+ } else {
+ SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ }
+
+ MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
+ FoldPt, Ops, SS);
+
+ if (FMI) {
+ LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+ FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ ++NumFolds;
+
+ IntervalSSMap[vreg] = SS;
+ CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+ if (CurrSLI->hasAtLeastOneValue())
+ CurrSValNo = CurrSLI->getValNumInfo(0);
+ else
+ CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ }
+
+ return FMI;
+}
+
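+/// FoldRestore - Try to fold the restore of vreg after the barrier into the
+/// next instruction that uses it, rewriting that use to read directly from
+/// stack slot SS. Returns the folded instruction, or null if folding isn't
+/// possible or the restore-fold limit has been reached.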
+MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
+ const TargetRegisterClass* RC,
+ MachineInstr* Barrier,
+ MachineBasicBlock* MBB,
+ int SS,
+ SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+ if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds)
+ return 0;
+
+ // Bail out if there are no references to the register in this block.
+ if (RefsInMBB.empty())
+ return 0;
+
+ // Can't fold a restore between a call stack setup and teardown.
+ MachineBasicBlock::iterator FoldPt = Barrier;
+
+ // Advance from barrier to call frame teardown.
+ while (FoldPt != MBB->getFirstTerminator() &&
+ FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+ if (RefsInMBB.count(FoldPt))
+ return 0;
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+ else
+ ++FoldPt;
+
+ // Now find the restore point.
+ while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) {
+ if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) {
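+ // Skip over the entire call sequence; we can't fold the restore into
+ // anything between a call frame setup and its matching teardown.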
+ while (FoldPt != MBB->getFirstTerminator() &&
+ FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+ if (RefsInMBB.count(FoldPt))
+ return 0;
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+ }
+
+ ++FoldPt;
+ }
+
+ if (FoldPt == MBB->getFirstTerminator())
+ return 0;
+
+ int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true);
+ if (OpIdx == -1)
+ return 0;
+
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+
+ if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+ return 0;
+
+ MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
+ FoldPt, Ops, SS);
+
+ if (FMI) {
+ LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+ FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ ++NumRestoreFolds;
+ }
+
+ return FMI;
+}
+
+/// SplitRegLiveInterval - Split (spill and restore) the given live interval
+/// so it would not cross the barrier that's being processed. Shrink wrap
+/// (minimize) the live interval to the last uses.
+bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
+ CurrLI = LI;
+
+ // Find live range where current interval cross the barrier.
+ LiveInterval::iterator LR =
+ CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
+ VNInfo *ValNo = LR->valno;
+
+ if (ValNo->def == ~1U) {
+ // Defined by a dead def? How can this be?
+ assert(0 && "Val# is defined by a dead def?");
+ abort();
+ }
+
+ MachineInstr *DefMI = (ValNo->def != ~0U)
+ ? LIs->getInstructionFromIndex(ValNo->def) : NULL;
+
+ // If this would create a new join point, do not split.
+ if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent()))
+ return false;
+
+ // Find all references in the barrier mbb.
+ SmallPtrSet<MachineInstr*, 4> RefsInMBB;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineInstr *RefMI = &*I;
+ if (RefMI->getParent() == BarrierMBB)
+ RefsInMBB.insert(RefMI);
+ }
+
+ // Find a point to restore the value after the barrier.
+ unsigned RestoreIndex = 0;
+ MachineBasicBlock::iterator RestorePt =
+ findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex);
+ if (RestorePt == BarrierMBB->end())
+ return false;
+
+ if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
+ if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt,
+ RestoreIndex, RefsInMBB))
+ return true;
+
+ // Add a spill either before the barrier or after the definition.
+ MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
+ const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
+ unsigned SpillIndex = 0;
+ MachineInstr *SpillMI = NULL;
+ int SS = -1;
+ if (ValNo->def == ~0U) {
+ // If it's defined by a phi, we must split just before the barrier.
+ if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
+ BarrierMBB, SS, RefsInMBB))) {
+ SpillIndex = LIs->getInstructionIndex(SpillMI);
+ } else {
+ MachineBasicBlock::iterator SpillPt =
+ findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex);
+ if (SpillPt == BarrierMBB->begin())
+ return false; // No gap to insert spill.
+ // Add spill.
+
+ SS = CreateSpillStackSlot(CurrLI->reg, RC);
+ TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC);
+ SpillMI = prior(SpillPt);
+ LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ }
+ } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
+ RestoreIndex, SpillIndex, SS)) {
+ // If it's already split, just restore the value. There is no need to spill
+ // the def again.
+ if (!DefMI)
+ return false; // Def is dead. Do nothing.
+
+ if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier,
+ BarrierMBB, SS, RefsInMBB))) {
+ SpillIndex = LIs->getInstructionIndex(SpillMI);
+ } else {
+ // Check if it's possible to insert a spill after the def MI.
+ MachineBasicBlock::iterator SpillPt;
+ if (DefMBB == BarrierMBB) {
+ // Add spill after the def and the last use before the barrier.
+ SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
+ RefsInMBB, SpillIndex);
+ if (SpillPt == DefMBB->begin())
+ return false; // No gap to insert spill.
+ } else {
+ SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex);
+ if (SpillPt == DefMBB->end())
+ return false; // No gap to insert spill.
+ }
+ // Add spill. The store instruction kills the register if def is before
+ // the barrier in the barrier block.
+ SS = CreateSpillStackSlot(CurrLI->reg, RC);
+ TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg,
+ DefMBB == BarrierMBB, SS, RC);
+ SpillMI = prior(SpillPt);
+ LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ }
+ }
+
+ // Remember def instruction index to spill index mapping.
+ if (DefMI && SpillMI)
+ Def2SpillMap[ValNo->def] = SpillIndex;
+
+ // Add restore.
+ bool FoldedRestore = false;
+ if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
+ BarrierMBB, SS, RefsInMBB)) {
+ RestorePt = LMI;
+ RestoreIndex = LIs->getInstructionIndex(RestorePt);
+ FoldedRestore = true;
+ } else {
+ TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC);
+ MachineInstr *LoadMI = prior(RestorePt);
+ LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex);
+ }
+
+ // Update spill stack slot live interval.
+ UpdateSpillSlotInterval(ValNo, LIs->getUseIndex(SpillIndex)+1,
+ LIs->getDefIndex(RestoreIndex));
+
+ ReconstructLiveInterval(CurrLI);
+
+ if (!FoldedRestore) {
+ unsigned RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
+ RestoreIdx = LiveIntervals::getDefIndex(RestoreIdx);
+ RenumberValno(CurrLI->findDefinedVNInfo(RestoreIdx));
+ }
+
+ ++NumSplits;
+ return true;
+}
+
+/// SplitRegLiveIntervals - Split all register live intervals that cross the
+/// barrier that's being processed.
+bool
+PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
+ SmallPtrSet<LiveInterval*, 8>& Split) {
+ // First find all the virtual registers whose live intervals are intercepted
+ // by the current barrier.
+ SmallVector<LiveInterval*, 8> Intervals;
+ for (const TargetRegisterClass **RC = RCs; *RC; ++RC) {
+ // FIXME: If it's not safe to move any instruction that defines the barrier
+ // register class, then it means there are some special dependencies which
+ // codegen is not modelling. Ignore these barriers for now.
+ if (!TII->isSafeToMoveRegClassDefs(*RC))
+ continue;
+ std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
+ for (unsigned i = 0, e = VRs.size(); i != e; ++i) {
+ unsigned Reg = VRs[i];
+ if (!LIs->hasInterval(Reg))
+ continue;
+ LiveInterval *LI = &LIs->getInterval(Reg);
+ if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg))
+ // Virtual register live interval is intercepted by the barrier. We
+ // should split and shrink wrap its interval if possible.
+ Intervals.push_back(LI);
+ }
+ }
+
+ // Process the affected live intervals.
+ bool Change = false;
+ while (!Intervals.empty()) {
+ if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit)
+ break;
+ LiveInterval *LI = Intervals.back();
+ Intervals.pop_back();
+ bool result = SplitRegLiveInterval(LI);
+ if (result) Split.insert(LI);
+ Change |= result;
+ }
+
+ return Change;
+}
+
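+/// getNumberOfNonSpills - Given the set of instructions using a value, count
+/// how many of them are not stores of Reg back into FrameIndex, and record
+/// whether any of them is tied to a two-address redefinition of Reg.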
+unsigned PreAllocSplitting::getNumberOfNonSpills(
+ SmallPtrSet<MachineInstr*, 4>& MIs,
+ unsigned Reg, int FrameIndex,
+ bool& FeedsTwoAddr) {
+ unsigned NonSpills = 0;
+ for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end();
+ UI != UE; ++UI) {
+ int StoreFrameIndex;
+ unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
+ NonSpills++;
+
+ int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
+ FeedsTwoAddr = true;
+ }
+
+ return NonSpills;
+}
+
+/// removeDeadSpills - After doing splitting, filter through all intervals we've
+/// split, and see if any of the spills are unnecessary. If so, remove them.
+bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
+ bool changed = false;
+
+ // Walk over all of the live intervals that were touched by the splitter,
+ // and see if we can do any DCE and/or folding.
+ for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(),
+ LE = split.end(); LI != LE; ++LI) {
+ DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount;
+
+ // First, collect all the uses of the vreg, and sort them by their
+ // reaching definition (VNInfo).
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ unsigned index = LIs->getInstructionIndex(&*UI);
+ index = LiveIntervals::getUseIndex(index);
+
+ const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
+ VNUseCount[LR->valno].insert(&*UI);
+ }
+
+ // Now, take the definitions (VNInfo's) one at a time and try to DCE
+ // and/or fold them away.
+ for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(),
+ VE = (*LI)->vni_end(); VI != VE; ++VI) {
+
+ if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit)
+ return changed;
+
+ VNInfo* CurrVN = *VI;
+
+ // We don't currently try to handle definitions with PHI kills, because
+ // it would involve processing more than one VNInfo at once.
+ if (CurrVN->hasPHIKill) continue;
+
+ // We also don't try to handle the results of PHI joins, since there's
+ // no defining instruction to analyze.
+ unsigned DefIdx = CurrVN->def;
+ if (DefIdx == ~0U || DefIdx == ~1U) continue;
+
+ // We're only interested in eliminating cruft introduced by the splitter,
+ // which is of the form load-use or load-use-store. First, check that the
+ // definition is a load, and remember what stack slot we loaded it from.
+ MachineInstr* DefMI = LIs->getInstructionFromIndex(DefIdx);
+ int FrameIndex;
+ if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
+
+ // If the definition has no uses at all, just DCE it.
+ if (VNUseCount[CurrVN].size() == 0) {
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ (*LI)->removeValNo(CurrVN);
+ DefMI->eraseFromParent();
+ VNUseCount.erase(CurrVN);
+ NumDeadSpills++;
+ changed = true;
+ continue;
+ }
+
+ // Second, get the number of non-store uses of the definition, as well as
+ // a flag indicating whether it feeds into a later two-address definition.
+ bool FeedsTwoAddr = false;
+ unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN],
+ (*LI)->reg, FrameIndex,
+ FeedsTwoAddr);
+
+ // If there's one non-store use and it doesn't feed a two-addr, then
+ // this is a load-use-store case that we can try to fold.
+ if (NonSpillCount == 1 && !FeedsTwoAddr) {
+ // Start by finding the non-store use MachineInstr.
+ SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin();
+ int StoreFrameIndex;
+ unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ while (UI != VNUseCount[CurrVN].end() &&
+ (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) {
+ ++UI;
+ if (UI != VNUseCount[CurrVN].end())
+ StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+ }
+ if (UI == VNUseCount[CurrVN].end()) continue;
+
+ MachineInstr* use = *UI;
+
+ // Attempt to fold it away!
+ int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false);
+ if (OpIdx == -1) continue;
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpIdx);
+ if (!TII->canFoldMemoryOperand(use, Ops)) continue;
+
+ MachineInstr* NewMI =
+ TII->foldMemoryOperand(*use->getParent()->getParent(),
+ use, Ops, FrameIndex);
+
+ if (!NewMI) continue;
+
+ // Update relevant analyses.
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ LIs->ReplaceMachineInstrInMaps(use, NewMI);
+ (*LI)->removeValNo(CurrVN);
+
+ DefMI->eraseFromParent();
+ MachineBasicBlock* MBB = use->getParent();
+ NewMI = MBB->insert(MBB->erase(use), NewMI);
+ VNUseCount[CurrVN].erase(use);
+
+ // Remove deleted instructions. Note that we need to remove them from
+ // the VNInfo->use map as well, just to be safe.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator II =
+ VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
+ II != IE; ++II) {
+ for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+ VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE;
+ ++VNI)
+ if (VNI->first != CurrVN)
+ VNI->second.erase(*II);
+ LIs->RemoveMachineInstrFromMaps(*II);
+ (*II)->eraseFromParent();
+ }
+
+ VNUseCount.erase(CurrVN);
+
+ for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+ VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
+ if (VI->second.erase(use))
+ VI->second.insert(NewMI);
+
+ NumDeadSpills++;
+ changed = true;
+ continue;
+ }
+
+ // If there's more than one non-store instruction, we can't profitably
+ // fold it, so bail.
+ if (NonSpillCount) continue;
+
+ // Otherwise, this is a load-store case, so DCE them.
+ for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
+ VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
+ UI != UE; ++UI) {
+ LIs->RemoveMachineInstrFromMaps(*UI);
+ (*UI)->eraseFromParent();
+ }
+
+ VNUseCount.erase(CurrVN);
+
+ LIs->RemoveMachineInstrFromMaps(DefMI);
+ (*LI)->removeValNo(CurrVN);
+ DefMI->eraseFromParent();
+ NumDeadSpills++;
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
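+/// createsNewJoin - Conservatively determine whether splitting the live
+/// range LR between DefMBB and BarrierMBB would create a new join point in
+/// the CFG; if so, the caller does not perform the split.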
+bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
+ MachineBasicBlock* DefMBB,
+ MachineBasicBlock* BarrierMBB) {
+ if (DefMBB == BarrierMBB)
+ return false;
+
+ if (LR->valno->hasPHIKill)
+ return false;
+
+ unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
+ if (LR->end < MBBEnd)
+ return false;
+
+ MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>();
+ if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB))
+ return true;
+
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+ SmallPtrSet<MachineBasicBlock*, 4> Visited;
+ typedef std::pair<MachineBasicBlock*,
+ MachineBasicBlock::succ_iterator> ItPair;
+ SmallVector<ItPair, 4> Stack;
+ Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin()));
+
+ while (!Stack.empty()) {
+ ItPair P = Stack.back();
+ Stack.pop_back();
+
+ MachineBasicBlock* PredMBB = P.first;
+ MachineBasicBlock::succ_iterator S = P.second;
+
+ if (S == PredMBB->succ_end())
+ continue;
+ else if (Visited.count(*S)) {
+ Stack.push_back(std::make_pair(PredMBB, ++S));
+ continue;
+ } else
+ Stack.push_back(std::make_pair(PredMBB, S+1));
+
+ MachineBasicBlock* MBB = *S;
+ Visited.insert(MBB);
+
+ if (MBB == BarrierMBB)
+ return true;
+
+ MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB);
+ MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB);
+ MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom();
+ while (MDTN) {
+ if (MDTN == DefMDTN)
+ return true;
+ else if (MDTN == BarrierMDTN)
+ break;
+ MDTN = MDTN->getIDom();
+ }
+
+ MBBEnd = LIs->getMBBEndIdx(MBB);
+ if (LR->end > MBBEnd)
+ Stack.push_back(std::make_pair(MBB, MBB->succ_begin()));
+ }
+
+ return false;
+}
+
+
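+/// runOnMachineFunction - Walk the function depth-first and, for every
+/// instruction that acts as a register class barrier, try to split the
+/// virtual register live intervals that cross it. Afterwards, remove any
+/// spills that the splitting has made dead.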
+bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
+ CurrMF = &MF;
+ TM = &MF.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ LIs = &getAnalysis<LiveIntervals>();
+ LSs = &getAnalysis<LiveStacks>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order.
+ MF.RenumberBlocks();
+
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ SmallPtrSet<LiveInterval*, 8> Split;
+
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ BarrierMBB = *DFI;
+ for (MachineBasicBlock::iterator I = BarrierMBB->begin(),
+ E = BarrierMBB->end(); I != E; ++I) {
+ Barrier = &*I;
+ const TargetRegisterClass **BarrierRCs =
+ Barrier->getDesc().getRegClassBarriers();
+ if (!BarrierRCs)
+ continue;
+ BarrierIdx = LIs->getInstructionIndex(Barrier);
+ MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split);
+ }
+ }
+
+ MadeChange |= removeDeadSpills(Split);
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..9e7ad67
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,679 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+
+static RegisterPass<PEI>
+X("prologepilog", "Prologue/Epilogue Insertion");
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+
+ // Get MachineModuleInfo so that we can track the construction of the
+ // frame.
+ if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
+ Fn.getFrameInfo()->setMachineModuleInfo(MMI);
+
+ // Allow the target machine to make some adjustments to the function
+ // (e.g. to UsedPhysRegs) before calculateCalleeSavedRegisters runs.
+ TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+ // Scan the function for modified callee saved registers and insert spill
+ // code for any callee saved registers that are modified. Also calculate
+ // the MaxCallFrameSize and HasCalls variables for the function's frame
+ // information and eliminate call frame pseudo instructions.
+ calculateCalleeSavedRegisters(Fn);
+
+ // Determine placement of CSR spill/restore code:
+ // - with shrink wrapping, place spills and restores to tightly
+ // enclose regions in the Machine CFG of the function where
+ // they are used.
+ // - without shrink wrapping (the default), place all spills in the
+ // entry block and all restores in return blocks.
+ placeCSRSpillsAndRestores(Fn);
+
+ // Add the code to save and restore the callee saved registers
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TRI->processFunctionBeforeFrameFinalized(Fn);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters
+ // must be called before this function in order to set the HasCalls
+ // and MaxCallFrameSize variables.
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ delete RS;
+ clearAllSets();
+ return true;
+}
+
+#if 0
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+#endif
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for
+/// the function's frame information and eliminate call frame pseudo
+/// instructions.
+///
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+
+ // Get the callee saved register list...
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+
+ // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers and no call
+ // frame setup/destroy pseudo instructions.
+ if ((CSRegs == 0 || CSRegs[0] == 0) &&
+ FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ return;
+
+ unsigned MaxCallFrameSize = 0;
+ bool HasCalls = false;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ HasCalls = true;
+ FrameSDOps.push_back(I);
+ }
+
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+ FFI->setHasCalls(HasCalls);
+ FFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
+ MachineBasicBlock::iterator I = FrameSDOps[i];
+ // If call frames are not being included as part of the stack frame,
+ // and there is no dynamic allocation (therefore referencing frame slots
+ // off sp), leave the pseudo ops alone. We'll eliminate them later.
+ if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+ RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+
+ // Now figure out which *callee saved* registers are modified by the current
+ // function, thus needing to be saved and restored in the prolog/epilog.
+ //
+ const TargetRegisterClass* const *CSRegClasses =
+ RegInfo->getCalleeSavedRegClasses(&Fn);
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ // If the reg is modified, save it!
+ CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ } else {
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+ *AliasSet; ++AliasSet) { // Check alias registers too.
+ if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
+ CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ break;
+ }
+ }
+ }
+ }
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const std::pair<unsigned,int> *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = CSI[i].getRegClass();
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+ FixedSlot->first != Reg)
+ ++FixedSlot;
+
+ int FrameIdx;
+ if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller.
+ // Use the min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
+ }
+ CSI[i].setFrameIdx(FrameIdx);
+ }
+
+ FFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ MachineBasicBlock::iterator I;
+
+ if (! ShrinkWrapThisFunction) {
+ // Spill using target interface.
+ I = EntryBlock->begin();
+ if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ EntryBlock->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
+ CSI[i].getFrameIdx(), CSI[i].getRegClass());
+ }
+ }
+
+ // Restore using target interface.
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+ MachineBasicBlock* MBB = ReturnBlocks[ri];
+ I = MBB->end(); --I;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+ CSI[i].getFrameIdx(),
+ CSI[i].getRegClass());
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ // Insert spills.
+ std::vector<CalleeSavedInfo> blockCSI;
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet save = BI->second;
+
+ if (save.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = save.begin(),
+ RE = save.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not collect callee saved register info");
+
+ I = MBB->begin();
+
+ // When shrink wrapping, use stack slot stores/loads.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(blockCSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
+ true,
+ blockCSI[i].getFrameIdx(),
+ blockCSI[i].getRegClass());
+ }
+ }
+
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restore = BI->second;
+
+ if (restore.empty())
+ continue;
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = restore.begin(),
+ RE = restore.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not find callee saved register info");
+
+ // If MBB is empty and needs restores, insert at the _beginning_.
+ if (MBB->empty()) {
+ I = MBB->begin();
+ } else {
+ I = MBB->end();
+ --I;
+
+ // Skip over all terminator instructions, which are part of the
+ // return sequence.
+ if (! I->getDesc().isTerminator()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+ }
+ }
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
+ blockCSI[i].getFrameIdx(),
+ blockCSI[i].getRegClass());
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ if (StackGrowsDown)
+ Offset += FFI->getObjectSize(FrameIdx);
+
+ unsigned Align = FFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ if (StackGrowsDown) {
+ FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ FFI->setObjectOffset(FrameIdx, Offset);
+ Offset += FFI->getObjectSize(FrameIdx);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+ unsigned MaxAlign = FFI->getMaxAlignment();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int64_t Offset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ Offset = -Offset;
+ assert(Offset >= 0
+ && "Local area offset should be in direction of stack growth");
+
+ // If there are fixed-sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of the local area.
+ // We currently don't support filling in holes between fixed-sized objects,
+ // so we adjust 'Offset' to point to the end of the last fixed-sized
+ // preallocated object.
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at the lower address
+ // of the object -- which is given by its offset. For a down-growing
+ // stack the offset is negative, so we negate it to get the distance.
+ FixedOff = -FFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += FFI->getObjectSize(i);
+
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack,
+ // then increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ FFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack,
+ // then increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ FFI->setObjectOffset(i, Offset);
+ Offset += FFI->getObjectSize(i);
+ }
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // frame pointer if a frame pointer is required.
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ if (RS && RegInfo->hasFP(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ if (FFI->getStackProtectorIndex() >= 0)
+ AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign);
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ if (FFI->getStackProtectorIndex() == (int)i)
+ continue;
+
+ AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && !RegInfo->hasFP(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0)
+ AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // Round up the size to a multiple of the alignment, but only if there are
+ // calls or alloca's in the function. This ensures that any calls to
+ // subroutines have their stack frames suitably aligned.
+ // Also do this if we need runtime alignment of the stack. In this case
+ // offsets will be relative to SP not FP; round up the stack size so this
+ // works.
+ if (!RegInfo->targetHandlesStackFrameRounding() &&
+ (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) &&
+ FFI->getObjectIndexEnd() != 0))) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (RegInfo->hasReservedCallFrame(Fn))
+ Offset += FFI->getMaxCallFrameSize();
+
+ unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
+
+ // Remember the required stack alignment in case targets need it to perform
+ // dynamic stack alignment.
+ FFI->setMaxAlignment(MaxAlign);
+}
+
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ // Add prologue to the function...
+ TRI->emitPrologue(Fn);
+
+ // Add epilogue to restore the callee-save registers in each exiting block
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().getDesc().isReturn())
+ TRI->emitEpilogue(Fn, *I);
+ }
+}
+
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+ const TargetMachine &TM = Fn.getTarget();
+ assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameInfo *TFI = TM.getFrameInfo();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
+
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ int SPAdj = 0; // SP offset due to call frame setup / destroy.
+ if (RS) RS->enterBasicBlock(BB);
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ if (I->getOpcode() == TargetInstrInfo::DECLARE) {
+ // Ignore it.
+ ++I;
+ continue;
+ }
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ // Remember how much SP has been adjusted to create the call
+ // frame.
+ int Size = I->getOperand(0).getImm();
+
+ if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+ Size = -Size;
+
+ SPAdj += Size;
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = prior(I);
+ TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isFI()) {
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+
+ TRI.eliminateFrameIndex(MI, SPAdj, RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = 0;
+ break;
+ }
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && MI) RS->forward(MI);
+ }
+
+ assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+ }
+}
+
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 0000000..c158dd8
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,167 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+ class RegScavenger;
+ class MachineBasicBlock;
+
+ class PEI : public MachineFunctionPass {
+ public:
+ static char ID;
+ PEI() : MachineFunctionPass(&ID) {}
+
+ const char *getPassName() const {
+ return "Prolog/Epilog Insertion & Frame Finalization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keep track of the range of callee
+ // saved stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Analysis info for spill/restore placement.
+ // "CSR": "callee saved register".
+
+ // CSRegSet contains indices into the Callee Saved Register Info
+ // vector built by calculateCalleeSavedRegisters() and accessed
+ // via MF.getFrameInfo()->getCalleeSavedInfo().
+ typedef SparseBitVector<> CSRegSet;
+
+ // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+ // saved register indices.
+ typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+ // Set and maps for computing CSR spill/restore placement:
+ // used in function (UsedCSRegs)
+ // used in a basic block (CSRUsed)
+ // anticipatable in a basic block (Antic{In,Out})
+ // available in a basic block (Avail{In,Out})
+ // to be spilled at the entry to a basic block (CSRSave)
+ // to be restored at the end of a basic block (CSRRestore)
+ CSRegSet UsedCSRegs;
+ CSRegBlockMap CSRUsed;
+ CSRegBlockMap AnticIn, AnticOut;
+ CSRegBlockMap AvailIn, AvailOut;
+ CSRegBlockMap CSRSave;
+ CSRegBlockMap CSRRestore;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock* EntryBlock;
+ SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+ // Map of MBBs to top level MachineLoops.
+ DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+ // Flag to control shrink wrapping per-function:
+ // may choose to skip shrink wrapping for certain
+ // functions.
+ bool ShrinkWrapThisFunction;
+
+#ifndef NDEBUG
+ // Machine function handle.
+ MachineFunction* MF;
+
+ // Flag indicating that the current function
+ // has at least one "short" path in the machine
+ // CFG from the entry block to an exit block.
+ bool HasFastExitPath;
+#endif
+
+ bool calculateSets(MachineFunction &Fn);
+ bool calcAnticInOut(MachineBasicBlock* MBB);
+ bool calcAvailInOut(MachineBasicBlock* MBB);
+ void calculateAnticAvail(MachineFunction &Fn);
+ bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks);
+ bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+ bool calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills);
+ bool calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores);
+ void placeSpillsAndRestores(MachineFunction &Fn);
+ void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Initialize DFA sets, called before iterations.
+ void clearAnticAvailSets();
+ // Clear all sets constructed by shrink wrapping.
+ void clearAllSets();
+
+ // Initialize all shrink wrapping data.
+ void initShrinkWrappingInfo();
+
+ // Conveniences for dealing with machine loops.
+ MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+ MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+ // Propagate CSRs used in MBB to all MBBs of loop LP.
+ void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+ // Debugging methods.
+
+ // Mark this function as having fast exit paths.
+ void findFastExitPath();
+
+ // Verify placement of spills/restores.
+ void verifySpillRestorePlacement();
+
+ std::string getBasicBlockName(const MachineBasicBlock* MBB);
+ std::string stringifyCSRegSet(const CSRegSet& s);
+ void dumpSet(const CSRegSet& s);
+ void dumpUsed(MachineBasicBlock* MBB);
+ void dumpAllUsed();
+ void dumpSets(MachineBasicBlock* MBB);
+ void dumpSets1(MachineBasicBlock* MBB);
+ void dumpAllSets();
+ void dumpSRSets();
+#endif
+
+ };
+} // End llvm namespace
+#endif
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 0000000..b4c20e6
--- /dev/null
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,92 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+
+static ManagedStatic<PseudoSourceValue[4]> PSVs;
+
+const PseudoSourceValue *PseudoSourceValue::getStack()
+{ return &(*PSVs)[0]; }
+const PseudoSourceValue *PseudoSourceValue::getGOT()
+{ return &(*PSVs)[1]; }
+const PseudoSourceValue *PseudoSourceValue::getJumpTable()
+{ return &(*PSVs)[2]; }
+const PseudoSourceValue *PseudoSourceValue::getConstantPool()
+{ return &(*PSVs)[3]; }
+
+static const char *const PSVNames[] = {
+ "Stack",
+ "GOT",
+ "JumpTable",
+ "ConstantPool"
+};
+
+PseudoSourceValue::PseudoSourceValue() :
+ Value(PointerType::getUnqual(Type::Int8Ty), PseudoSourceValueVal) {}
+
+void PseudoSourceValue::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+void PseudoSourceValue::print(raw_ostream &OS) const {
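+  // The four singletons live contiguously in the PSVs array above, so the
+  // pointer difference recovers the matching index into PSVNames.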
+ OS << PSVNames[this - *PSVs];
+}
+
+namespace {
+ /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
+ /// for holding FixedStack values, which must include a frame
+ /// index.
+ class VISIBILITY_HIDDEN FixedStackPseudoSourceValue
+ : public PseudoSourceValue {
+ const int FI;
+ public:
+ explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {}
+
+ virtual bool isConstant(const MachineFrameInfo *MFI) const;
+
+ virtual void print(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+ }
+ };
+}
+
+static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+ const PseudoSourceValue *&V = (*FSValues)[FI];
+ if (!V)
+ V = new FixedStackPseudoSourceValue(FI);
+ return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (this == getStack())
+ return false;
+ if (this == getGOT() ||
+ this == getConstantPool() ||
+ this == getJumpTable())
+ return true;
+ assert(0 && "Unknown PseudoSourceValue!");
+ return false;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
new file mode 100644
index 0000000..64374ce
--- /dev/null
+++ b/lib/CodeGen/README.txt
@@ -0,0 +1,208 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+ mul lr, r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ ldr r4, [sp, #+52]
+ mla r4, r3, lr, r4
+
+can be:
+
+ mul lr, r4, lr
+ mov r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+ mul r4, r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ mla r4, r3, lr, r4
+
+It also increases the likelihood that the store becomes dead.
+
+//===---------------------------------------------------------------------===//
+
+I think we should have a "hasSideEffects" flag (which is automatically set for
+stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able
+to remat any instruction that has no side effects, if it can handle it and if
+profitable.
+
+For now, I'd suggest having the remat stuff work like this:
+
+1. I need to spill/reload this thing.
+2. Check to see if it has side effects.
+3. Check to see if it is simple enough: e.g. it only has one register
+destination and no register input.
+4. If so, clone the instruction, do the xform, etc.
+
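+For illustration, a minimal sketch of that decision procedure in C++
+(hypothetical: isSimpleEnoughToRemat and insertReloadFromStack are placeholder
+helpers, and hasUnmodeledSideEffects() stands in for the proposed flag):
+
+  // Sketch only; the two helpers below are placeholders, not real LLVM API.
+  MachineInstr *getValueBack(MachineFunction &MF, MachineInstr *DefMI,
+                             unsigned Reg, int Slot) {
+    // 2. Bail out if the defining instruction has side effects.
+    if (DefMI->getDesc().hasUnmodeledSideEffects())
+      return insertReloadFromStack(Reg, Slot);
+    // 3. Only handle the simple case: one register def, no register inputs.
+    if (!isSimpleEnoughToRemat(DefMI))
+      return insertReloadFromStack(Reg, Slot);
+    // 4. Otherwise clone the defining instruction instead of reloading.
+    return MF.CloneMachineInstr(DefMI);
+  }
+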
+Advantages of this are:
+
+1. the .td file describes the behavior of the instructions, not the way the
+ algorithm should work.
+2. as remat gets smarter in the future, we shouldn't have to be changing the .td
+ files.
+3. it is easier to explain what the flag means in the .td file, because you
+ don't have to pull in the explanation of how the current remat algo works.
+
+Some potential added complexities:
+
+1. Some instructions have to be glued to their predecessor or successor: all of
+ the PC-relative instructions and the condition-code-setting instructions. We
+ could mark them as hasSideEffects, but that's not quite right. PC-relative
+ loads from constantpools can be remat'ed, for example, but that requires more
+ than just cloning the instruction. Some instructions can be remat'ed but
+ expand to more than one instruction. The allocator will have to make a
+ decision.
+
+2. As noted above, remat is not always as simple as cloning. The target will
+ have to decide how to remat an instruction. For example, an ARM 2-piece
+ constant generation instruction is remat'ed as a load from a constantpool.
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+ ...
+ %reg1037 = ADDri %reg1039, 1
+ %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+ Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+ Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+ %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0>
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. register scavenger) to assign it a free register to allow
+reuse:
+ ldr r3, [sp, #+4]
+ add r3, r3, #3
+ ldr r2, [sp, #+8]
+ add r2, r2, #2
+ ldr r1, [sp, #+4] <==
+ add r1, r1, #1
+ ldr r0, [sp, #+4]
+ add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively hoists CSE'd expressions out of loops. Sometimes this can
+have negative side effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure; R1, R2, and R3 can be spilled. We need
+to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4 @ re-materialized
+load [i + R1]
+...
+R2 = X + 7 @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+
+//===---------------------------------------------------------------------===//
+
+It's not always a good idea to choose rematerialization over spilling. If all
+the load / store instructions would be folded then spilling is cheaper because
+it won't require new live intervals / registers. See 2003-05-31-LongShifts for
+an example.
+
+//===---------------------------------------------------------------------===//
+
+With a copying garbage collector, derived pointers must not be retained across
+collector safe points; the collector could move the objects and invalidate the
+derived pointer. This is bad enough in the first place, but safe points can
+crop up unpredictably. Consider:
+
+ %array = load { i32, [0 x %obj] }** %array_addr
+ %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+ %old = load %obj** %nth_el
+ %z = div i64 %x, %y
+ store %obj* %new, %obj** %nth_el
+
+If the i64 division is lowered to a libcall, then a safe point will (must)
+appear for the call site. If a collection occurs, %array and %nth_el no longer
+point into the correct object.
+
+The fix for this is to copy address calculations so that dependent pointers
+are never live across safe point boundaries. But the loads cannot be copied
+like this if there was an intervening store, so this may be hard to get right.
+
+Only a concurrent mutator can trigger a collection at the libcall safe point.
+So single-threaded programs do not have this requirement, even with a copying
+collector. Still, LLVM optimizations would probably undo a front-end's careful
+work.
+
+//===---------------------------------------------------------------------===//
+
+The ocaml frametable structure supports liveness information. It would be good
+to support it.
+
+//===---------------------------------------------------------------------===//
+
+The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be
+revisited. The check is there to work around a misuse of directives in inline
+assembly.
+
+//===---------------------------------------------------------------------===//
+
+It would be good to detect collector/target compatibility instead of silently
+doing the wrong thing.
+
+//===---------------------------------------------------------------------===//
+
+It would be really nice to be able to write patterns in .td files for copies,
+which would eliminate a bunch of explicit predicates on them (e.g. no side
+effects). Once this is in place, it would be even better to have tblgen
+synthesize the various copy insertion/inspection methods in TargetInstrInfo.
+
+//===---------------------------------------------------------------------===//
+
+Stack coloring improvements:
+
+1. Do proper LiveStackAnalysis on all stack objects including those which are
+ not spill slots.
+2. Reorder objects to fill in gaps between objects.
+ e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4
diff --git a/lib/CodeGen/RegAllocBigBlock.cpp b/lib/CodeGen/RegAllocBigBlock.cpp
new file mode 100644
index 0000000..91e4099
--- /dev/null
+++ b/lib/CodeGen/RegAllocBigBlock.cpp
@@ -0,0 +1,892 @@
+//===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RABigBlock class
+//
+//===----------------------------------------------------------------------===//
+
+// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+// allocator works on one basic block at a time, oblivious to others.
+// However, the algorithm used here is suited for long blocks of
+// instructions - registers are spilled by greedily choosing those holding
+// values that will not be needed for the longest amount of time. This works
+// particularly well for blocks with 10 or more times as many instructions
+// as machine registers, but can be used for general code.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: - automagically invoke linearscan for (groups of) small BBs?
+// - break ties when picking regs? (probably not worth it in a
+// JIT context)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumFolded, "Number of loads/stores folded into instructions");
+
+static RegisterRegAlloc
+ bigBlockRegAlloc("bigblock", "Big-block register allocator",
+ createBigBlockRegisterAllocator);
+
+namespace {
+/// VRegKeyInfo - Defines magic values required to use VirtRegs as DenseMap
+/// keys.
+ struct VRegKeyInfo {
+ static inline unsigned getEmptyKey() { return -1U; }
+ static inline unsigned getTombstoneKey() { return -2U; }
+ static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+ static unsigned getHashValue(const unsigned &Key) { return Key; }
+ };
+
+
+/// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+/// allocator works on one basic block at a time, oblivious to others.
+/// However, the algorithm used here is suited for long blocks of
+/// instructions - registers are spilled by greedily choosing those holding
+/// values that will not be needed for the longest amount of time. This works
+/// particularly well for blocks with 10 or more times as many instructions
+/// as machine registers, but can be used for general code.
+///
+/// TODO: - automagically invoke linearscan for (groups of) small BBs?
+/// - break ties when picking regs? (probably not worth it in a
+/// JIT context)
+///
+ class VISIBILITY_HIDDEN RABigBlock : public MachineFunctionPass {
+ public:
+ static char ID;
+ RABigBlock() : MachineFunctionPass(&ID) {}
+ private:
+ /// TM - For getting at TargetMachine info
+ ///
+ const TargetMachine *TM;
+
+ /// MF - Our generic MachineFunction pointer
+ ///
+ MachineFunction *MF;
+
+ /// RegInfo - For dealing with machine register info (aliases, folds
+ /// etc)
+ const TargetRegisterInfo *RegInfo;
+
+ typedef SmallVector<unsigned, 2> VRegTimes;
+
+ /// VRegReadTable - maps VRegs in a BB to the set of times they are read
+ ///
+ DenseMap<unsigned, VRegTimes*, VRegKeyInfo> VRegReadTable;
+
+ /// VRegReadIdx - keeps track of the "current time" in terms of
+ /// positions in VRegReadTable
+ DenseMap<unsigned, unsigned , VRegKeyInfo> VRegReadIdx;
+
+ /// StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ /// values are spilled.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ /// Virt2PhysRegMap - This map contains entries for each virtual register
+ /// that is currently available in a physical register.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+ /// PhysRegsUsed - This array is effectively a map, containing entries for
+ /// each physical register that currently has a value (ie, it is in
+ /// Virt2PhysRegMap). The value mapped to is the virtual register
+ /// corresponding to the physical register (the inverse of the
+ /// Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
+ /// because it is used by a future instruction, and to -2 if it is not
+ /// allocatable. If the entry for a physical register is -1, then the
+ /// physical register is "not in the map".
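+ /// In short: -2 = unallocatable, -1 = free ("not in the map"), 0 = pinned
+ /// (live-in or a physical-register def), >0 = the virtual register
+ /// currently held in it.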
+ ///
+ std::vector<int> PhysRegsUsed;
+
+ /// VirtRegModified - This bitset contains information about which virtual
+ /// registers need to be spilled back to memory when their registers are
+ /// scavenged. If a virtual register has simply been rematerialized, there
+ /// is no reason to spill it to memory when we need the register back.
+ ///
+ std::vector<int> VirtRegModified;
+
+ /// MBBLastInsnTime - the number of the last instruction in the MBB
+ ///
+ int MBBLastInsnTime;
+
+ /// MBBCurTime - the number of the instruction currently being processed
+ ///
+ int MBBCurTime;
+
+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+ return Virt2PhysRegMap[VirtReg];
+ }
+
+ unsigned &getVirt2StackSlot(unsigned VirtReg) {
+ return StackSlotForVirtReg[VirtReg];
+ }
+
+ /// markVirtRegModified - Lets us flip bits in the VirtRegModified bitset
+ ///
+ void markVirtRegModified(unsigned Reg, bool Val = true) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ if (VirtRegModified.size() <= Reg)
+ VirtRegModified.resize(Reg+1);
+ VirtRegModified[Reg] = Val;
+ }
+
+ /// isVirtRegModified - Lets us query the VirtRegModified bitset
+ ///
+ bool isVirtRegModified(unsigned Reg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+ && "Illegal virtual register!");
+ return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+ }
+
+ public:
+ /// getPassName - returns the BigBlock allocator's name
+ ///
+ virtual const char *getPassName() const {
+ return "BigBlock Register Allocator";
+ }
+
+ /// getAnalysisUsage - declares the required analyses
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// runOnMachineFunction - Register allocate the whole function
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ ///
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+ /// FillVRegReadTable - Fill out the table of vreg read times given a BB
+ ///
+ void FillVRegReadTable(MachineBasicBlock &MBB);
+
+ /// areRegsEqual - This method returns true if the specified registers are
+ /// related to each other. To do this, it checks to see if they are equal
+ /// or if the first register is in the alias set of the second register.
+ ///
+ bool areRegsEqual(unsigned R1, unsigned R2) const {
+ if (R1 == R2) return true;
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+ *AliasSet; ++AliasSet) {
+ if (*AliasSet == R1) return true;
+ }
+ return false;
+ }
+
+ /// getStackSpaceFor - This returns the frame index of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// removePhysReg - This method marks the specified physical register as no
+ /// longer being in use.
+ ///
+ void removePhysReg(unsigned PhysReg);
+
+ /// spillVirtReg - This method spills the value specified by PhysReg into
+ /// the virtual register slot specified by VirtReg. It then updates the RA
+ /// data structures to indicate the fact that PhysReg is now available.
+ ///
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// spillPhysReg - This method spills the specified physical register into
+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+ /// true, then the request is ignored if the physical register does not
+ /// contain a virtual register.
+ ///
+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs = false);
+
+ /// assignVirtToPhysReg - This method updates local state so that we know
+ /// that PhysReg is the proper container for VirtReg now. The physical
+ /// register must not be used for anything else when this is called.
+ ///
+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+ /// isPhysRegAvailable - Return true if the specified physical register is
+ /// free and available for use. This also includes checking to see if
+ /// aliased registers are all free...
+ ///
+ bool isPhysRegAvailable(unsigned PhysReg) const;
+
+ /// getFreeReg - Look to see if there is a free register available in the
+ /// specified register class. If not, return 0.
+ ///
+ unsigned getFreeReg(const TargetRegisterClass *RC);
+
+ /// chooseReg - Pick a physical register to hold the specified
+ /// virtual register by choosing the one which will be read furthest
+ /// in the future.
+ ///
+ unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned VirtReg);
+
+ /// reloadVirtReg - This method transforms the specified virtual
+ /// register use to refer to a physical register. This method may do this
+ /// in one of several ways: if the register is available in a physical
+ /// register already, it uses that physical register. If the value is not
+ /// in a physical register, and if there are physical registers available,
+ /// it loads it into a register. If register pressure is high, and it is
+ /// possible, it tries to fold the load of the virtual register into the
+ /// instruction itself. It avoids doing this if register pressure is low to
+ /// improve the chance that subsequent instructions can use the reloaded
+ /// value. This method returns the modified instruction.
+ ///
+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum);
+
+ };
+ char RABigBlock::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RABigBlock::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location VirtReg would belong...
+ int FrameIdx = getVirt2StackSlot(VirtReg);
+
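+ // The map stores FrameIdx biased by +1 so that 0 can mean "no stack slot
+ // allocated yet"; undo the bias on the way out.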
+ if (FrameIdx)
+ return FrameIdx - 1; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ getVirt2StackSlot(VirtReg) = FrameIdx + 1;
+ return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RABigBlock::removePhysReg(unsigned PhysReg) {
+ PhysRegsUsed[PhysReg] = -1; // PhysReg no longer used
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RABigBlock::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+ assert(VirtReg && "Spilling a physical register is illegal!"
+ " Must not have appropriate kill for the register or use exists beyond"
+ " the intended one.");
+ DOUT << " Spilling register " << RegInfo->getName(PhysReg)
+ << " containing %reg" << VirtReg;
+
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+
+ if (!isVirtRegModified(VirtReg))
+ DOUT << " which has not been modified, so no store necessary!";
+
+ // Otherwise, there is a virtual register corresponding to this physical
+ // register. We only need to spill it into its stack slot if it has been
+ // modified.
+ if (isVirtRegModified(VirtReg)) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DOUT << " to stack slot #" << FrameIndex;
+ TII->storeRegToStackSlot(MBB, I, PhysReg, true, FrameIndex, RC);
+ ++NumStores; // Update statistics
+ }
+
+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+
+ DOUT << "\n";
+ removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs) {
+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
+ assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+ } else {
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register.
+ PhysRegsUsed[*AliasSet] != -2) // If allocatable.
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+ // Update information to note the fact that this register was just used, and
+ // it holds VirtReg.
+ PhysRegsUsed[PhysReg] = VirtReg;
+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const {
+ if (PhysRegsUsed[PhysReg] != -1) return false;
+
+ // If the selected register aliases any other allocated registers, it is
+ // not free!
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
+ return false; // Can't use this reg then.
+ return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) {
+ // Get iterators defining the range of registers that are valid to allocate in
+ // this class, which also specifies the preferred allocation order.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegAvailable(*RI)) { // Is reg unused?
+ assert(*RI != 0 && "Cannot use register!");
+ return *RI; // Found an unused register!
+ }
+ return 0;
+}
+
+
+/// chooseReg - Pick a physical register to hold the specified
+/// virtual register by choosing the one whose value will be read
+/// furthest in the future.
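+/// (This is the classic "furthest next use" eviction heuristic, applied per
+/// block using the read times collected by FillVRegReadTable.)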
+///
+unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned VirtReg) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ // First check to see if we have a free register of the requested type...
+ unsigned PhysReg = getFreeReg(RC);
+
+ // If we didn't find an unused register, find the one which will be
+ // read at the most distant point in time.
+ if (PhysReg == 0) {
+ unsigned delay=0, longest_delay=0;
+ VRegTimes* ReadTimes;
+
+ unsigned curTime = MBBCurTime;
+
+ // for all physical regs in the RC,
+ for(TargetRegisterClass::iterator pReg = RC->begin();
+ pReg != RC->end(); ++pReg) {
+ // how long until they're read?
+ if(PhysRegsUsed[*pReg]>0) { // ignore non-allocatable regs
+ ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]];
+ if(ReadTimes && !ReadTimes->empty()) {
+ unsigned& pt = VRegReadIdx[PhysRegsUsed[*pReg]];
+ while(pt < ReadTimes->size() && (*ReadTimes)[pt] < curTime) {
+ ++pt;
+ }
+
+ if(pt < ReadTimes->size())
+ delay = (*ReadTimes)[pt] - curTime;
+ else
+ delay = MBBLastInsnTime + 1 - curTime;
+ } else {
+ // This register is only defined, but never
+ // read in this MBB. Therefore the next read
+ // happens after the end of this MBB
+ delay = MBBLastInsnTime + 1 - curTime;
+ }
+
+
+ if(delay > longest_delay) {
+ longest_delay = delay;
+ PhysReg = *pReg;
+ }
+ }
+ }
+
+ if(PhysReg == 0) { // ok, now we're desperate. We couldn't choose
+ // a register to spill by looking through the
+ // read timetable, so now we just spill the
+ // first allocatable register we find.
+
+ // for all physical regs in the RC,
+ for(TargetRegisterClass::iterator pReg = RC->begin();
+ pReg != RC->end(); ++pReg) {
+ // if we find a register we can spill
+ if(PhysRegsUsed[*pReg]>=-1)
+ PhysReg = *pReg; // choose it to be spilled
+ }
+ }
+
+ assert(PhysReg && "couldn't choose a register to spill :( ");
+ // TODO: assert that RC->contains(PhysReg) / handle aliased registers?
+
+ // since we needed to look in the table we need to spill this register.
+ spillPhysReg(MBB, I, PhysReg);
+ }
+
+ // assign the vreg to our chosen physical register
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg; // and return it
+}
+
+
+/// reloadVirtReg - This method transforms an instruction with a virtual
+/// register use to one that references a physical register. It does this as
+/// follows:
+///
+/// 1) If the register is already in a physical register, it uses it.
+/// 2) Otherwise, if there is a free physical register, it uses that.
+/// 3) Otherwise, it calls chooseReg() to get the physical register
+/// holding the most distantly needed value, generating a spill in
+/// the process.
+///
+/// This method returns the modified instruction.
+MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum) {
+ unsigned VirtReg = MI->getOperand(OpNum).getReg();
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+
+ // If the virtual register is already available in a physical register,
+ // just update the instruction and return.
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+ MI->getOperand(OpNum).setReg(PR);
+ return MI;
+ }
+
+ // Otherwise, if we have free physical registers available to hold the
+ // value, use them.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ unsigned PhysReg = getFreeReg(RC);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+ if (PhysReg) { // we have a free register, so use it.
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ } else { // no free registers available.
+ // try to fold the spill into the instruction
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(OpNum);
+ if(MachineInstr* FMI = TII->foldMemoryOperand(*MF, MI, Ops, FrameIndex)) {
+ ++NumFolded;
+ FMI->copyKillDeadInfo(MI);
+ return MBB.insert(MBB.erase(MI), FMI);
+ }
+
+ // determine which of the physical registers we'll kill off, since we
+ // couldn't fold.
+ PhysReg = chooseReg(MBB, MI, VirtReg);
+ }
+
+ // this virtual register is now unmodified (since we just reloaded it)
+ markVirtRegModified(VirtReg, false);
+
+ DOUT << " Reloading %reg" << VirtReg << " into "
+ << RegInfo->getName(PhysReg) << "\n";
+
+ // Add move instruction(s)
+ TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ ++NumLoads; // Update statistics
+
+ MF->getRegInfo().setPhysRegUsed(PhysReg);
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ return MI;
+}
+
+/// Fill out the vreg read timetable. Since ReadTime increases
+/// monotonically, the individual readtime sets will be sorted
+/// in ascending order.
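+/// For example, if a vreg is read at instruction positions 3, 7 and 12 of the
+/// block, its VRegTimes entry ends up as {3, 7, 12}; chooseReg() later skips
+/// past entries earlier than MBBCurTime via the VRegReadIdx cursor.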
+void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII;
+ unsigned ReadTime;
+
+ for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) {
+ MachineInstr *MI = MII;
+
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // look for vreg reads..
+ if (MO.isReg() && !MO.isDef() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ // ..and add them to the read table.
+ VRegTimes* &Times = VRegReadTable[MO.getReg()];
+ if(!VRegReadTable[MO.getReg()]) {
+ Times = new VRegTimes;
+ VRegReadIdx[MO.getReg()] = 0;
+ }
+ Times->push_back(ReadTime);
+ }
+ }
+
+ }
+
+ MBBLastInsnTime = ReadTime;
+
+ for(DenseMap<unsigned, VRegTimes*, VRegKeyInfo>::iterator Reads = VRegReadTable.begin();
+ Reads != VRegReadTable.end(); ++Reads) {
+ if(Reads->second) {
+ DOUT << "Reads[" << Reads->first << "]=" << Reads->second->size() << "\n";
+ }
+ }
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ MO.isDef() && !MO.isDead())
+ return true;
+ }
+ return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ !MO.isDef() && MO.isKill())
+ return true;
+ }
+ return false;
+}
+
+
+void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII = MBB.begin();
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+
+ DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+ // If this is the first basic block in the machine function, add live-in
+ // registers as active.
+ if (&MBB == &*MF->begin()) {
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Otherwise, sequentially allocate each instruction in the MBB.
+ MBBCurTime = -1;
+ while (MII != MBB.end()) {
+ MachineInstr *MI = MII++;
+ MBBCurTime++;
+ const TargetInstrDesc &TID = MI->getDesc();
+ DEBUG(DOUT << "\nTime=" << MBBCurTime << " Starting RegAlloc of: " << *MI;
+ DOUT << " Regs have values: ";
+ for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ DOUT << "[" << RegInfo->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ DOUT << "\n");
+
+ SmallVector<unsigned, 8> Kills;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // These are extra physical register kills when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ Kills.push_back(MO.getReg());
+ }
+ }
+
+ // Get the used operands into registers. This has the potential to spill
+ // incoming values if we are out of registers. Note that we completely
+ // ignore physical register uses here. We assume that if an explicit
+ // physical register is referenced by the instruction, that it is guaranteed
+ // to be live-in, or the input is badly hosed.
+ //
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // here we are looking for only used operands (never def&use)
+ if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = reloadVirtReg(MBB, MI, i);
+ }
+
+ // If this instruction is the last user of this register, kill the
+ // value, freeing the register being used, so it doesn't need to be
+ // spilled to memory.
+ //
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned VirtReg = Kills[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // If the virtual register was never materialized into a register, it
+ // might not be in the map, but it won't hurt to zero it out anyway.
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else {
+ assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+ "Silently clearing a virtual register?");
+ }
+
+ if (PhysReg) {
+ DOUT << " Last use of " << RegInfo->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Last use of "
+ << RegInfo->getName(*AliasSet)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Loop over all of the operands of the instruction, spilling registers that
+ // are defined, and marking explicit destinations in the PhysRegsUsed map.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Loop over the implicit defs, spilling them as well.
+ if (TID.getImplicitDefs()) {
+ for (const unsigned *ImplicitDefs = TID.getImplicitDefs();
+ *ImplicitDefs; ++ImplicitDefs) {
+ unsigned Reg = *ImplicitDefs;
+ if (PhysRegsUsed[Reg] != -2) {
+ spillPhysReg(MBB, MI, Reg, true);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ }
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ SmallVector<unsigned, 8> DeadDefs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadDefs.push_back(MO.getReg());
+ }
+
+ // Okay, we have allocated all of the source operands and spilled any values
+ // that would be destroyed by defs of this instruction. Loop over the
+ // explicit defs and assign them to a register, spilling incoming values if
+ // we need to scavenge a register.
+ //
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = chooseReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ MI->getOperand(i).setReg(DestPhysReg); // Assign the output register
+ }
+ }
+
+ // If this instruction defines any registers that are immediately dead,
+ // kill them now.
+ //
+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+ unsigned VirtReg = DeadDefs[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ assert(PhysReg != 0);
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ }
+
+ if (PhysReg) {
+ DOUT << " Register " << RegInfo->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Register " << RegInfo->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it from live set\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Finally, if this is a noop copy instruction, zap it.
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (TII.isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ SrcReg == DstReg)
+ MBB.erase(MI);
+ }
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+ // Spill all physical registers holding virtual registers now.
+ for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+ if (unsigned VirtReg = PhysRegsUsed[i])
+ spillVirtReg(MBB, MI, VirtReg, i);
+ else
+ removePhysReg(i);
+ }
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function " << "\n";
+ MF = &Fn;
+ TM = &Fn.getTarget();
+ RegInfo = TM->getRegisterInfo();
+
+ PhysRegsUsed.assign(RegInfo->getNumRegs(), -1);
+
+ // At various places we want to efficiently check to see whether a register
+ // is allocatable. To handle this, we mark all unallocatable registers as
+ // being pinned down, permanently.
+ {
+ BitVector Allocable = RegInfo->getAllocatableSet(Fn);
+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+ if (!Allocable[i])
+ PhysRegsUsed[i] = -2; // Mark the reg unallocable.
+ }
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ Virt2PhysRegMap.grow(MF->getRegInfo().getLastVirtReg());
+ StackSlotForVirtReg.grow(MF->getRegInfo().getLastVirtReg());
+ VirtRegModified.resize(MF->getRegInfo().getLastVirtReg() -
+ TargetRegisterInfo::FirstVirtualRegister + 1, 0);
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // fill out the read timetable
+ FillVRegReadTable(*MBB);
+ // use it to allocate the BB
+ AllocateBasicBlock(*MBB);
+ // clear it
+ VRegReadTable.clear();
+ }
+
+ StackSlotForVirtReg.clear();
+ PhysRegsUsed.clear();
+ VirtRegModified.clear();
+ Virt2PhysRegMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createBigBlockRegisterAllocator() {
+ return new RABigBlock();
+}
+
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
new file mode 100644
index 0000000..ee118de
--- /dev/null
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -0,0 +1,1535 @@
+//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a linear scan register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "Spiller.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <set>
+#include <queue>
+#include <memory>
+#include <cmath>
+#include <iostream>
+
+using namespace llvm;
+
+STATISTIC(NumIters , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+ cl::desc("Use new spilling heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PreSplitIntervals("pre-alloc-split",
+ cl::desc("Pre-register allocation live interval splitting"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+NewSpillFramework("new-spill-framework",
+ cl::desc("New spilling framework"),
+ cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+ createLinearScanRegisterAllocator);
+
+namespace {
+ struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass {
+ static char ID;
+ RALinScan() : MachineFunctionPass(&ID) {}
+
+ typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+ typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
+ private:
+ /// RelatedRegClasses - This structure is built the first time a function is
+ /// compiled, and keeps track of which register classes have registers that
+ /// belong to multiple classes or have aliases that are in other classes.
+ EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+ DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+ // NextReloadMap - For each register in the map, it maps to another
+ // register which is defined by a reload from the same stack slot, where
+ // both reloads are in the same basic block.
+ DenseMap<unsigned, unsigned> NextReloadMap;
+
+ // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+ // un-favored for allocation.
+ SmallSet<unsigned, 8> DowngradedRegs;
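+ // (See the SkipDGRegs parameter of getFreePhysReg() below, which lets the
+ // allocator prefer registers that have not been downgraded.)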
+
+ // DowngradeMap - A map from virtual registers to physical registers being
+ // downgraded for the virtual registers.
+ DenseMap<unsigned, unsigned> DowngradeMap;
+
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ BitVector allocatableRegs_;
+ LiveIntervals* li_;
+ LiveStacks* ls_;
+ const MachineLoopInfo *loopInfo;
+
+ /// handled_ - Intervals are added to the handled_ set in the order of their
+ /// start value. This is used for backtracking.
+ std::vector<LiveInterval*> handled_;
+
+ /// fixed_ - Intervals that correspond to machine registers.
+ ///
+ IntervalPtrs fixed_;
+
+ /// active_ - Intervals that are currently being processed, and which have a
+ /// live range active for the current point.
+ IntervalPtrs active_;
+
+ /// inactive_ - Intervals that are currently being processed, but which have
+ /// a hole at the current point.
+ IntervalPtrs inactive_;
+
+ typedef std::priority_queue<LiveInterval*,
+ SmallVector<LiveInterval*, 64>,
+ greater_ptr<LiveInterval> > IntervalHeap;
+ IntervalHeap unhandled_;
+
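+ // Informally, intervals move between these sets as the scan point advances:
+ // unhandled_ -> active_ when an interval's start is reached, active_ <->
+ // inactive_ as the current point enters or leaves a hole in the interval,
+ // and everything eventually lands in handled_ (in start order), which is
+ // what backtracking relies on.
+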
+ /// regUse_ - Tracks register usage.
+ SmallVector<unsigned, 32> regUse_;
+ SmallVector<unsigned, 32> regUseBackUp_;
+
+ /// vrm_ - Tracks register assignments.
+ VirtRegMap* vrm_;
+
+ std::auto_ptr<VirtRegRewriter> rewriter_;
+
+ std::auto_ptr<Spiller> spiller_;
+
+ public:
+ virtual const char* getPassName() const {
+ return "Linear Scan Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ // Make sure PassManager knows which analyses to make available
+ // to coalescing and which analyses coalescing invalidates.
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ if (PreSplitIntervals)
+ AU.addRequiredID(PreAllocSplittingID);
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - register allocate the whole function
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ /// linearScan - the linear scan algorithm
+ void linearScan();
+
+ /// initIntervalSets - initialize the interval sets.
+ ///
+ void initIntervalSets();
+
+ /// processActiveIntervals - expire old intervals and move non-overlapping
+ /// ones to the inactive list.
+ void processActiveIntervals(unsigned CurPoint);
+
+ /// processInactiveIntervals - expire old intervals and move overlapping
+ /// ones to the active list.
+ void processInactiveIntervals(unsigned CurPoint);
+
+ /// hasNextReloadInterval - Return the next liveinterval that's being
+ /// defined by a reload from the same SS as the specified one.
+ LiveInterval *hasNextReloadInterval(LiveInterval *cur);
+
+ /// DowngradeRegister - Downgrade a register for allocation.
+ void DowngradeRegister(LiveInterval *li, unsigned Reg);
+
+ /// UpgradeRegister - Upgrade a register for allocation.
+ void UpgradeRegister(unsigned Reg);
+
+ /// assignRegOrStackSlotAtInterval - assign a register if one
+ /// is available, or spill.
+ void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+ void updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC);
+
+ /// findIntervalsToSpill - Determine the intervals to spill for the
+ /// specified interval. It's passed the physical registers whose spill
+ /// weight is the lowest among all the registers whose live intervals
+ /// conflict with the interval.
+ void findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals);
+
+ /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
+ /// try to allocate the definition the same register as the source register
+ /// if the register is not defined during the live time of the interval.
+ /// This eliminates a copy. This is used to coalesce copies which were not
+ /// coalesced away before allocation either due to dest and src being in
+ /// different register classes or because the coalescer was overly
+ /// conservative.
+ unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
+
+ ///
+ /// Register usage / availability tracking helpers.
+ ///
+
+ void initRegUses() {
+ regUse_.resize(tri_->getNumRegs(), 0);
+ regUseBackUp_.resize(tri_->getNumRegs(), 0);
+ }
+
+ void finalizeRegUses() {
+#ifndef NDEBUG
+ // Verify all the registers are "freed".
+ bool Error = false;
+ for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
+ if (regUse_[i] != 0) {
+ cerr << tri_->getName(i) << " is still in use!\n";
+ Error = true;
+ }
+ }
+ if (Error)
+ abort();
+#endif
+ regUse_.clear();
+ regUseBackUp_.clear();
+ }
+
+ void addRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ ++regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
+ ++regUse_[*as];
+ }
+
+ void delRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ assert(regUse_[physReg] != 0);
+ --regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
+ assert(regUse_[*as] != 0);
+ --regUse_[*as];
+ }
+ }
+
+ bool isRegAvail(unsigned physReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ return regUse_[physReg] == 0;
+ }
+
+ void backUpRegUses() {
+ regUseBackUp_ = regUse_;
+ }
+
+ void restoreRegUses() {
+ regUse_ = regUseBackUp_;
+ }
+
+ ///
+ /// Register handling helpers.
+ ///
+
+ /// getFreePhysReg - return a free physical register for this virtual
+ /// register interval if we have one, otherwise return 0.
+ unsigned getFreePhysReg(LiveInterval* cur);
+ unsigned getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs);
+
+ /// assignVirt2StackSlot - assigns this virtual register to a
+ /// stack slot. Returns the stack slot.
+ int assignVirt2StackSlot(unsigned virtReg);
+
+ void ComputeRelatedRegClasses();
+
+ template <typename ItTy>
+ void printIntervals(const char* const str, ItTy i, ItTy e) const {
+ if (str) DOUT << str << " intervals:\n";
+ for (; i != e; ++i) {
+ DOUT << "\t" << *i->first << " -> ";
+ unsigned reg = i->first->reg;
+ if (TargetRegisterInfo::isVirtualRegister(reg)) {
+ reg = vrm_->getPhys(reg);
+ }
+ DOUT << tri_->getName(reg) << '\n';
+ }
+ }
+ };
+ char RALinScan::ID = 0;
+}
+
+static RegisterPass<RALinScan>
+X("linearscan-regalloc", "Linear Scan Register Allocator");
+
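+/// validateRegAlloc - Sanity-check an allocation: verify that each virtual
+/// register interval was assigned a physical register from its class's
+/// allocation order, and that overlapping intervals were not assigned the
+/// same (or aliasing) physical registers. Problems are reported to std::cerr
+/// and most of them also clear the returned flag.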
+bool validateRegAlloc(MachineFunction *mf, LiveIntervals *lis,
+ VirtRegMap *vrm) {
+
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+ bool allocationValid = true;
+
+
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ LiveInterval *li = itr->second;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ continue;
+ }
+
+ if (vrm->hasPhys(li->reg)) {
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+
+ if (lis->hasInterval(vrm->getPhys(li->reg))) {
+ if (li->overlaps(lis->getInterval(vrm->getPhys(li->reg)))) {
+ std::cerr << "vreg " << li->reg << " overlaps its assigned preg "
+ << vrm->getPhys(li->reg) << "(" << tri->getName(vrm->getPhys(li->reg)) << ")\n";
+ }
+ }
+
+ TargetRegisterClass::iterator fReg =
+ std::find(trc->allocation_order_begin(*mf), trc->allocation_order_end(*mf),
+ vrm->getPhys(li->reg));
+
+ if (fReg == trc->allocation_order_end(*mf)) {
+ std::cerr << "preg " << vrm->getPhys(li->reg)
+ << "(" << tri->getName(vrm->getPhys(li->reg)) << ") is not in the allocation set for vreg "
+ << li->reg << "\n";
+ allocationValid &= false;
+ }
+ }
+ else {
+ std::cerr << "No preg for vreg " << li->reg << "\n";
+ // What about conflicting loads/stores?
+ continue;
+ }
+
+ for (LiveIntervals::iterator itr2 = next(itr); itr2 != end; ++itr2) {
+
+ LiveInterval *li2 = itr2->second;
+
+ if (li2->empty())
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li2->reg)) {
+ if (li->overlaps(*li2)) {
+ if (vrm->getPhys(li->reg) == li2->reg ||
+ tri->areAliases(vrm->getPhys(li->reg), li2->reg)) {
+ std::cerr << "vreg " << li->reg << " overlaps preg "
+ << li2->reg << "(" << tri->getName(li2->reg) << ") which aliases "
+ << vrm->getPhys(li->reg) << "(" << tri->getName(vrm->getPhys(li->reg)) << ")\n";
+ allocationValid &= false;
+ }
+ }
+ }
+ else {
+
+ if (!vrm->hasPhys(li2->reg)) {
+ continue;
+ }
+
+ if (li->overlaps(*li2)) {
+ if (vrm->getPhys(li->reg) == vrm->getPhys(li2->reg) ||
+ tri->areAliases(vrm->getPhys(li->reg), vrm->getPhys(li2->reg))) {
+            std::cerr << "vreg " << li->reg << " (preg "
+                      << vrm->getPhys(li->reg) << ") overlaps vreg "
+                      << li2->reg << " (preg " << vrm->getPhys(li2->reg)
+                      << ") and " << vrm->getPhys(li->reg) << " aliases "
+                      << vrm->getPhys(li2->reg) << "\n";
+ allocationValid &= false;
+ }
+ }
+ }
+ }
+
+ }
+
+ return allocationValid;
+
+}
+
+
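+/// ComputeRelatedRegClasses - Build the RelatedRegClasses equivalence classes
+/// and the OneClassForEachPhysReg map.  Register classes that share a
+/// physical register, directly or through aliases, are unioned together; for
+/// example, on x86 GR32 and GR16 become related because EAX and AX alias.
+/// This lets the allocator quickly decide whether an interval from another
+/// class can interfere with the class currently being allocated.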
+void RALinScan::ComputeRelatedRegClasses() {
+ // First pass, add all reg classes to the union, and determine at least one
+ // reg class that each register is in.
+ bool HasAliases = false;
+ for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+ E = tri_->regclass_end(); RCI != E; ++RCI) {
+ RelatedRegClasses.insert(*RCI);
+ for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+ I != E; ++I) {
+ HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
+
+ const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+ if (PRC) {
+ // Already processed this register. Just make sure we know that
+ // multiple register classes share a register.
+ RelatedRegClasses.unionSets(PRC, *RCI);
+ } else {
+ PRC = *RCI;
+ }
+ }
+ }
+
+ // Second pass, now that we know conservatively what register classes each reg
+ // belongs to, add info about aliases. We don't need to do this for targets
+ // without register aliases.
+ if (HasAliases)
+ for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
+ I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+ I != E; ++I)
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
+ RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+}
+
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
+/// to allocate the definition the same register as the source register if the
+/// register is not defined during the live time of the interval. This
+/// eliminates a copy. This is used to coalesce copies which were not
+/// coalesced away before allocation either due to dest and src being in
+/// different register classes or because the coalescer was overly
+/// conservative.
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
+ if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue())
+ return Reg;
+
+ VNInfo *vni = cur.begin()->valno;
+ if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ return Reg;
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg;
+ if (!CopyMI ||
+ !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return Reg;
+ PhysReg = SrcReg;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!vrm_->isAssignedReg(SrcReg))
+ return Reg;
+ PhysReg = vrm_->getPhys(SrcReg);
+ }
+ if (Reg == PhysReg)
+ return Reg;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
+ if (!RC->contains(PhysReg))
+ return Reg;
+
+ // Try to coalesce.
+ if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) {
+ DOUT << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg)
+ << '\n';
+ vrm_->clearVirt(cur.reg);
+ vrm_->assignVirt2Phys(cur.reg, PhysReg);
+
+ // Remove unnecessary kills since a copy does not clobber the register.
+ if (li_->hasInterval(SrcReg)) {
+ LiveInterval &SrcLI = li_->getInterval(SrcReg);
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(cur.reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ if (!O.isUse() || !O.isKill())
+ continue;
+ MachineInstr *MI = &*I;
+ if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI))))
+ O.setIsKill(false);
+ }
+ }
+
+ ++NumCoalesce;
+ return SrcReg;
+ }
+
+ return Reg;
+}
+
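+/// runOnMachineFunction - Register allocate the whole function using the
+/// linear scan algorithm, then rewrite spill code and virtual registers via
+/// the VirtRegRewriter.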
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ li_ = &getAnalysis<LiveIntervals>();
+ ls_ = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ // We don't run the coalescer here because we have no reason to
+ // interact with it. If the coalescer requires interaction, it
+ // won't do anything. If it doesn't require interaction, we assume
+ // it was run as a separate pass.
+
+ // If this is the first function compiled, compute the related reg classes.
+ if (RelatedRegClasses.empty())
+ ComputeRelatedRegClasses();
+
+ // Also resize register usage trackers.
+ initRegUses();
+
+ vrm_ = &getAnalysis<VirtRegMap>();
+ if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
+
+ if (NewSpillFramework) {
+ spiller_.reset(createSpiller(mf_, li_, ls_, vrm_));
+ }
+
+ initIntervalSets();
+
+ linearScan();
+
+  if (NewSpillFramework) {
+    bool allocValid = validateRegAlloc(mf_, li_, vrm_);
+    allocValid = allocValid; // Silence compiler warning.
+  }
+
+ // Rewrite spill code and update the PhysRegsUsed set.
+ rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+
+ assert(unhandled_.empty() && "Unhandled live intervals remain!");
+
+ finalizeRegUses();
+
+ fixed_.clear();
+ active_.clear();
+ inactive_.clear();
+ handled_.clear();
+ NextReloadMap.clear();
+ DowngradedRegs.clear();
+ DowngradeMap.clear();
+ spiller_.reset(0);
+
+ return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+ assert(unhandled_.empty() && fixed_.empty() &&
+ active_.empty() && inactive_.empty() &&
+ "interval sets should be empty on initialization");
+
+ handled_.reserve(li_->getNumIntervals());
+
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
+ mri_->setPhysRegUsed(i->second->reg);
+ fixed_.push_back(std::make_pair(i->second, i->second->begin()));
+ } else
+ unhandled_.push(i->second);
+ }
+}
+
+void RALinScan::linearScan()
+{
+ // linear scan algorithm
+ DOUT << "********** LINEAR SCAN **********\n";
+ DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n';
+
+ DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end()));
+
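+  // Invariants of the main loop: unhandled_ holds intervals not yet visited,
+  // ordered by increasing start point; active_ holds intervals live at the
+  // current point that occupy a physical register (counted in regUse_);
+  // inactive_ holds intervals that started earlier but have a lifetime hole
+  // covering the current point; fixed_ holds the preassigned physical
+  // register intervals.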
+ while (!unhandled_.empty()) {
+ // pick the interval with the earliest start point
+ LiveInterval* cur = unhandled_.top();
+ unhandled_.pop();
+ ++NumIters;
+ DOUT << "\n*** CURRENT ***: " << *cur << '\n';
+
+ if (!cur->empty()) {
+ processActiveIntervals(cur->beginNumber());
+ processInactiveIntervals(cur->beginNumber());
+
+ assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
+ "Can only allocate virtual registers!");
+ }
+
+    // Allocating a virtual register. Try to find a free
+ // physical register or spill an interval (possibly this one) in order to
+ // assign it one.
+ assignRegOrStackSlotAtInterval(cur);
+
+ DEBUG(printIntervals("active", active_.begin(), active_.end()));
+ DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end()));
+ }
+
+ // Expire any remaining active intervals
+ while (!active_.empty()) {
+ IntervalPtr &IP = active_.back();
+ unsigned reg = IP.first->reg;
+ DOUT << "\tinterval " << *IP.first << " expired\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ active_.pop_back();
+ }
+
+ // Expire any remaining inactive intervals
+ DEBUG(for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+ DOUT << "\tinterval " << *i->first << " expired\n");
+ inactive_.clear();
+
+ // Add live-ins to every BB except for entry. Also perform trivial coalescing.
+ MachineFunction::iterator EntryMBB = mf_->begin();
+ SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ LiveInterval &cur = *i->second;
+ unsigned Reg = 0;
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
+ if (isPhys)
+ Reg = cur.reg;
+ else if (vrm_->isAssignedReg(cur.reg))
+ Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
+ if (!Reg)
+ continue;
+    // Ignore split live intervals.
+ if (!isPhys && vrm_->getPreSplitReg(cur.reg))
+ continue;
+ for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
+ I != E; ++I) {
+ const LiveRange &LR = *I;
+ if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
+ for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
+ if (LiveInMBBs[i] != EntryMBB)
+ LiveInMBBs[i]->addLiveIn(Reg);
+ LiveInMBBs.clear();
+ }
+ }
+ }
+
+ DOUT << *vrm_;
+
+ // Look for physical registers that end up not being allocated even though
+ // register allocator had to spill other registers in its register class.
+ if (ls_->getNumIntervals() == 0)
+ return;
+ if (!vrm_->FindUnusedRegisters(tri_, li_))
+ return;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(unsigned CurPoint)
+{
+ DOUT << "\tprocessing active intervals:\n";
+
+ for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+ LiveInterval *Interval = active_[i].first;
+ LiveInterval::iterator IntervalPos = active_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // Remove expired intervals.
+ DOUT << "\t\tinterval " << *Interval << " expired\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+
+ } else if (IntervalPos->start > CurPoint) {
+ // Move inactive intervals to inactive list.
+ DOUT << "\t\tinterval " << *Interval << " inactive\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ // add to inactive.
+ inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ active_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(unsigned CurPoint)
+{
+ DOUT << "\tprocessing inactive intervals:\n";
+
+ for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+ LiveInterval *Interval = inactive_[i].first;
+ LiveInterval::iterator IntervalPos = inactive_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // remove expired intervals.
+ DOUT << "\t\tinterval " << *Interval << " expired\n";
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else if (IntervalPos->start <= CurPoint) {
+ // move re-activated intervals in active list
+ DOUT << "\t\tinterval " << *Interval << " active\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ addRegUse(reg);
+ // add to active
+ active_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ inactive_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// updateSpillWeights - Update the spill weight of the specified physical
+/// register and of its aliases.
+void RALinScan::updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC) {
+ SmallSet<unsigned, 4> Processed;
+ SmallSet<unsigned, 4> SuperAdded;
+ SmallVector<unsigned, 4> Supers;
+ Weights[reg] += weight;
+ Processed.insert(reg);
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
+ Weights[*as] += weight;
+ Processed.insert(*as);
+ if (tri_->isSubRegister(*as, reg) &&
+ SuperAdded.insert(*as) &&
+ RC->contains(*as)) {
+ Supers.push_back(*as);
+ }
+ }
+
+  // If the alias is a super-register, and the super-register is in the
+  // register class we are trying to allocate, then add the weight to all
+  // sub-registers of the super-register even if they are not aliases.
+  // e.g. when allocating for GR32 and bh is not used, update bl's spill
+  // weight too; otherwise bl will be chosen as a spill candidate even
+  // though spilling bh doesn't make ebx available.
+ for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+ for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+ if (!Processed.count(*sr))
+ Weights[*sr] += weight;
+ }
+}
+
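+/// FindIntervalInVector - Return an iterator to the entry of IP that refers
+/// to the live interval LI, or IP.end() if it is not present.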
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+ for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+ I != E; ++I)
+ if (I->first == LI) return I;
+ return IP.end();
+}
+
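+/// RevertVectorIteratorsTo - Rewind the cached iterator of each entry in V so
+/// that it points at the live range containing Point (or the last range
+/// before it).  Used when backtracking the scan to an earlier program point.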
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ RALinScan::IntervalPtr &IP = V[i];
+ LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+ IP.second, Point);
+ if (I != IP.first->begin()) --I;
+ IP.second = I;
+ }
+}
+
+/// addStackInterval - Create a LiveInterval for the stack slot if the
+/// specified live interval has been spilled.
+static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
+ LiveIntervals *li_,
+ MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
+ int SS = vrm_.getStackSlot(cur->reg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
+
+ VNInfo *VNI;
+ if (SI.hasAtLeastOneValue())
+ VNI = SI.getValNumInfo(0);
+ else
+ VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator());
+
+ LiveInterval &RI = li_->getInterval(cur->reg);
+ // FIXME: This may be overly conservative.
+ SI.MergeRangesInAsValue(RI, VNI);
+}
+
+/// getConflictWeight - Return the number of conflicts between cur's live
+/// interval and the defs and uses of Reg, weighted by loop depth.
+static
+float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
+ MachineRegisterInfo *mri_,
+ const MachineLoopInfo *loopInfo) {
+ float Conflicts = 0;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (cur->liveAt(li_->getInstructionIndex(MI))) {
+ unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
+ Conflicts += powf(10.0f, (float)loopDepth);
+ }
+ }
+ return Conflicts;
+}
+
+/// findIntervalsToSpill - Determine the intervals to spill for the
+/// specified interval. It's passed the physical registers whose spill
+/// weight is the lowest among all the registers whose live intervals
+/// conflict with the interval.
+void RALinScan::findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals) {
+  // We have figured out the *best* register to spill. But there are other
+  // registers that are pretty good as well (spill weight within a small
+  // margin; see weightsAreClose). Spill the one that has the fewest defs and
+  // uses that conflict with cur.
+ float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
+ SmallVector<LiveInterval*, 8> SLIs[3];
+
+ DOUT << "\tConsidering " << NumCands << " candidates: ";
+ DEBUG(for (unsigned i = 0; i != NumCands; ++i)
+ DOUT << tri_->getName(Candidates[i].first) << " ";
+ DOUT << "\n";);
+
+ // Calculate the number of conflicts of each candidate.
+ for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second-1))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ // Which is the best candidate?
+ unsigned BestCandidate = 0;
+ float MinConflicts = Conflicts[0];
+ for (unsigned i = 1; i != NumCands; ++i) {
+ if (Conflicts[i] < MinConflicts) {
+ BestCandidate = i;
+ MinConflicts = Conflicts[i];
+ }
+ }
+
+ std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
+ std::back_inserter(SpillIntervals));
+}
+
+namespace {
+ struct WeightCompare {
+ typedef std::pair<unsigned, float> RegWeightPair;
+ bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
+ return LHS.second < RHS.second;
+ }
+ };
+}
+
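+/// weightsAreClose - When the NewHeuristic option is enabled, treat two spill
+/// weights as "close" if w1 exceeds w2 by at most 0.02 or by at most 5%;
+/// otherwise always return false so that only the single lowest-weight
+/// register is considered as a spill candidate.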
+static bool weightsAreClose(float w1, float w2) {
+ if (!NewHeuristic)
+ return false;
+
+ float diff = w1 - w2;
+ if (diff <= 0.02f) // Within 0.02f
+ return true;
+ return (diff / w2) <= 0.05f; // Within 5%.
+}
+
+LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
+ DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
+ if (I == NextReloadMap.end())
+ return 0;
+ return &li_->getInterval(I->second);
+}
+
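+/// DowngradeRegister - Record that Reg (and all of its aliases) holds a value
+/// reloaded from a stack slot that will be reloaded again later in the same
+/// block.  Downgraded registers are skipped on the first getFreePhysReg pass
+/// (the SkipDGRegs path) so that the next reload from the same stack slot,
+/// found through NextReloadMap, has a good chance of being assigned the same
+/// register and the value is less likely to be clobbered in between.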
+void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
+ bool isNew = DowngradedRegs.insert(Reg);
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, Reg));
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
+ isNew = DowngradedRegs.insert(*AS);
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Multiple reloads holding the same register?");
+ DowngradeMap.insert(std::make_pair(li->reg, *AS));
+ }
+ ++NumDowngrade;
+}
+
+void RALinScan::UpgradeRegister(unsigned Reg) {
+ if (Reg) {
+ DowngradedRegs.erase(Reg);
+ for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
+ DowngradedRegs.erase(*AS);
+ }
+}
+
+namespace {
+ struct LISorter {
+ bool operator()(LiveInterval* A, LiveInterval* B) {
+ return A->beginNumber() < B->beginNumber();
+ }
+ };
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
+{
+ DOUT << "\tallocating current interval: ";
+
+ // This is an implicitly defined live interval, just assign any register.
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ if (cur->empty()) {
+ unsigned physReg = cur->preference;
+ if (!physReg)
+ physReg = *RC->allocation_order_begin(*mf_);
+ DOUT << tri_->getName(physReg) << '\n';
+ // Note the register is not really in use.
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ return;
+ }
+
+ backUpRegUses();
+
+ std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+ unsigned StartPosition = cur->beginNumber();
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+  // If the start of this live interval is defined by a move instruction and
+  // its source is assigned a physical register that is compatible with the
+  // target register class, then we should try to assign it the same register.
+ // This can happen when the move is from a larger register class to a smaller
+ // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
+ if (!cur->preference && cur->hasAtLeastOneValue()) {
+ VNInfo *vni = cur->begin()->valno;
+ if (vni->def && vni->def != ~1U && vni->def != ~0U) {
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (CopyMI &&
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
+ unsigned Reg = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ Reg = SrcReg;
+ else if (vrm_->isAssignedReg(SrcReg))
+ Reg = vrm_->getPhys(SrcReg);
+ if (Reg) {
+ if (SrcSubReg)
+ Reg = tri_->getSubReg(Reg, SrcSubReg);
+ if (DstSubReg)
+ Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
+ if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
+ cur->preference = Reg;
+ }
+ }
+ }
+ }
+
+ // For every interval in inactive we overlap with, mark the
+ // register as not free and update spill weights.
+ for (IntervalPtrs::const_iterator i = inactive_.begin(),
+ e = inactive_.end(); i != e; ++i) {
+ unsigned Reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only allocate virtual registers!");
+ const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ cur->overlapsFrom(*i->first, i->second-1)) {
+ Reg = vrm_->getPhys(Reg);
+ addRegUse(Reg);
+ SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+ }
+ }
+
+ // Speculatively check to see if we can get a register right now. If not,
+ // we know we won't be able to by adding more constraints. If so, we can
+ // check to see if it is valid. Doing an exhaustive search of the fixed_ list
+ // is very bad (it contains all callee clobbered registers for any functions
+ // with a call), so we want to avoid doing that if possible.
+ unsigned physReg = getFreePhysReg(cur);
+ unsigned BestPhysReg = physReg;
+ if (physReg) {
+ // We got a register. However, if it's in the fixed_ list, we might
+ // conflict with it. Check to see if we conflict with it or any of its
+ // aliases.
+ SmallSet<unsigned, 8> RegAliases;
+ for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
+ RegAliases.insert(*AS);
+
+ bool ConflictsWithFixed = false;
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+ // Okay, this reg is on the fixed list. Check to see if we actually
+ // conflict.
+ LiveInterval *I = IP.first;
+ if (I->endNumber() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ ConflictsWithFixed = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Okay, the register picked by our speculative getFreePhysReg call turned
+ // out to be in use. Actually add all of the conflicting fixed registers to
+ // regUse_ so we can do an accurate query.
+ if (ConflictsWithFixed) {
+ // For every interval in fixed we overlap with, mark the register as not
+ // free and update spill weights.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+
+ const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ I->endNumber() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ unsigned reg = I->reg;
+ addRegUse(reg);
+ SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+ }
+ }
+ }
+
+ // Using the newly updated regUse_ object, which includes conflicts in the
+ // future, see if there are any registers available.
+ physReg = getFreePhysReg(cur);
+ }
+ }
+
+ // Restore the physical register tracker, removing information about the
+ // future.
+ restoreRegUses();
+
+ // If we find a free register, we are done: assign this virtual to
+ // the free physical register and add this interval to the active
+ // list.
+ if (physReg) {
+ DOUT << tri_->getName(physReg) << '\n';
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ addRegUse(physReg);
+ active_.push_back(std::make_pair(cur, cur->begin()));
+ handled_.push_back(cur);
+
+ // "Upgrade" the physical register since it has been allocated.
+ UpgradeRegister(physReg);
+ if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+ // "Downgrade" physReg to try to keep physReg from being allocated until
+ // the next reload from the same SS is allocated.
+ NextReloadLI->preference = physReg;
+ DowngradeRegister(cur, physReg);
+ }
+ return;
+ }
+ DOUT << "no free registers\n";
+
+ // Compile the spill weights into an array that is better for scanning.
+ std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
+ for (std::vector<std::pair<unsigned, float> >::iterator
+ I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+ updateSpillWeights(SpillWeights, I->first, I->second, RC);
+
+ // for each interval in active, update spill weights.
+ for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
+ }
+
+ DOUT << "\tassigning stack slot at interval "<< *cur << ":\n";
+
+ // Find a register to spill.
+ float minWeight = HUGE_VALF;
+  unsigned minReg = 0; /*cur->preference*/ // Try the pref register first.
+
+ bool Found = false;
+ std::vector<std::pair<unsigned,float> > RegsWeights;
+ if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+ for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+ e = RC->allocation_order_end(*mf_); i != e; ++i) {
+ unsigned reg = *i;
+ float regWeight = SpillWeights[reg];
+ if (minWeight > regWeight)
+ Found = true;
+ RegsWeights.push_back(std::make_pair(reg, regWeight));
+ }
+
+ // If we didn't find a register that is spillable, try aliases?
+ if (!Found) {
+ for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+ e = RC->allocation_order_end(*mf_); i != e; ++i) {
+ unsigned reg = *i;
+ // No need to worry about if the alias register size < regsize of RC.
+ // We are going to spill all registers that alias it anyway.
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
+ RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
+ }
+ }
+
+ // Sort all potential spill candidates by weight.
+ std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare());
+ minReg = RegsWeights[0].first;
+ minWeight = RegsWeights[0].second;
+ if (minWeight == HUGE_VALF) {
+ // All registers must have inf weight. Just grab one!
+ minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_);
+ if (cur->weight == HUGE_VALF ||
+ li_->getApproximateInstructionCount(*cur) == 0) {
+ // Spill a physical register around defs and uses.
+ if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+        // spillPhysRegAroundRegDefsUses may have invalidated iterators stored
+        // in fixed_. Reset them.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+ if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
+ IP.second = I->advanceTo(I->begin(), StartPosition);
+ }
+
+ DowngradedRegs.clear();
+ assignRegOrStackSlotAtInterval(cur);
+ } else {
+ cerr << "Ran out of registers during register allocation!\n";
+ exit(1);
+ }
+ return;
+ }
+ }
+
+ // Find up to 3 registers to consider as spill candidates.
+ unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
+ while (LastCandidate > 1) {
+ if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
+ break;
+ --LastCandidate;
+ }
+
+ DOUT << "\t\tregister(s) with min weight(s): ";
+ DEBUG(for (unsigned i = 0; i != LastCandidate; ++i)
+ DOUT << tri_->getName(RegsWeights[i].first)
+ << " (" << RegsWeights[i].second << ")\n");
+
+  // If the current interval has the minimum weight, we need to spill it,
+  // add any newly created intervals back to unhandled, and restart
+  // linear scan.
+ if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+ DOUT << "\t\t\tspilling(c): " << *cur << '\n';
+ SmallVector<LiveInterval*, 8> spillIs;
+ std::vector<LiveInterval*> added;
+
+ if (!NewSpillFramework) {
+ added = li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_);
+ } else {
+ added = spiller_->spill(cur);
+ }
+
+ std::sort(added.begin(), added.end(), LISorter());
+ addStackInterval(cur, ls_, li_, mri_, *vrm_);
+ if (added.empty())
+ return; // Early exit if all spills were folded.
+
+    // Merge added with unhandled. Note that we have already sorted
+    // intervals returned by addIntervalsForSpills by their starting
+    // point.
+    // This also updates the NextReloadMap. That is, it adds a mapping from a
+    // register defined by a reload from SS to the next reload from SS in the
+    // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ unsigned ReloadIdx = ReloadLi->beginNumber();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginNumber() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+ return;
+ }
+
+ ++NumBacktracks;
+
+  // Push the current interval back to unhandled since we are going
+  // to re-run at least this iteration. Since we didn't modify it, it
+  // should go back right to the front of the list.
+ unhandled_.push(cur);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
+ "did not choose a register to spill?");
+
+  // We spill all intervals aliasing the register with the minimum weight,
+  // roll back to the interval with the earliest start point, and let the
+  // linear scan algorithm run again.
+ SmallVector<LiveInterval*, 8> spillIs;
+
+ // Determine which intervals have to be spilled.
+ findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
+
+ // Set of spilled vregs (used later to rollback properly)
+ SmallSet<unsigned, 8> spilled;
+
+  // The earliest start of a spilled interval indicates how far back in
+  // handled_ we need to roll.
+
+ unsigned earliestStart = cur->beginNumber();
+ LiveInterval *earliestStartInterval = cur;
+
+ // Spill live intervals of virtual regs mapped to the physical register we
+ // want to clear (and its aliases). We only spill those that overlap with the
+  // current interval as the rest do not affect its allocation. We also keep
+ // track of the earliest start of all spilled live intervals since this will
+ // mark our rollback point.
+ std::vector<LiveInterval*> added;
+ while (!spillIs.empty()) {
+ bool epicFail = false;
+ LiveInterval *sli = spillIs.back();
+ spillIs.pop_back();
+ DOUT << "\t\t\tspilling(a): " << *sli << '\n';
+ earliestStart = std::min(earliestStart, sli->beginNumber());
+ earliestStartInterval =
+ (earliestStartInterval->beginNumber() < sli->beginNumber()) ?
+ earliestStartInterval : sli;
+
+ if (earliestStartInterval->beginNumber()!=earliestStart) {
+ epicFail |= true;
+ std::cerr << "What the 1 - "
+ << "earliestStart = " << earliestStart
+ << "earliestStartInterval = " << earliestStartInterval->beginNumber()
+ << "\n";
+ }
+
+ std::vector<LiveInterval*> newIs;
+ if (!NewSpillFramework) {
+ newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_);
+ } else {
+ newIs = spiller_->spill(sli);
+ }
+ addStackInterval(sli, ls_, li_, mri_, *vrm_);
+ std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+ spilled.insert(sli->reg);
+
+ if (earliestStartInterval->beginNumber()!=earliestStart) {
+ epicFail |= true;
+ std::cerr << "What the 2 - "
+ << "earliestStart = " << earliestStart
+ << "earliestStartInterval = " << earliestStartInterval->beginNumber()
+ << "\n";
+ }
+
+ if (epicFail) {
+ //abort();
+ }
+ }
+
+ earliestStart = earliestStartInterval->beginNumber();
+
+ DOUT << "\t\trolling back to: " << earliestStart << '\n';
+
+ // Scan handled in reverse order up to the earliest start of a
+ // spilled live interval and undo each one, restoring the state of
+ // unhandled.
+ while (!handled_.empty()) {
+ LiveInterval* i = handled_.back();
+    // If this interval starts before earliestStart we are done.
+ if (i->beginNumber() < earliestStart)
+ break;
+ DOUT << "\t\t\tundo changes for: " << *i << '\n';
+ handled_.pop_back();
+
+ // When undoing a live interval allocation we must know if it is active or
+ // inactive to properly update regUse_ and the VirtRegMap.
+ IntervalPtrs::iterator it;
+ if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+ active_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ delRegUse(vrm_->getPhys(i->reg));
+ vrm_->clearVirt(i->reg);
+ } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+ inactive_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ vrm_->clearVirt(i->reg);
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+ "Can only allocate virtual registers!");
+ vrm_->clearVirt(i->reg);
+ unhandled_.push(i);
+ }
+
+ DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+ if (ii == DowngradeMap.end())
+      // If the interval has a preference, it must be defined by a copy. Clear
+      // the preference now since the source interval allocation may have been
+      // undone as well.
+ i->preference = 0;
+ else {
+ UpgradeRegister(ii->second);
+ }
+ }
+
+ // Rewind the iterators in the active, inactive, and fixed lists back to the
+ // point we reverted to.
+ RevertVectorIteratorsTo(active_, earliestStart);
+ RevertVectorIteratorsTo(inactive_, earliestStart);
+ RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // Scan the rest and undo each interval that expired after earliestStart and
+  // insert it in active (the next iteration of the algorithm will put it in
+  // inactive if required).
+ for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+ LiveInterval *HI = handled_[i];
+ if (!HI->expiredAt(earliestStart) &&
+ HI->expiredAt(cur->beginNumber())) {
+ DOUT << "\t\t\tundo changes for: " << *HI << '\n';
+ active_.push_back(std::make_pair(HI, HI->begin()));
+ assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+ addRegUse(vrm_->getPhys(HI->reg));
+ }
+ }
+
+ // Merge added with unhandled.
+  // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ std::sort(added.begin(), added.end(), LISorter());
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ unsigned ReloadIdx = ReloadLi->beginNumber();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginNumber() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+}
+
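+/// getFreePhysReg - Scan RC's allocation order for a register that is
+/// currently unused according to regUse_, optionally skipping "downgraded"
+/// registers.  Among the free registers, prefer the one with the highest
+/// inactive-interval count (see the comment about register pressure below).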
+unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs) {
+ unsigned FreeReg = 0;
+ unsigned FreeRegInactiveCount = 0;
+
+ TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
+ TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+ assert(I != E && "No allocatable register in this register class!");
+
+ // Scan for the first available register.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ if (isRegAvail(Reg)) {
+ FreeReg = Reg;
+ if (FreeReg < inactiveCounts.size())
+ FreeRegInactiveCount = inactiveCounts[FreeReg];
+ else
+ FreeRegInactiveCount = 0;
+ break;
+ }
+ }
+
+ // If there are no free regs, or if this reg has the max inactive count,
+ // return this register.
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
+ return FreeReg;
+
+ // Continue scanning the registers, looking for the one with the highest
+ // inactive count. Alkis found that this reduced register pressure very
+ // slightly on X86 (in rev 1.94 of this file), though this should probably be
+ // reevaluated now.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ // Ignore "downgraded" registers.
+ if (SkipDGRegs && DowngradedRegs.count(Reg))
+ continue;
+ if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+ FreeRegInactiveCount < inactiveCounts[Reg]) {
+ FreeReg = Reg;
+ FreeRegInactiveCount = inactiveCounts[Reg];
+ if (FreeRegInactiveCount == MaxInactiveCount)
+ break; // We found the one with the max inactive count.
+ }
+ }
+
+ return FreeReg;
+}
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+ SmallVector<unsigned, 256> inactiveCounts;
+ unsigned MaxInactiveCount = 0;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+ reg = vrm_->getPhys(reg);
+ if (inactiveCounts.size() <= reg)
+ inactiveCounts.resize(reg+1);
+ ++inactiveCounts[reg];
+ MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+ }
+ }
+
+ // If copy coalescer has assigned a "preferred" register, check if it's
+ // available first.
+ if (cur->preference) {
+ DOUT << "(preferred: " << tri_->getName(cur->preference) << ") ";
+ if (isRegAvail(cur->preference) &&
+ RC->contains(cur->preference))
+ return cur->preference;
+ }
+
+ if (!DowngradedRegs.empty()) {
+ unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts,
+ true);
+ if (FreeReg)
+ return FreeReg;
+ }
+ return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false);
+}
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+ return new RALinScan();
+}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
new file mode 100644
index 0000000..e1cc20c
--- /dev/null
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -0,0 +1,1068 @@
+//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+static RegisterRegAlloc
+ localRegAlloc("local", "local register allocator",
+ createLocalRegisterAllocator);
+
+namespace {
+ class VISIBILITY_HIDDEN RALocal : public MachineFunctionPass {
+ public:
+ static char ID;
+ RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Virt2PhysRegMap - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+ return Virt2PhysRegMap[VirtReg];
+ }
+
+ // PhysRegsUsed - This array is effectively a map, containing entries for
+ // each physical register that currently has a value (ie, it is in
+ // Virt2PhysRegMap). The value mapped to is the virtual register
+ // corresponding to the physical register (the inverse of the
+ // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
+ // because it is used by a future instruction, and to -2 if it is not
+ // allocatable. If the entry for a physical register is -1, then the
+ // physical register is "not in the map".
+ //
+ std::vector<int> PhysRegsUsed;
+
+ // PhysRegsUseOrder - This contains a list of the physical registers that
+ // currently have a virtual register value in them. This list provides an
+ // ordering of registers, imposing a reallocation order. This list is only
+ // used if all registers are allocated and we have to spill one, in which
+ // case we spill the least recently used register. Entries at the front of
+ // the list are the least recently used registers, entries at the back are
+ // the most recently used.
+ //
+ std::vector<unsigned> PhysRegsUseOrder;
+
+ // Virt2LastUseMap - This maps each virtual register to its last use
+ // (MachineInstr*, operand index pair).
+ IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
+ Virt2LastUseMap;
+
+ std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ return Virt2LastUseMap[Reg];
+ }
+
+ // VirtRegModified - This bitset contains information about which virtual
+ // registers need to be spilled back to memory when their registers are
+ // scavenged. If a virtual register has simply been rematerialized, there
+ // is no reason to spill it to memory when we need the register back.
+ //
+ BitVector VirtRegModified;
+
+ // UsedInMultipleBlocks - Tracks whether a particular register is used in
+ // more than one block.
+ BitVector UsedInMultipleBlocks;
+
+ void markVirtRegModified(unsigned Reg, bool Val = true) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ if (Val)
+ VirtRegModified.set(Reg);
+ else
+ VirtRegModified.reset(Reg);
+ }
+
+ bool isVirtRegModified(unsigned Reg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+ && "Illegal virtual register!");
+ return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+ }
+
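+    // AddToPhysRegsUseOrder - Move Reg to the back of PhysRegsUseOrder (adding
+    // it if necessary), marking it as the most recently used physical
+    // register.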
+ void AddToPhysRegsUseOrder(unsigned Reg) {
+ std::vector<unsigned>::iterator It =
+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg);
+ if (It != PhysRegsUseOrder.end())
+ PhysRegsUseOrder.erase(It);
+ PhysRegsUseOrder.push_back(Reg);
+ }
+
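+    // MarkPhysRegRecentlyUsed - Treat Reg as just used: move its entry (and
+    // entries for registers aliasing it) to the back of PhysRegsUseOrder so
+    // they become the last candidates for LRU spilling.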
+ void MarkPhysRegRecentlyUsed(unsigned Reg) {
+ if (PhysRegsUseOrder.empty() ||
+ PhysRegsUseOrder.back() == Reg) return; // Already most recently used
+
+ for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i)
+ if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) {
+ unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle
+ PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
+ // Add it to the end of the list
+ PhysRegsUseOrder.push_back(RegMatch);
+ if (RegMatch == Reg)
+ return; // Found an exact match, exit early
+ }
+ }
+
+ public:
+ virtual const char *getPassName() const {
+ return "Local Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// runOnMachineFunction - Register allocate the whole function
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+
+ /// areRegsEqual - This method returns true if the specified registers are
+ /// related to each other. To do this, it checks to see if they are equal
+ /// or if the first register is in the alias set of the second register.
+ ///
+ bool areRegsEqual(unsigned R1, unsigned R2) const {
+ if (R1 == R2) return true;
+ for (const unsigned *AliasSet = TRI->getAliasSet(R2);
+ *AliasSet; ++AliasSet) {
+ if (*AliasSet == R1) return true;
+ }
+ return false;
+ }
+
+ /// getStackSpaceFor - This returns the frame index of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// removePhysReg - This method marks the specified physical register as no
+ /// longer being in use.
+ ///
+ void removePhysReg(unsigned PhysReg);
+
+ /// spillVirtReg - This method spills the value specified by PhysReg into
+ /// the virtual register slot specified by VirtReg. It then updates the RA
+ /// data structures to indicate the fact that PhysReg is now available.
+ ///
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// spillPhysReg - This method spills the specified physical register into
+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+ /// true, then the request is ignored if the physical register does not
+ /// contain a virtual register.
+ ///
+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs = false);
+
+ /// assignVirtToPhysReg - This method updates local state so that we know
+ /// that PhysReg is the proper container for VirtReg now. The physical
+ /// register must not be used for anything else when this is called.
+ ///
+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+ /// isPhysRegAvailable - Return true if the specified physical register is
+ /// free and available for use. This also includes checking to see if
+ /// aliased registers are all free...
+ ///
+ bool isPhysRegAvailable(unsigned PhysReg) const;
+
+ /// getFreeReg - Look to see if there is a free register available in the
+ /// specified register class. If not, return 0.
+ ///
+ unsigned getFreeReg(const TargetRegisterClass *RC);
+
+ /// getReg - Find a physical register to hold the specified virtual
+ /// register. If all compatible physical registers are used, this method
+ /// spills the last used virtual register to the stack, and uses that
+    /// register. If NoFree is true, the caller knows there isn't a free
+    /// register, so do not call getFreeReg().
+ unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned VirtReg, bool NoFree = false);
+
+ /// reloadVirtReg - This method transforms the specified virtual
+ /// register use to refer to a physical register. This method may do this
+ /// in one of several ways: if the register is available in a physical
+ /// register already, it uses that physical register. If the value is not
+ /// in a physical register, and if there are physical registers available,
+ /// it loads it into a register. If register pressure is high, and it is
+ /// possible, it tries to fold the load of the virtual register into the
+ /// instruction itself. It avoids doing this if register pressure is low to
+ /// improve the chance that subsequent instructions can use the reloaded
+ /// value. This method returns the modified instruction.
+ ///
+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum, SmallSet<unsigned, 4> &RRegs);
+
+ /// ComputeLocalLiveness - Computes liveness of registers within a basic
+ /// block, setting the killed/dead flags as appropriate.
+ void ComputeLocalLiveness(MachineBasicBlock& MBB);
+
+ void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+ unsigned PhysReg);
+ };
+ char RALocal::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location Reg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RALocal::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1; // PhysReg no longer used
+
+ std::vector<unsigned>::iterator It =
+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
+ if (It != PhysRegsUseOrder.end())
+ PhysRegsUseOrder.erase(It);
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RALocal::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " There must be a missing kill for the register, or a use must exist"
+         " beyond the intended one.");
+ DOUT << " Spilling register " << TRI->getName(PhysReg)
+ << " containing %reg" << VirtReg;
+
+ if (!isVirtRegModified(VirtReg)) {
+ DOUT << " which has not been modified, so no store necessary!";
+ std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+ if (LastUse.first)
+ LastUse.first->getOperand(LastUse.second).setIsKill();
+ } else {
+ // Otherwise, there is a virtual register corresponding to this physical
+ // register. We only need to spill it into its stack slot if it has been
+ // modified.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DOUT << " to stack slot #" << FrameIndex;
+    // If the instruction reads the register that's spilled (e.g. this can
+    // happen if it is a move to a physical register), then the spill
+    // instruction is not a kill.
+ bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
+ TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+ ++NumStores; // Update statistics
+ }
+
+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+
+ DOUT << "\n";
+ removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs) {
+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
+ assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+ } else {
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register.
+ PhysRegsUsed[*AliasSet] != -2) // If allocatable.
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+ // Update information to note the fact that this register was just used, and
+ // it holds VirtReg.
+ PhysRegsUsed[PhysReg] = VirtReg;
+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+ AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
+ if (PhysRegsUsed[PhysReg] != -1) return false;
+
+ // If the selected register aliases any other allocated registers, it is
+ // not free!
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
+ return false; // Can't use this reg then.
+ return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
+ // Get iterators defining the range of registers that are valid to allocate in
+ // this class, which also specifies the preferred allocation order.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegAvailable(*RI)) { // Is reg unused?
+ assert(*RI != 0 && "Cannot use register!");
+ return *RI; // Found an unused register!
+ }
+ return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register. If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned VirtReg, bool NoFree) {
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+
+ // First check to see if we have a free register of the requested type...
+ unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
+
+ // If we didn't find an unused register, scavenge one now!
+ if (PhysReg == 0) {
+ assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+
+ // Loop over all of the preallocated registers from the least recently used
+ // to the most recently used. When we find one that is capable of holding
+ // our register, use it.
+ for (unsigned i = 0; PhysReg == 0; ++i) {
+ assert(i != PhysRegsUseOrder.size() &&
+ "Couldn't find a register of the appropriate class!");
+
+ unsigned R = PhysRegsUseOrder[i];
+
+ // We can only use this register if it holds a virtual register (ie, it
+ // can be spilled). Do not use it if it is an explicitly allocated
+ // physical register!
+ assert(PhysRegsUsed[R] != -1 &&
+ "PhysReg in PhysRegsUseOrder, but is not allocated?");
+ if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+ // If the current register is compatible, use it.
+ if (RC->contains(R)) {
+ PhysReg = R;
+ break;
+ } else {
+ // If one of the registers aliased to the current register is
+ // compatible, use it.
+ for (const unsigned *AliasIt = TRI->getAliasSet(R);
+ *AliasIt; ++AliasIt) {
+ if (RC->contains(*AliasIt) &&
+ // If this is pinned down for some reason, don't use it. For
+ // example, if CL is pinned, and we run across CH, don't use
+              // CH as justification for scavenging ECX (which will
+ // fail).
+ PhysRegsUsed[*AliasIt] != 0 &&
+
+ // Make sure the register is allocatable. Don't allocate SIL on
+ // x86-32.
+ PhysRegsUsed[*AliasIt] != -2) {
+ PhysReg = *AliasIt; // Take an aliased register
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ assert(PhysReg && "Physical register not assigned!?!?");
+
+ // At this point PhysRegsUseOrder[i] is the least recently used register of
+ // compatible register class. Spill it to memory and reap its remains.
+ spillPhysReg(MBB, I, PhysReg);
+ }
+
+ // Now that we know which register we need to assign this to, do it now!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg;
+}
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register. This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register. If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register. If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself. It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value. This method returns the
+/// modified instruction.
+///
+MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum,
+ SmallSet<unsigned, 4> &ReloadedRegs) {
+ unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+ // If the virtual register is already available, just update the instruction
+ // and return.
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+ MarkPhysRegRecentlyUsed(PR); // Already have this value available!
+ MI->getOperand(OpNum).setReg(PR); // Assign the input register
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+ return MI;
+ }
+
+ // Otherwise, we need to fold it into the current instruction, or reload it.
+ // If we have registers available to hold the value, use them.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ unsigned PhysReg = getFreeReg(RC);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+ if (PhysReg) { // Register is available, allocate it!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ } else { // No registers available.
+ // Force some poor hapless value out of the register file to
+ // make room for the new register, and reload it.
+ PhysReg = getReg(MBB, MI, VirtReg, true);
+ }
+
+ markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
+
+ DOUT << " Reloading %reg" << VirtReg << " into "
+ << TRI->getName(PhysReg) << "\n";
+
+ // Add move instruction(s)
+ TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ ++NumLoads; // Update statistics
+
+ MF->getRegInfo().setPhysRegUsed(PhysReg);
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+
+ if (!ReloadedRegs.insert(PhysReg)) {
+ cerr << "Ran out of registers during register allocation!\n";
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+ cerr << "Please check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(cerr.stream(), TM);
+ }
+ exit(1);
+ }
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (!ReloadedRegs.insert(*SubRegs)) {
+ cerr << "Ran out of registers during register allocation!\n";
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+ cerr << "Please check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(cerr.stream(), TM);
+ }
+ exit(1);
+ }
+ }
+
+ return MI;
+}
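+
+// Note on the ReloadedRegs check above: it trips when a single instruction
+// needs more registers of one class than the target provides, which in
+// practice usually means an over-constrained inline asm. A hypothetical
+// example (illustrative only, not taken from any real target):
+//
+//   call void asm sideeffect "", "r,r,r,r,r,r,r,r,r"(i32 %a, ..., i32 %i)
+//
+// With nine "r" constraints and only eight general purpose registers, some
+// physical register would be reloaded twice for one instruction, the insert()
+// above would fail, and the allocator reports the error and exits.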
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. an update of a partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ MO.isDef() && !MO.isDead())
+ return true;
+ }
+ return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. an update of a partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+ !MO.isDef() && MO.isKill())
+ return true;
+ }
+ return false;
+}
+
+// precedes - Helper function to determine whether MachineInstr A
+// precedes MachineInstr B within the same MBB.
+static bool precedes(MachineBasicBlock::iterator A,
+ MachineBasicBlock::iterator B) {
+ if (A == B)
+ return false;
+
+ MachineBasicBlock::iterator I = A->getParent()->begin();
+ while (I != A->getParent()->end()) {
+ if (I == A)
+ return true;
+ else if (I == B)
+ return false;
+
+ ++I;
+ }
+
+ return false;
+}
+
+/// ComputeLocalLiveness - Computes liveness of registers within a basic
+/// block, setting the killed/dead flags as appropriate.
+void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
+ MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ // Keep track of the most recently seen previous use or def of each reg,
+ // so that we can update them with dead/kill markers.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = I->getOperand(i);
+ // Uses don't trigger any flags, but we need to save
+ // them for later. Also, we have to process these
+ // _before_ processing the defs, since an instr
+ // uses regs before it defs them.
+ if (MO.isReg() && MO.getReg() && MO.isUse()) {
+ LastUseDef[MO.getReg()] = std::make_pair(I, i);
+
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
+
+ const unsigned* Aliases = TRI->getAliasSet(MO.getReg());
+ if (Aliases) {
+ while (*Aliases) {
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ alias = LastUseDef.find(*Aliases);
+
+ if (alias != LastUseDef.end() && alias->second.first != I)
+ LastUseDef[*Aliases] = std::make_pair(I, i);
+
+ ++Aliases;
+ }
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = I->getOperand(i);
+      // Defs other than 2-addr redefs _do_ trigger flag changes:
+ // - A def followed by a def is dead
+ // - A use followed by a def is a kill
+ if (MO.isReg() && MO.getReg() && MO.isDef()) {
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ last = LastUseDef.find(MO.getReg());
+ if (last != LastUseDef.end()) {
+ // Check if this is a two address instruction. If so, then
+ // the def does not kill the use.
+ if (last->second.first == I &&
+ I->isRegTiedToUseOperand(i))
+ continue;
+
+ MachineOperand& lastUD =
+ last->second.first->getOperand(last->second.second);
+ if (lastUD.isDef())
+ lastUD.setIsDead(true);
+ else
+ lastUD.setIsKill(true);
+ }
+
+ LastUseDef[MO.getReg()] = std::make_pair(I, i);
+ }
+ }
+ }
+
+ // Live-out (of the function) registers contain return values of the function,
+ // so we need to make sure they are alive at return time.
+ if (!MBB.empty() && MBB.back().getDesc().isReturn()) {
+ MachineInstr* Ret = &MBB.back();
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I)
+ if (!Ret->readsRegister(*I)) {
+ Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+ LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
+ }
+ }
+
+ // Finally, loop over the final use/def of each reg
+ // in the block and determine if it is dead.
+ for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
+ MachineInstr* MI = I->second.first;
+ unsigned idx = I->second.second;
+ MachineOperand& MO = MI->getOperand(idx);
+
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
+
+ // A crude approximation of "live-out" calculation
+ bool usedOutsideBlock = isPhysReg ? false :
+ UsedInMultipleBlocks.test(MO.getReg() -
+ TargetRegisterInfo::FirstVirtualRegister);
+ if (!isPhysReg && !usedOutsideBlock)
+ for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
+ UE = MRI.reg_end(); UI != UE; ++UI)
+ // Two cases:
+ // - used in another block
+ // - used in the same block before it is defined (loop)
+ if (UI->getParent() != &MBB ||
+ (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) {
+ UsedInMultipleBlocks.set(MO.getReg() -
+ TargetRegisterInfo::FirstVirtualRegister);
+ usedOutsideBlock = true;
+ break;
+ }
+
+ // Physical registers and those that are not live-out of the block
+ // are killed/dead at their last use/def within this block.
+ if (isPhysReg || !usedOutsideBlock) {
+ if (MO.isUse()) {
+ // Don't mark uses that are tied to defs as kills.
+ if (!MI->isRegTiedToDefOperand(idx))
+ MO.setIsKill(true);
+ } else
+ MO.setIsDead(true);
+ }
+ }
+}
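+
+// A small illustration of the kill/dead rules implemented above, on a
+// hypothetical straight-line block (the vreg names are made up):
+//
+//   %x = ...          ; (1) def of %x
+//   %x = ...          ; (2) def after def: the def at (1) is marked dead
+//   %y = add %x, 1    ; (3) use of %x
+//   %x = ...          ; (4) def after use: the use at (3) is marked a kill
+//
+// The final use/def of each virtual register in the block is then marked
+// killed/dead as well, unless the vreg is also used outside this block;
+// physical registers are always marked at their last use/def.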
+
+void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII = MBB.begin();
+
+ DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+ // Add live-in registers as active.
+ for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
+ E = MBB.livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ AddToPhysRegsUseOrder(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ }
+ }
+ }
+
+ ComputeLocalLiveness(MBB);
+
+  // Now, sequentially allocate each instruction in the MBB.
+ while (MII != MBB.end()) {
+ MachineInstr *MI = MII++;
+ const TargetInstrDesc &TID = MI->getDesc();
+ DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI;
+ DOUT << " Regs have values: ";
+ for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ DOUT << "[" << TRI->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ DOUT << "\n");
+
+ // Loop over the implicit uses, making sure that they are at the head of the
+ // use order list, so they don't get reallocated.
+ if (TID.ImplicitUses) {
+ for (const unsigned *ImplicitUses = TID.ImplicitUses;
+ *ImplicitUses; ++ImplicitUses)
+ MarkPhysRegRecentlyUsed(*ImplicitUses);
+ }
+
+ SmallVector<unsigned, 8> Kills;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // These are extra physical register kills when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ Kills.push_back(MO.getReg());
+ }
+ }
+
+ // If any physical regs are earlyclobber, spill any value they might
+ // have in them, then mark them unallocatable.
+ // If any virtual regs are earlyclobber, allocate them now (before
+ // freeing inputs that are killed).
+ if (MI->getOpcode()==TargetInstrInfo::INLINEASM) {
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() &&
+ MO.getReg()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) =
+ std::make_pair((MachineInstr*)0, 0);
+ DOUT << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n";
+ MO.setReg(DestPhysReg); // Assign the earlyclobber register
+ } else {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*SubRegs);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Get the used operands into registers. This has the potential to spill
+ // incoming values if we are out of registers. Note that we completely
+ // ignore physical register uses here. We assume that if an explicit
+    // physical register is referenced by the instruction, it is guaranteed
+ // to be live-in, or the input is badly hosed.
+ //
+ SmallSet<unsigned, 4> ReloadedRegs;
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // here we are looking for only used operands (never def&use)
+ if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = reloadVirtReg(MBB, MI, i, ReloadedRegs);
+ }
+
+ // If this instruction is the last user of this register, kill the
+ // value, freeing the register being used, so it doesn't need to be
+ // spilled to memory.
+ //
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned VirtReg = Kills[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // If the virtual register was never materialized into a register, it
+ // might not be in the map, but it won't hurt to zero it out anyway.
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else {
+ assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+ "Silently clearing a virtual register?");
+ }
+
+ if (PhysReg) {
+ DOUT << " Last use of " << TRI->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ DOUT << " Last use of "
+ << TRI->getName(*SubRegs)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Loop over all of the operands of the instruction, spilling registers that
+ // are defined, and marking explicit destinations in the PhysRegsUsed map.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+ !MO.isEarlyClobber() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Loop over the implicit defs, spilling them as well.
+ if (TID.ImplicitDefs) {
+ for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+ *ImplicitDefs; ++ImplicitDefs) {
+ unsigned Reg = *ImplicitDefs;
+ if (PhysRegsUsed[Reg] != -2) {
+ spillPhysReg(MBB, MI, Reg, true);
+ AddToPhysRegsUseOrder(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ }
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ AddToPhysRegsUseOrder(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ }
+ }
+ }
+ }
+
+ SmallVector<unsigned, 8> DeadDefs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadDefs.push_back(MO.getReg());
+ }
+
+ // Okay, we have allocated all of the source operands and spilled any values
+ // that would be destroyed by defs of this instruction. Loop over the
+ // explicit defs and assign them to a register, spilling incoming values if
+ // we need to scavenge a register.
+ //
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ !MO.isEarlyClobber() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
+ DOUT << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n";
+ MO.setReg(DestPhysReg); // Assign the output register
+ }
+ }
+
+ // If this instruction defines any registers that are immediately dead,
+ // kill them now.
+ //
+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+ unsigned VirtReg = DeadDefs[i];
+ unsigned PhysReg = VirtReg;
+ if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ assert(PhysReg != 0);
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ }
+
+ if (PhysReg) {
+ DOUT << " Register " << TRI->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Register " << TRI->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it from live set\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Finally, if this is a noop copy instruction, zap it. (Except that if
+ // the copy is dead, it must be kept to avoid messing up liveness info for
+ // the register scavenger. See pr4100.)
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ SrcReg == DstReg && DeadDefs.empty())
+ MBB.erase(MI);
+ }
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+ // Spill all physical registers holding virtual registers now.
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
+ if (unsigned VirtReg = PhysRegsUsed[i])
+ spillVirtReg(MBB, MI, VirtReg, i);
+ else
+ removePhysReg(i);
+ }
+
+#if 0
+ // This checking code is very expensive.
+ bool AllOk = true;
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (unsigned PR = Virt2PhysRegMap[i]) {
+ cerr << "Register still mapped: " << i << " -> " << PR << "\n";
+ AllOk = false;
+ }
+ assert(AllOk && "Virtual registers still in phys regs?");
+#endif
+
+  // Clear any physical registers which appear live at the end of the basic
+  // block but which do not hold any virtual registers, e.g., the stack
+  // pointer.
+ PhysRegsUseOrder.clear();
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function " << "\n";
+ MF = &Fn;
+ TM = &Fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+
+ PhysRegsUsed.assign(TRI->getNumRegs(), -1);
+
+ // At various places we want to efficiently check to see whether a register
+ // is allocatable. To handle this, we mark all unallocatable registers as
+ // being pinned down, permanently.
+ {
+ BitVector Allocable = TRI->getAllocatableSet(Fn);
+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+ if (!Allocable[i])
+ PhysRegsUsed[i] = -2; // Mark the reg unallocable.
+ }
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ StackSlotForVirtReg.grow(LastVirtReg);
+ Virt2PhysRegMap.grow(LastVirtReg);
+ Virt2LastUseMap.grow(LastVirtReg);
+ VirtRegModified.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+ UsedInMultipleBlocks.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ AllocateBasicBlock(*MBB);
+
+ StackSlotForVirtReg.clear();
+ PhysRegsUsed.clear();
+ VirtRegModified.clear();
+ UsedInMultipleBlocks.clear();
+ Virt2PhysRegMap.clear();
+ Virt2LastUseMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createLocalRegisterAllocator() {
+ return new RALocal();
+}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 0000000..61450a7
--- /dev/null
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,871 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (PBQP.cpp) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
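+
+// The solver below is driven through the C-style interface declared in PBQP.h.
+// For orientation only, the calls used in this file fit together roughly as
+// follows (numVRegNodes, costVector and matrix are placeholder names):
+//
+//   pbqp *p = alloc_pbqp(numVRegNodes);       // one node per vreg interval
+//   add_pbqp_nodecosts(p, n, costVector);     // spill cost + per-preg costs
+//   add_pbqp_edgecosts(p, n1, n2, matrix);    // interference / coalescing
+//   solve_pbqp(p);
+//   unsigned sel = get_pbqp_solution(p, n);   // 0 = spill, i > 0 = allowed[i-1]
+//   free_pbqp(p);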
+
+#define DEBUG_TYPE "regalloc"
+
+#include "PBQP.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createPBQPRegisterAllocator);
+
+namespace {
+
+ //!
+ //! PBQP based allocators solve the register allocation problem by mapping
+ //! register allocation problems to Partitioned Boolean Quadratic
+ //! Programming problems.
+ class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass {
+ public:
+
+ static char ID;
+
+ //! Construct a PBQP register allocator.
+ PBQPRegAlloc() : MachineFunctionPass((intptr_t)&ID) {}
+
+ //! Return the pass name.
+ virtual const char* getPassName() const throw() {
+ return "PBQP Register Allocator";
+ }
+
+ //! PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<LiveIntervals>();
+ au.addRequiredTransitive<RegisterCoalescer>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addRequired<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ //! Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::set<unsigned> RegSet;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQPNum> CoalesceMap;
+
+ typedef std::set<LiveInterval*> LiveIntervalSet;
+
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
+
+ LI2NodeMap li2Node;
+ Node2LIMap node2LI;
+ AllowedSetMap allowedSets;
+ LiveIntervalSet vregIntervalsToAlloc,
+ emptyVRegIntervals;
+
+
+ //! Builds a PBQP cost vector.
+ template <typename RegContainer>
+ PBQPVector* buildCostVector(unsigned vReg,
+ const RegContainer &allowed,
+                                const CoalesceMap &coalesces,
+ PBQPNum spillCost) const;
+
+ //! \brief Builds a PBQP interference matrix.
+ //!
+ //! @return Either a pointer to a non-zero PBQP matrix representing the
+ //! allocation option costs, or a null pointer for a zero matrix.
+ //!
+ //! Expects allowed sets for two interfering LiveIntervals. These allowed
+ //! sets should contain only allocable registers from the LiveInterval's
+ //! register class, with any interfering pre-colored registers removed.
+ template <typename RegContainer>
+ PBQPMatrix* buildInterferenceMatrix(const RegContainer &allowed1,
+ const RegContainer &allowed2) const;
+
+    //! \brief Builds a PBQP coalescing matrix.
+    //!
+ //! Expects allowed sets for two potentially coalescable LiveIntervals,
+ //! and an estimated benefit due to coalescing. The allowed sets should
+ //! contain only allocable registers from the LiveInterval's register
+ //! classes, with any interfering pre-colored registers removed.
+ template <typename RegContainer>
+ PBQPMatrix* buildCoalescingMatrix(const RegContainer &allowed1,
+ const RegContainer &allowed2,
+ PBQPNum cBenefit) const;
+
+ //! \brief Finds coalescing opportunities and returns them as a map.
+ //!
+ //! Any entries in the map are guaranteed coalescable, even if their
+ //! corresponding live intervals overlap.
+ CoalesceMap findCoalesces();
+
+ //! \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
+
+ //! \brief Constructs a PBQP problem representation of the register
+ //! allocation problem for this function.
+ //!
+ //! @return a PBQP solver object for the register allocation problem.
+ pbqp* constructPBQPProblem();
+
+ //! \brief Adds a stack interval if the given live interval has been
+ //! spilled. Used to support stack slot coloring.
+    void addStackInterval(const LiveInterval *spilled, MachineRegisterInfo* mri);
+
+    //! \brief Given a solved PBQP problem, maps the solution back to a register
+ //! assignment.
+ bool mapPBQPToRegAlloc(pbqp *problem);
+
+ //! \brief Postprocessing before final spilling. Sets basic block "live in"
+ //! variables.
+ void finalizeAlloc() const;
+
+ };
+
+ char PBQPRegAlloc::ID = 0;
+}
+
+
+template <typename RegContainer>
+PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg,
+ const RegContainer &allowed,
+ const CoalesceMap &coalesces,
+ PBQPNum spillCost) const {
+
+ typedef typename RegContainer::const_iterator AllowedItr;
+
+ // Allocate vector. Additional element (0th) used for spill option
+ PBQPVector *v = new PBQPVector(allowed.size() + 1);
+
+ (*v)[0] = spillCost;
+
+ // Iterate over the allowed registers inserting coalesce benefits if there
+ // are any.
+ unsigned ai = 0;
+ for (AllowedItr itr = allowed.begin(), end = allowed.end();
+ itr != end; ++itr, ++ai) {
+
+ unsigned pReg = *itr;
+
+ CoalesceMap::const_iterator cmItr =
+ coalesces.find(RegPair(vReg, pReg));
+
+ // No coalesce - on to the next preg.
+ if (cmItr == coalesces.end())
+ continue;
+
+ // We have a coalesce - insert the benefit.
+ (*v)[ai + 1] = -cmItr->second;
+ }
+
+ return v;
+}
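+
+// For example (illustrative numbers only): a vreg with allowed = {R0, R1, R2},
+// spillCost = 5.0 and a coalescing benefit of 2.0 against R1 would get
+//
+//   [ 5.0, 0.0, -2.0, 0.0 ]    // element 0 = spill, elements 1..3 = R0..R2
+//
+// so the solver sees the R1 option as cheaper than the other registers.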
+
+template <typename RegContainer>
+PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix(
+ const RegContainer &allowed1, const RegContainer &allowed2) const {
+
+ typedef typename RegContainer::const_iterator RegContainerIterator;
+
+ // Construct a PBQP matrix representing the cost of allocation options. The
+ // rows and columns correspond to the allocation options for the two live
+ // intervals. Elements will be infinite where corresponding registers alias,
+ // since we cannot allocate aliasing registers to interfering live intervals.
+ // All other elements (non-aliasing combinations) will have zero cost. Note
+ // that the spill option (element 0,0) has zero cost, since we can allocate
+ // both intervals to memory safely (the cost for each individual allocation
+ // to memory is accounted for by the cost vectors for each live interval).
+ PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1);
+
+ // Assume this is a zero matrix until proven otherwise. Zero matrices occur
+ // between interfering live ranges with non-overlapping register sets (e.g.
+ // non-overlapping reg classes, or disjoint sets of allowed regs within the
+ // same class). The term "overlapping" is used advisedly: sets which do not
+ // intersect, but contain registers which alias, will have non-zero matrices.
+ // We optimize zero matrices away to improve solver speed.
+ bool isZeroMatrix = true;
+
+
+ // Row index. Starts at 1, since the 0th row is for the spill option, which
+ // is always zero.
+ unsigned ri = 1;
+
+ // Iterate over allowed sets, insert infinities where required.
+ for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+ a1Itr != a1End; ++a1Itr) {
+
+ // Column index, starts at 1 as for row index.
+ unsigned ci = 1;
+ unsigned reg1 = *a1Itr;
+
+ for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+ a2Itr != a2End; ++a2Itr) {
+
+ unsigned reg2 = *a2Itr;
+
+ // If the row/column regs are identical or alias insert an infinity.
+ if ((reg1 == reg2) || tri->areAliases(reg1, reg2)) {
+ (*m)[ri][ci] = std::numeric_limits<PBQPNum>::infinity();
+ isZeroMatrix = false;
+ }
+
+ ++ci;
+ }
+
+ ++ri;
+ }
+
+ // If this turns out to be a zero matrix...
+ if (isZeroMatrix) {
+ // free it and return null.
+ delete m;
+ return 0;
+ }
+
+ // ...otherwise return the cost matrix.
+ return m;
+}
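+
+// A toy instance of the matrix built above (register names are illustrative):
+// for two interfering intervals with allowed1 = allowed2 = {R0, R1} and no
+// aliasing beyond identity, the result is
+//
+//          spill   R0    R1
+//   spill [  0      0     0  ]
+//   R0    [  0     inf    0  ]
+//   R1    [  0      0    inf ]
+//
+// The infinite entries forbid giving the same register to both intervals,
+// while the zero spill row/column keeps spilling always feasible.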
+
+template <typename RegContainer>
+PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix(
+ const RegContainer &allowed1, const RegContainer &allowed2,
+ PBQPNum cBenefit) const {
+
+ typedef typename RegContainer::const_iterator RegContainerIterator;
+
+ // Construct a PBQP Matrix representing the benefits of coalescing. As with
+ // interference matrices the rows and columns represent allowed registers
+ // for the LiveIntervals which are (potentially) to be coalesced. The amount
+ // -cBenefit will be placed in any element representing the same register
+ // for both intervals.
+ PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1);
+
+ // Reset costs to zero.
+ m->reset(0);
+
+ // Assume the matrix is zero till proven otherwise. Zero matrices will be
+ // optimized away as in the interference case.
+ bool isZeroMatrix = true;
+
+ // Row index. Starts at 1, since the 0th row is for the spill option, which
+ // is always zero.
+ unsigned ri = 1;
+
+ // Iterate over the allowed sets, insert coalescing benefits where
+ // appropriate.
+ for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
+ a1Itr != a1End; ++a1Itr) {
+
+ // Column index, starts at 1 as for row index.
+ unsigned ci = 1;
+ unsigned reg1 = *a1Itr;
+
+ for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
+ a2Itr != a2End; ++a2Itr) {
+
+ // If the row and column represent the same register insert a beneficial
+ // cost to preference this allocation - it would allow us to eliminate a
+ // move instruction.
+ if (reg1 == *a2Itr) {
+ (*m)[ri][ci] = -cBenefit;
+ isZeroMatrix = false;
+ }
+
+ ++ci;
+ }
+
+ ++ri;
+ }
+
+ // If this turns out to be a zero matrix...
+ if (isZeroMatrix) {
+ // ...free it and return null.
+ delete m;
+ return 0;
+ }
+
+ return m;
+}
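+
+// Continuing the toy example above: with allowed1 = allowed2 = {R0, R1} and
+// cBenefit = 3.0, the matrix built here is
+//
+//          spill   R0     R1
+//   spill [  0      0      0   ]
+//   R0    [  0    -3.0     0   ]
+//   R1    [  0      0    -3.0  ]
+//
+// i.e. assigning the same register to both intervals is rewarded, which is
+// what lets the solver eliminate the copy.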
+
+PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
+
+ typedef MachineFunction::const_iterator MFIterator;
+ typedef MachineBasicBlock::const_iterator MBBIterator;
+ typedef LiveInterval::const_vni_iterator VNIIterator;
+
+ CoalesceMap coalescesFound;
+
+ // To find coalesces we need to iterate over the function looking for
+ // copy instructions.
+ for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
+ bbItr != bbEnd; ++bbItr) {
+
+ const MachineBasicBlock *mbb = &*bbItr;
+
+ for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
+ iItr != iEnd; ++iItr) {
+
+ const MachineInstr *instr = &*iItr;
+ unsigned srcReg, dstReg, srcSubReg, dstSubReg;
+
+ // If this isn't a copy then continue to the next instruction.
+ if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg))
+ continue;
+
+ // If the registers are already the same our job is nice and easy.
+ if (dstReg == srcReg)
+ continue;
+
+ bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
+ dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
+
+ // If both registers are physical then we can't coalesce.
+ if (srcRegIsPhysical && dstRegIsPhysical)
+ continue;
+
+ // If it's a copy that includes a virtual register but the source and
+ // destination classes differ then we can't coalesce, so continue with
+ // the next instruction.
+ const TargetRegisterClass *srcRegClass = srcRegIsPhysical ?
+ tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg);
+
+ const TargetRegisterClass *dstRegClass = dstRegIsPhysical ?
+ tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg);
+
+ if (srcRegClass != dstRegClass)
+ continue;
+
+      // We also need any physical regs to be allocable; coalescing with
+ // a non-allocable register is invalid.
+ if (srcRegIsPhysical) {
+ if (std::find(srcRegClass->allocation_order_begin(*mf),
+ srcRegClass->allocation_order_end(*mf), srcReg) ==
+ srcRegClass->allocation_order_end(*mf))
+ continue;
+ }
+
+ if (dstRegIsPhysical) {
+ if (std::find(dstRegClass->allocation_order_begin(*mf),
+ dstRegClass->allocation_order_end(*mf), dstReg) ==
+ dstRegClass->allocation_order_end(*mf))
+ continue;
+ }
+
+ // If we've made it here we have a copy with compatible register classes.
+ // We can probably coalesce, but we need to consider overlap.
+ const LiveInterval *srcLI = &lis->getInterval(srcReg),
+ *dstLI = &lis->getInterval(dstReg);
+
+ if (srcLI->overlaps(*dstLI)) {
+ // Even in the case of an overlap we might still be able to coalesce,
+ // but we need to make sure that no definition of either range occurs
+ // while the other range is live.
+
+        // To check this, start by assuming we're ok.
+ bool badDef = false;
+
+ // Test all defs of the source range.
+ for (VNIIterator
+ vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
+ vniItr != vniEnd; ++vniItr) {
+
+ // If we find a def that kills the coalescing opportunity then
+ // record it and break from the loop.
+ if (dstLI->liveAt((*vniItr)->def)) {
+ badDef = true;
+ break;
+ }
+ }
+
+        // If we have a bad def, give up and continue to the next instruction.
+ if (badDef)
+ continue;
+
+ // Otherwise test definitions of the destination range.
+ for (VNIIterator
+ vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
+ vniItr != vniEnd; ++vniItr) {
+
+ // We want to make sure we skip the copy instruction itself.
+ if ((*vniItr)->copy == instr)
+ continue;
+
+ if (srcLI->liveAt((*vniItr)->def)) {
+ badDef = true;
+ break;
+ }
+ }
+
+        // As before, a bad def means we give up and continue to the next instr.
+ if (badDef)
+ continue;
+ }
+
+ // If we make it to here then either the ranges didn't overlap, or they
+ // did, but none of their definitions would prevent us from coalescing.
+ // We're good to go with the coalesce.
+
+ float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0;
+
+ coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
+ coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
+ }
+
+ }
+
+ return coalescesFound;
+}
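+
+// The benefit heuristic above grows with loop depth: powf(10, depth) / 5 gives
+// 0.2 for a copy outside any loop, 2.0 at depth 1, 20.0 at depth 2, and so on,
+// so copies inside hot loops are much more strongly preferred for coalescing.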
+
+void PBQPRegAlloc::findVRegIntervalsToAlloc() {
+
+ // Iterate over all live ranges.
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ // Ignore physical ones.
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ continue;
+
+ LiveInterval *li = itr->second;
+
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregIntervalsToAlloc.insert(li);
+ }
+ else {
+ emptyVRegIntervals.insert(li);
+ }
+ }
+}
+
+pbqp* PBQPRegAlloc::constructPBQPProblem() {
+
+ typedef std::vector<const LiveInterval*> LIVector;
+ typedef std::vector<unsigned> RegVector;
+
+ // This will store the physical intervals for easy reference.
+ LIVector physIntervals;
+
+ // Start by clearing the old node <-> live interval mappings & allowed sets
+ li2Node.clear();
+ node2LI.clear();
+ allowedSets.clear();
+
+ // Populate physIntervals, update preg use:
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+ physIntervals.push_back(itr->second);
+ mri->setPhysRegUsed(itr->second->reg);
+ }
+ }
+
+ // Iterate over vreg intervals, construct live interval <-> node number
+ // mappings.
+ for (LiveIntervalSet::const_iterator
+ itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
+ itr != end; ++itr) {
+ const LiveInterval *li = *itr;
+
+ li2Node[li] = node2LI.size();
+ node2LI.push_back(li);
+ }
+
+ // Get the set of potential coalesces.
+ CoalesceMap coalesces(findCoalesces());
+
+ // Construct a PBQP solver for this problem
+ pbqp *solver = alloc_pbqp(vregIntervalsToAlloc.size());
+
+ // Resize allowedSets container appropriately.
+ allowedSets.resize(vregIntervalsToAlloc.size());
+
+ // Iterate over virtual register intervals to compute allowed sets...
+ for (unsigned node = 0; node < node2LI.size(); ++node) {
+
+ // Grab pointers to the interval and its register class.
+ const LiveInterval *li = node2LI[node];
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+
+ // Start by assuming all allocable registers in the class are allowed...
+ RegVector liAllowed(liRC->allocation_order_begin(*mf),
+ liRC->allocation_order_end(*mf));
+
+ // Eliminate the physical registers which overlap with this range, along
+ // with all their aliases.
+ for (LIVector::iterator pItr = physIntervals.begin(),
+ pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
+
+ if (!li->overlaps(**pItr))
+ continue;
+
+ unsigned pReg = (*pItr)->reg;
+
+ // If we get here then the live intervals overlap, but we're still ok
+ // if they're coalescable.
+ if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
+ continue;
+
+ // If we get here then we have a genuine exclusion.
+
+ // Remove the overlapping reg...
+ RegVector::iterator eraseItr =
+ std::find(liAllowed.begin(), liAllowed.end(), pReg);
+
+ if (eraseItr != liAllowed.end())
+ liAllowed.erase(eraseItr);
+
+ const unsigned *aliasItr = tri->getAliasSet(pReg);
+
+ if (aliasItr != 0) {
+ // ...and its aliases.
+ for (; *aliasItr != 0; ++aliasItr) {
+ RegVector::iterator eraseItr =
+ std::find(liAllowed.begin(), liAllowed.end(), *aliasItr);
+
+ if (eraseItr != liAllowed.end()) {
+ liAllowed.erase(eraseItr);
+ }
+ }
+ }
+ }
+
+ // Copy the allowed set into a member vector for use when constructing cost
+ // vectors & matrices, and mapping PBQP solutions back to assignments.
+ allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end());
+
+ // Set the spill cost to the interval weight, or epsilon if the
+ // interval weight is zero
+ PBQPNum spillCost = (li->weight != 0.0) ?
+ li->weight : std::numeric_limits<PBQPNum>::min();
+
+ // Build a cost vector for this interval.
+ add_pbqp_nodecosts(solver, node,
+ buildCostVector(li->reg, allowedSets[node], coalesces,
+ spillCost));
+
+ }
+
+
+ // Now add the cost matrices...
+ for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) {
+ const LiveInterval *li = node2LI[node1];
+
+ // Test for live range overlaps and insert interference matrices.
+ for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) {
+ const LiveInterval *li2 = node2LI[node2];
+
+ CoalesceMap::const_iterator cmItr =
+ coalesces.find(RegPair(li->reg, li2->reg));
+
+ PBQPMatrix *m = 0;
+
+ if (cmItr != coalesces.end()) {
+ m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
+ cmItr->second);
+ }
+ else if (li->overlaps(*li2)) {
+ m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]);
+ }
+
+ if (m != 0) {
+ add_pbqp_edgecosts(solver, node1, node2, m);
+ delete m;
+ }
+ }
+ }
+
+ // We're done, PBQP problem constructed - return it.
+ return solver;
+}
+
+void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
+ MachineRegisterInfo* mri) {
+ int stackSlot = vrm->getStackSlot(spilled->reg);
+
+ if (stackSlot == VirtRegMap::NO_STACK_SLOT)
+ return;
+
+ const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
+ LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
+
+ VNInfo *vni;
+ if (stackInterval.getNumValNums() != 0)
+ vni = stackInterval.getValNumInfo(0);
+ else
+ vni = stackInterval.getNextValue(-0U, 0, lss->getVNInfoAllocator());
+
+ LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
+ stackInterval.MergeRangesInAsValue(rhsInterval, vni);
+}
+
+bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) {
+
+ // Set to true if we have any spills
+ bool anotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ vrm->clearAllVirt();
+
+ // Iterate over the nodes mapping the PBQP solution to a register assignment.
+ for (unsigned node = 0; node < node2LI.size(); ++node) {
+ unsigned virtReg = node2LI[node]->reg,
+ allocSelection = get_pbqp_solution(problem, node);
+
+ // If the PBQP solution is non-zero it's a physical register...
+ if (allocSelection != 0) {
+ // Get the physical reg, subtracting 1 to account for the spill option.
+ unsigned physReg = allowedSets[node][allocSelection - 1];
+
+ DOUT << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n";
+
+ assert(physReg != 0);
+
+ // Add to the virt reg map and update the used phys regs.
+ vrm->assignVirt2Phys(virtReg, physReg);
+ }
+ // ...Otherwise it's a spill.
+ else {
+
+ // Make sure we ignore this virtual reg on the next round
+ // of allocation
+ vregIntervalsToAlloc.erase(&lis->getInterval(virtReg));
+
+ // Insert spill ranges for this live range
+ const LiveInterval *spillInterval = node2LI[node];
+ double oldSpillWeight = spillInterval->weight;
+ SmallVector<LiveInterval*, 8> spillIs;
+ std::vector<LiveInterval*> newSpills =
+ lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
+ addStackInterval(spillInterval, mri);
+
+ DOUT << "VREG " << virtReg << " -> SPILLED (Cost: "
+ << oldSpillWeight << ", New vregs: ";
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (std::vector<LiveInterval*>::const_iterator
+ itr = newSpills.begin(), end = newSpills.end();
+ itr != end; ++itr) {
+
+ assert(!(*itr)->empty() && "Empty spill range.");
+
+ DOUT << (*itr)->reg << " ";
+
+ vregIntervalsToAlloc.insert(*itr);
+ }
+
+ DOUT << ")\n";
+
+ // We need another round if spill intervals were added.
+ anotherRoundNeeded |= !newSpills.empty();
+ }
+ }
+
+ return !anotherRoundNeeded;
+}
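+
+// Solution decoding example (hypothetical values): if allowedSets[n] is
+// {R3, R4, R5}, then get_pbqp_solution(problem, n) == 0 selects the spill
+// option, 1 selects R3, 2 selects R4 and 3 selects R5; hence the
+// "allocSelection - 1" adjustment above.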
+
+void PBQPRegAlloc::finalizeAlloc() const {
+ typedef LiveIntervals::iterator LIIterator;
+ typedef LiveInterval::Ranges::const_iterator LRIterator;
+
+ // First allocate registers for the empty intervals.
+ for (LiveIntervalSet::const_iterator
+ itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end();
+ itr != end; ++itr) {
+ LiveInterval *li = *itr;
+
+ unsigned physReg = li->preference;
+
+ if (physReg == 0) {
+ const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+ physReg = *liRC->allocation_order_begin(*mf);
+ }
+
+ vrm->assignVirt2Phys(li->reg, physReg);
+ }
+
+ // Finally iterate over the basic blocks to compute and set the live-in sets.
+ SmallVector<MachineBasicBlock*, 8> liveInMBBs;
+ MachineBasicBlock *entryMBB = &*mf->begin();
+
+ for (LIIterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = liItr->second;
+ unsigned reg = 0;
+
+ // Get the physical register for this interval
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ reg = li->reg;
+ }
+ else if (vrm->isAssignedReg(li->reg)) {
+ reg = vrm->getPhys(li->reg);
+ }
+ else {
+ // Ranges which are assigned a stack slot only are ignored.
+ continue;
+ }
+
+ // Ignore unallocated vregs:
+ if (reg == 0) {
+ continue;
+ }
+
+ // Iterate over the ranges of the current interval...
+ for (LRIterator lrItr = li->begin(), lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+
+ // Find the set of basic blocks which this range is live into...
+ if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
+ // And add the physreg for this interval to their live-in sets.
+ for (unsigned i = 0; i < liveInMBBs.size(); ++i) {
+ if (liveInMBBs[i] != entryMBB) {
+ if (!liveInMBBs[i]->isLiveIn(reg)) {
+ liveInMBBs[i]->addLiveIn(reg);
+ }
+ }
+ }
+ liveInMBBs.clear();
+ }
+ }
+ }
+
+}
+
+bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
+
+ mf = &MF;
+ tm = &mf->getTarget();
+ tri = tm->getRegisterInfo();
+ tii = tm->getInstrInfo();
+ mri = &mf->getRegInfo();
+
+ lis = &getAnalysis<LiveIntervals>();
+ lss = &getAnalysis<LiveStacks>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ vrm = &getAnalysis<VirtRegMap>();
+
+ DOUT << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n";
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+ // This process is continued till no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc();
+
+ // If there aren't any then we're done here.
+ if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty())
+ return true;
+
+ // If there are non-empty intervals allocate them using pbqp.
+ if (!vregIntervalsToAlloc.empty()) {
+
+ bool pbqpAllocComplete = false;
+ unsigned round = 0;
+
+ while (!pbqpAllocComplete) {
+ DOUT << " PBQP Regalloc round " << round << ":\n";
+
+ pbqp *problem = constructPBQPProblem();
+
+ solve_pbqp(problem);
+
+ pbqpAllocComplete = mapPBQPToRegAlloc(problem);
+
+ free_pbqp(problem);
+
+ ++round;
+ }
+ }
+
+ // Finalise allocation, allocate empty ranges.
+ finalizeAlloc();
+
+ vregIntervalsToAlloc.clear();
+ emptyVRegIntervals.clear();
+ li2Node.clear();
+ node2LI.clear();
+ allowedSets.clear();
+
+ DOUT << "Post alloc VirtRegMap:\n" << *vrm << "\n";
+
+ // Run rewriter
+ std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+
+ rewriter->runOnMachineFunction(*mf, *vrm, lis);
+
+ return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator() {
+ return new PBQPRegAlloc();
+}
+
+
+#undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegAllocSimple.cpp b/lib/CodeGen/RegAllocSimple.cpp
new file mode 100644
index 0000000..447e54c
--- /dev/null
+++ b/lib/CodeGen/RegAllocSimple.cpp
@@ -0,0 +1,257 @@
+//===-- RegAllocSimple.cpp - A simple generic register allocator ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register allocator. *Very* simple: it immediately
+// spills every value right after it is computed, and it reloads all used
+// operands from the spill area to temporary registers before each instruction.
+// It does not keep values in registers across instructions.
+//
+//===----------------------------------------------------------------------===//
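+
+// For orientation, a hypothetical "%c = add %a, %b" comes out of this
+// allocator roughly as (register and stack slot names are made up):
+//
+//   R0 = load [slot_a]       ; reload %a
+//   R1 = load [slot_b]       ; reload %b
+//   R2 = add R0, R1
+//   store R2 -> [slot_c]     ; spill the result immediately
+//
+// so nothing stays live in a register across instructions.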
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+namespace {
+ static RegisterRegAlloc
+ simpleRegAlloc("simple", "simple register allocator",
+ createSimpleRegisterAllocator);
+
+ class VISIBILITY_HIDDEN RegAllocSimple : public MachineFunctionPass {
+ public:
+ static char ID;
+ RegAllocSimple() : MachineFunctionPass(&ID) {}
+ private:
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // StackSlotForVirtReg - Maps SSA Regs => frame index on the stack where
+ // these values are spilled
+ std::map<unsigned, int> StackSlotForVirtReg;
+
+ // RegsUsed - Keep track of what registers are currently in use. This is a
+ // bitset.
+ std::vector<bool> RegsUsed;
+
+ // RegClassIdx - Maps RegClass => which index we can take a register
+ // from. Since this is a simple register allocator, when we need a register
+ // of a certain class, we just take the next available one.
+ std::map<const TargetRegisterClass*, unsigned> RegClassIdx;
+
+ public:
+ virtual const char *getPassName() const {
+ return "Simple Register Allocator";
+ }
+
+ /// runOnMachineFunction - Register allocate the whole function
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(PHIEliminationID); // Eliminate PHI nodes
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ private:
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+ /// getStackSpaceFor - This returns the offset of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// Given a virtual register, return a compatible physical register that is
+ /// currently unused.
+ ///
+ /// Side effect: marks that register as being used until manually cleared
+ ///
+ unsigned getFreeReg(unsigned virtualReg);
+
+ /// Moves value from memory into that register
+ unsigned reloadVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned VirtReg);
+
+ /// Saves reg value on the stack (maps virtual register to stack value)
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg);
+ };
+ char RegAllocSimple::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual
+/// register to be held on the stack.
+int RegAllocSimple::getStackSpaceFor(unsigned VirtReg,
+ const TargetRegisterClass *RC) {
+ // Find the location VirtReg would belong...
+ std::map<unsigned, int>::iterator I = StackSlotForVirtReg.find(VirtReg);
+
+ if (I != StackSlotForVirtReg.end())
+ return I->second; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx));
+
+ return FrameIdx;
+}
+
+unsigned RegAllocSimple::getFreeReg(unsigned virtualReg) {
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtualReg);
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+#ifndef NDEBUG
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+#endif
+
+ while (1) {
+ unsigned regIdx = RegClassIdx[RC]++;
+ assert(RI+regIdx != RE && "Not enough registers!");
+ unsigned PhysReg = *(RI+regIdx);
+
+ if (!RegsUsed[PhysReg]) {
+ MF->getRegInfo().setPhysRegUsed(PhysReg);
+ return PhysReg;
+ }
+ }
+}
+
+unsigned RegAllocSimple::reloadVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg) {
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(VirtReg);
+ int FrameIdx = getStackSpaceFor(VirtReg, RC);
+ unsigned PhysReg = getFreeReg(VirtReg);
+
+ // Add move instruction(s)
+ ++NumLoads;
+ TII->loadRegFromStackSlot(MBB, I, PhysReg, FrameIdx, RC);
+ return PhysReg;
+}
+
+void RegAllocSimple::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(VirtReg);
+
+ int FrameIdx = getStackSpaceFor(VirtReg, RC);
+
+ // Add move instruction(s)
+ ++NumStores;
+ TII->storeRegToStackSlot(MBB, I, PhysReg, true, FrameIdx, RC);
+}
+
+
+void RegAllocSimple::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+ // Made to combat the incorrect allocation of r2 = add r1, r1
+ std::map<unsigned, unsigned> Virt2PhysRegMap;
+
+ RegsUsed.resize(TRI->getNumRegs());
+
+ // This is a preliminary pass that will invalidate any registers that are
+ // used by the instruction (including implicit uses).
+ const TargetInstrDesc &Desc = MI->getDesc();
+ const unsigned *Regs;
+ if (Desc.ImplicitUses) {
+ for (Regs = Desc.ImplicitUses; *Regs; ++Regs)
+ RegsUsed[*Regs] = true;
+ }
+
+ if (Desc.ImplicitDefs) {
+ for (Regs = Desc.ImplicitDefs; *Regs; ++Regs) {
+ RegsUsed[*Regs] = true;
+ MF->getRegInfo().setPhysRegUsed(*Regs);
+ }
+ }
+
+ // Loop over uses, move from memory into registers.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (MO.isReg() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned virtualReg = (unsigned) MO.getReg();
+ DOUT << "op: " << MO << "\n";
+ DOUT << "\t inst[" << i << "]: ";
+ DEBUG(MI->print(*cerr.stream(), TM));
+
+ // make sure the same virtual register maps to the same physical
+ // register in any given instruction
+ unsigned physReg = Virt2PhysRegMap[virtualReg];
+ if (physReg == 0) {
+ if (MO.isDef()) {
+ unsigned TiedOp;
+ if (!MI->isRegTiedToUseOperand(i, &TiedOp)) {
+ physReg = getFreeReg(virtualReg);
+ } else {
+ // must be same register number as the source operand that is
+ // tied to. This maps a = b + c into b = b + c, and saves b into
+ // a's spot.
+ assert(MI->getOperand(TiedOp).isReg() &&
+ MI->getOperand(TiedOp).getReg() &&
+ MI->getOperand(TiedOp).isUse() &&
+ "Two address instruction invalid!");
+
+ physReg = MI->getOperand(TiedOp).getReg();
+ }
+ spillVirtReg(MBB, next(MI), virtualReg, physReg);
+ } else {
+ physReg = reloadVirtReg(MBB, MI, virtualReg);
+ Virt2PhysRegMap[virtualReg] = physReg;
+ }
+ }
+ MO.setReg(physReg);
+ DOUT << "virt: " << virtualReg << ", phys: " << MO.getReg() << "\n";
+ }
+ }
+ RegClassIdx.clear();
+ RegsUsed.clear();
+ }
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RegAllocSimple::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function\n";
+ MF = &Fn;
+ TM = &MF->getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ AllocateBasicBlock(*MBB);
+
+ StackSlotForVirtReg.clear();
+ return true;
+}
+
+FunctionPass *llvm::createSimpleRegisterAllocator() {
+ return new RegAllocSimple();
+}
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 0000000..1131e3d
--- /dev/null
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,41 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface, which is the
+// common interface used by all clients and implementations of register
+// coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+// Register the RegisterCoalescer interface, providing a nice name to refer to.
+static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer");
+char RegisterCoalescer::ID = 0;
+
+// RegisterCoalescer destructor: DO NOT move this to the header file
+// for RegisterCoalescer or else clients of the RegisterCoalescer
+// class may not depend on the RegisterCoalescer.o file in the current
+// .a file, causing register coalescer support to not be included in
+// the tool correctly!
+//
+RegisterCoalescer::~RegisterCoalescer() {}
+
+// Because of the way .a files work, we must force the SimpleRC
+// implementation to be pulled in if the RegisterCoalescer classes are
+// pulled in. Otherwise we run the risk of RegisterCoalescer being
+// used, but the default implementation not being linked into the tool
+// that uses it.
+DEFINING_FILE_FOR(RegisterCoalescer)
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 0000000..944468e
--- /dev/null
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,480 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information, such as unused registers, at any point in a machine basic block.
+// It also provides a mechanism to make registers available by evicting them to
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// RedefinesSuperRegPart - Return true if the specified register is redefining
+/// part of a super-register.
+static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg,
+ const TargetRegisterInfo *TRI) {
+ bool SeenSuperUse = false;
+ bool SeenSuperDef = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (TRI->isSuperRegister(SubReg, MO.getReg())) {
+ if (MO.isUse())
+ SeenSuperUse = true;
+ else if (MO.isImplicit())
+ SeenSuperDef = true;
+ }
+ }
+
+ return SeenSuperDef && SeenSuperUse;
+}
+
+static bool RedefinesSuperRegPart(const MachineInstr *MI,
+ const MachineOperand &MO,
+ const TargetRegisterInfo *TRI) {
+ assert(MO.isReg() && MO.isDef() && "Not a register def!");
+ return RedefinesSuperRegPart(MI, MO.getReg(), TRI);
+}
+
+/// setUsed - Set the register and its sub-registers as being used.
+void RegScavenger::setUsed(unsigned Reg, bool ImpDef) {
+ RegsAvailable.reset(Reg);
+ ImplicitDefed[Reg] = ImpDef;
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ RegsAvailable.reset(SubReg);
+ ImplicitDefed[SubReg] = ImpDef;
+ }
+}
+
+/// setUnused - Set the register and its sub-registers as being unused.
+void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) {
+ RegsAvailable.set(Reg);
+ ImplicitDefed.reset(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ if (!RedefinesSuperRegPart(MI, Reg, TRI)) {
+ RegsAvailable.set(SubReg);
+ ImplicitDefed.reset(SubReg);
+ }
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+ MachineFunction &MF = *mbb->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ "Target changed?");
+
+ if (!MBB) {
+ NumPhysRegs = TRI->getNumRegs();
+ RegsAvailable.resize(NumPhysRegs);
+ ImplicitDefed.resize(NumPhysRegs);
+
+ // Create reserved registers bitvector.
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ // Create callee-saved registers bitvector.
+ CalleeSavedRegs.resize(NumPhysRegs);
+ const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ if (CSRegs != NULL)
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CalleeSavedRegs.set(CSRegs[i]);
+ }
+
+ MBB = mbb;
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+ CurrDist = 0;
+ DistanceMap.clear();
+ ImplicitDefed.reset();
+
+ // All registers started out unused.
+ RegsAvailable.set();
+
+ // Reserved registers are always used.
+ RegsAvailable ^= ReservedRegs;
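+ // (RegsAvailable was just set to all ones, so the XOR clears exactly the
+ // reserved bits.)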
+
+ // Live-in registers are in use.
+ if (!MBB->livein_empty())
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ Tracking = false;
+}
+
+void RegScavenger::restoreScavengedReg() {
+ TII->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg,
+ ScavengingFrameIndex, ScavengedRC);
+ MachineBasicBlock::iterator II = prior(MBBI);
+ TRI->eliminateFrameIndex(II, 0, this);
+ setUsed(ScavengedReg);
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+}
+
+#ifndef NDEBUG
+/// isLiveInButUnusedBefore - Return true if the register is live in to the MBB
+/// and is not used before it reaches the MI that defines the register.
+static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI,
+ MachineRegisterInfo* MRI) {
+ // First check if register is livein.
+ bool isLiveIn = false;
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ if (Reg == *I || TRI->isSuperRegister(Reg, *I)) {
+ isLiveIn = true;
+ break;
+ }
+ if (!isLiveIn)
+ return false;
+
+ // Is there any use of it before the specified MI?
+ SmallPtrSet<MachineInstr*, 4> UsesInMBB;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->getParent() == MBB)
+ UsesInMBB.insert(UseMI);
+ }
+ if (UsesInMBB.empty())
+ return true;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I)
+ if (UsesInMBB.count(&*I))
+ return false;
+ return true;
+}
+#endif
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+ MBBI = next(MBBI);
+ }
+
+ MachineInstr *MI = MBBI;
+ DistanceMap.insert(std::make_pair(MI, CurrDist++));
+
+ if (MI == ScavengeRestore) {
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+ ScavengeRestore = NULL;
+ }
+
+ bool IsImpDef = MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF;
+
+ // Separate register operands into 3 classes: uses, defs, earlyclobbers.
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs;
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs;
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ if (MO.isUse())
+ UseMOs.push_back(std::make_pair(&MO,i));
+ else if (MO.isEarlyClobber())
+ EarlyClobberMOs.push_back(std::make_pair(&MO,i));
+ else
+ DefMOs.push_back(std::make_pair(&MO,i));
+ }
+
+ // Process uses first.
+ BitVector UseRegs(NumPhysRegs);
+ for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
+ const MachineOperand MO = *UseMOs[i].first;
+ unsigned Reg = MO.getReg();
+
+ assert(isUsed(Reg) && "Using an undefined register!");
+
+ if (MO.isKill() && !isReserved(Reg)) {
+ UseRegs.set(Reg);
+
+ // Mark sub-registers as used.
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ UseRegs.set(SubReg);
+ }
+ }
+
+ // Change states of all registers after all the uses are processed to guard
+ // against multiple uses.
+ setUnused(UseRegs);
+
+ // Process early clobber defs, then process defs. We can have an early clobber
+ // that is dead; it should not conflict with a def that happens one "slot"
+ // (see InstrSlots in LiveIntervalAnalysis.h) later.
+ unsigned NumECs = EarlyClobberMOs.size();
+ unsigned NumDefs = DefMOs.size();
+
+ for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
+ const MachineOperand &MO = (i < NumECs)
+ ? *EarlyClobberMOs[i].first : *DefMOs[i-NumECs].first;
+ unsigned Idx = (i < NumECs)
+ ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second;
+ unsigned Reg = MO.getReg();
+
+ // If it's dead upon def, then it is now free.
+ if (MO.isDead()) {
+ setUnused(Reg, MI);
+ continue;
+ }
+
+ // Skip two-address destination operand.
+ if (MI->isRegTiedToUseOperand(Idx)) {
+ assert(isUsed(Reg) && "Using an undefined register!");
+ continue;
+ }
+
+ // Skip if this is merely redefining part of a super-register.
+ if (RedefinesSuperRegPart(MI, MO, TRI))
+ continue;
+
+ // Implicit def is allowed to "re-define" any register. Similarly,
+ // implicitly defined registers can be clobbered.
+ assert((isReserved(Reg) || isUnused(Reg) ||
+ IsImpDef || isImplicitlyDefined(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+ setUsed(Reg, IsImpDef);
+ }
+}
+
+void RegScavenger::backward() {
+ assert(Tracking && "Not tracking states!");
+ assert(MBBI != MBB->begin() && "Already at start of basic block!");
+ // Move ptr backward.
+ MBBI = prior(MBBI);
+
+ MachineInstr *MI = MBBI;
+ DistanceMap.erase(MI);
+ --CurrDist;
+
+ // Separate register operands into 3 classes: uses, defs, earlyclobbers.
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs;
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs;
+ SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ if (MO.isUse())
+ UseMOs.push_back(std::make_pair(&MO,i));
+ else if (MO.isEarlyClobber())
+ EarlyClobberMOs.push_back(std::make_pair(&MO,i));
+ else
+ DefMOs.push_back(std::make_pair(&MO,i));
+ }
+
+
+ // Process defs first.
+ unsigned NumECs = EarlyClobberMOs.size();
+ unsigned NumDefs = DefMOs.size();
+ for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
+ const MachineOperand &MO = (i < NumDefs)
+ ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first;
+ unsigned Idx = (i < NumECs)
+ ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second;
+
+ // Skip two-address destination operand.
+ if (MI->isRegTiedToUseOperand(Idx))
+ continue;
+
+ unsigned Reg = MO.getReg();
+ assert(isUsed(Reg));
+ if (!isReserved(Reg))
+ setUnused(Reg, MI);
+ }
+
+ // Process uses.
+ BitVector UseRegs(NumPhysRegs);
+ for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
+ const MachineOperand MO = *UseMOs[i].first;
+ unsigned Reg = MO.getReg();
+ assert(isUnused(Reg) || isReserved(Reg));
+ UseRegs.set(Reg);
+
+ // Set the sub-registers as "used".
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ UseRegs.set(SubReg);
+ }
+ setUsed(UseRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ if (includeReserved)
+ used = ~RegsAvailable;
+ else
+ used = ~RegsAvailable & ~ReservedRegs;
+}
+
+/// CreateRegClassMask - Set the bits that represent the registers in the
+/// TargetRegisterClass.
+static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
+ ++I)
+ Mask.set(*I);
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+ const BitVector &Candidates) const {
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector RegsAvailableCopy(NumPhysRegs, false);
+ CreateRegClassMask(RegClass, RegsAvailableCopy);
+ RegsAvailableCopy &= RegsAvailable;
+
+ // Restrict the search to candidates.
+ RegsAvailableCopy &= Candidates;
+
+ // Returns the first unused (bit is set) register, or 0 if none is found.
+ int Reg = RegsAvailableCopy.find_first();
+ return (Reg == -1) ? 0 : Reg;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+ bool ExCalleeSaved) const {
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector RegsAvailableCopy(NumPhysRegs, false);
+ CreateRegClassMask(RegClass, RegsAvailableCopy);
+ RegsAvailableCopy &= RegsAvailable;
+
+ // If looking for a non-callee-saved register, mask off all the callee-saved
+ // registers.
+ if (ExCalleeSaved)
+ RegsAvailableCopy &= ~CalleeSavedRegs;
+
+ // Returns the first unused (bit is set) register, or 0 if none is found.
+ int Reg = RegsAvailableCopy.find_first();
+ return (Reg == -1) ? 0 : Reg;
+}
+
+/// findFirstUse - Return the first use of the specified register after the
+/// current position in the basic block, and compute its distance.
+MachineInstr*
+RegScavenger::findFirstUse(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I, unsigned Reg,
+ unsigned &Dist) {
+ MachineInstr *UseMI = 0;
+ Dist = ~0U;
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+ RE = MRI->reg_end(); RI != RE; ++RI) {
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ if (DI == DistanceMap.end()) {
+ // If it's not in the map, it's below the current MI; extend the map to
+ // cover the rest of the block.
+ I = next(I);
+ unsigned Dist = CurrDist + 1;
+ while (I != MBB->end()) {
+ DistanceMap.insert(std::make_pair(I, Dist++));
+ I = next(I);
+ }
+ }
+ DI = DistanceMap.find(UDMI);
+ if (DI->second > CurrDist && DI->second < Dist) {
+ Dist = DI->second;
+ UseMI = UDMI;
+ }
+ }
+ return UseMI;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ assert(ScavengingFrameIndex >= 0 &&
+ "Cannot scavenge a register without an emergency spill slot!");
+
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector Candidates(NumPhysRegs, false);
+ CreateRegClassMask(RC, Candidates);
+ Candidates ^= ReservedRegs; // Do not include reserved registers.
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg())
+ Candidates.reset(MO.getReg());
+ }
+
+ // Find the register whose use is furthest away.
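+ // Picking the candidate with the most distant next use lets the restore be
+ // placed just before that use, giving the scavenged value the longest window.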
+ unsigned SReg = 0;
+ unsigned MaxDist = 0;
+ MachineInstr *MaxUseMI = 0;
+ int Reg = Candidates.find_first();
+ while (Reg != -1) {
+ unsigned Dist;
+ MachineInstr *UseMI = findFirstUse(MBB, I, Reg, Dist);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ unsigned AsDist;
+ MachineInstr *AsUseMI = findFirstUse(MBB, I, *AS, AsDist);
+ if (AsDist < Dist) {
+ Dist = AsDist;
+ UseMI = AsUseMI;
+ }
+ }
+ if (Dist >= MaxDist) {
+ MaxDist = Dist;
+ MaxUseMI = UseMI;
+ SReg = Reg;
+ }
+ Reg = Candidates.find_next(Reg);
+ }
+
+ if (ScavengedReg != 0) {
+ assert(0 && "Scavenger slot is live, unable to scavenge another register!");
+ abort();
+ }
+
+ // Spill the scavenged register before I.
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC);
+ MachineBasicBlock::iterator II = prior(I);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ II = MaxUseMI
+ ? MachineBasicBlock::iterator(MaxUseMI) : MBB->getFirstTerminator();
+ TII->loadRegFromStackSlot(*MBB, II, SReg, ScavengingFrameIndex, RC);
+ ScavengeRestore = prior(II);
+ ScavengedReg = SReg;
+ ScavengedRC = RC;
+
+ return SReg;
+}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 0000000..a8452df
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,572 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include <climits>
+using namespace llvm;
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()),
+ TII(TM.getInstrInfo()),
+ TRI(TM.getRegisterInfo()),
+ TLI(TM.getTargetLowering()),
+ MF(mf), MRI(mf.getRegInfo()),
+ ConstPool(MF.getConstantPool()),
+ EntrySU(), ExitSU() {
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// dumpSchedule - Dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ cerr << "**** NOOP ****\n";
+ }
+}
+
+
+/// Run - perform scheduling.
+///
+void ScheduleDAG::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ BB = bb;
+ InsertPos = insertPos;
+
+ SUnits.clear();
+ Sequence.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+
+ Schedule();
+
+ DOUT << "*** Final schedule ***\n";
+ DEBUG(dumpSchedule());
+ DOUT << "\n";
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// not already present. It also adds the current node as a successor of the
+/// specified node.
+void SUnit::addPred(const SDep &D) {
+ // If this node already has this dependence, don't add a redundant one.
+ for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D)
+ return;
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled)
+ ++NumPredsLeft;
+ if (!isScheduled)
+ ++N->NumSuccsLeft;
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+}
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ bool FoundSucc = false;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
+ EE = N->Succs.end(); II != EE; ++II)
+ if (*II == P) {
+ FoundSucc = true;
+ N->Succs.erase(II);
+ break;
+ }
+ assert(FoundSucc && "Mismatching preds / succs lists!");
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled)
+ --NumPredsLeft;
+ if (!isScheduled)
+ --N->NumSuccsLeft;
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ cerr << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ cerr << " # preds left : " << NumPredsLeft << "\n";
+ cerr << " # succs left : " << NumSuccsLeft << "\n";
+ cerr << " Latency : " << Latency << "\n";
+ cerr << " Depth : " << Depth << "\n";
+ cerr << " Height : " << Height << "\n";
+
+ if (Preds.size() != 0) {
+ cerr << " Predecessors:\n";
+ for (SUnit::const_pred_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ cerr << " ";
+ switch (I->getKind()) {
+ case SDep::Data: cerr << "val "; break;
+ case SDep::Anti: cerr << "anti"; break;
+ case SDep::Output: cerr << "out "; break;
+ case SDep::Order: cerr << "ch "; break;
+ }
+ cerr << "#";
+ cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ cerr << " *";
+ cerr << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ cerr << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ cerr << " ";
+ switch (I->getKind()) {
+ case SDep::Data: cerr << "val "; break;
+ case SDep::Anti: cerr << "anti"; break;
+ case SDep::Output: cerr << "out "; break;
+ case SDep::Order: cerr << "ch "; break;
+ }
+ cerr << "#";
+ cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ cerr << " *";
+ cerr << "\n";
+ }
+ }
+ cerr << "\n";
+}
+
+#ifndef NDEBUG
+/// VerifySchedule - Verify that all SUnits were scheduled and that
+/// their state is consistent.
+///
+void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ cerr << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].isScheduled &&
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+ unsigned(INT_MAX)) {
+ if (!AnyNotSched)
+ cerr << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ cerr << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnits[i].NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ cerr << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(!AnyNotSched);
+ assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first uses DFS to mark the
+/// nodes reachable from Y, and then uses Shift to move them so that they lie
+/// immediately after X in Index2Node.
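+///
+/// For example, given edges A->B and B->C, ord(A) < ord(B) < ord(C); adding
+/// the edge A->C requires no reordering, whereas adding C->A would create a
+/// cycle, which the DFS detects.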
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ Allocate(SU->NodeNum, --Id);
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit *SU = I->getSUnit();
+ if (!--Node2Index[SU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(SU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate an edge
+/// to be removed from the specified node N from the predecessors of the
+/// current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Perform a DFS traversal to mark all nodes reachable from SU that are
+/// affected by the edge insertion. These nodes will later get new topological
+/// indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool& HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ int s = SU->Succs[I].getSUnit()->NodeNum;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
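+/// Nodes marked by the preceding DFS (those reachable from Y) are moved to the
+/// upper end of the [LowerBound, UpperBound] window, after the unmarked nodes,
+/// with the relative order of each group preserved.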
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
+
+
+/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+/// create a cycle.
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ if (IsReachable(TargetSU, SU))
+ return true;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isAssignedRegDep() &&
+ IsReachable(TargetSU, I->getSUnit()))
+ return true;
+ return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort(
+ std::vector<SUnit> &sunits)
+ : SUnits(sunits) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
new file mode 100644
index 0000000..770f5bb
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -0,0 +1,71 @@
+//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAG class, which creates
+// MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) {
+ MI->addMemOperand(MF, MO);
+}
+
+void ScheduleDAG::EmitNoop() {
+ TII->insertNoop(*BB, InsertPos);
+}
+
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
+ DenseMap<SUnit*, unsigned> &VRBaseMap) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
+ SU->CopyDstRC, SU->CopySrcRC);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
+ SU->CopyDstRC, SU->CopySrcRC);
+ }
+ break;
+ }
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 0000000..8e18b3d
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,468 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched-instrs"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+using namespace llvm;
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt)
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ Begin = begin;
+ InsertPosIndex = endcount;
+
+ ScheduleDAG::Run(bb, end);
+}
+
+/// getOpcode - If this is an Instruction or a ConstantExpr, return the
+/// opcode value. Otherwise return UserOp1.
+static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getOpcode();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ return CE->getOpcode();
+ // Use UserOp1 to mean there's no opcode.
+ return Instruction::UserOp1;
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
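+/// For example, a pointer rebuilt as inttoptr(add(ptrtoint %p, %offset)) is
+/// traced back to %p, provided the other add operand is a constant or a
+/// multiply.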
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const User *U = dyn_cast<User>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObject.
+ if (getOpcode(U) == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant or a multiplied value, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (getOpcode(U) != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ getOpcode(U->getOperand(1)) != Instruction::Mul))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
+ } while (1);
+}
+
+/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObject(const Value *V) {
+ // First just call Value::getUnderlyingObject to let it do what it does.
+ do {
+ V = V->getUnderlyingObject();
+ // If it found an inttoptr, use special code to continue climbing.
+ if (getOpcode(V) != Instruction::IntToPtr)
+ break;
+ const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ // If that succeeded in finding a pointer, continue the search.
+ if (!isa<PointerType>(O->getType()))
+ break;
+ V = O;
+ } while (1);
+ return V;
+}
+
+/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object. Otherwise return null.
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) {
+ if (!MI->hasOneMemOperand() ||
+ !MI->memoperands_begin()->getValue() ||
+ MI->memoperands_begin()->isVolatile())
+ return 0;
+
+ const Value *V = MI->memoperands_begin()->getValue();
+ if (!V)
+ return 0;
+
+ V = getUnderlyingObject(V);
+ if (!isa<PseudoSourceValue>(V) && !isIdentifiedObject(V))
+ return 0;
+
+ return V;
+}
+
+void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+ if (MachineLoop *ML = MLI.getLoopFor(BB))
+ if (BB == ML->getLoopLatch()) {
+ MachineBasicBlock *Header = ML->getHeader();
+ for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+ E = Header->livein_end(); I != E; ++I)
+ LoopLiveInRegs.insert(*I);
+ LoopRegs.VisitLoop(ML);
+ }
+}
+
+void ScheduleDAGInstrs::BuildSchedGraph() {
+ // We'll be allocating one SUnit for each instruction, plus one for
+ // the region exit node.
+ SUnits.reserve(BB->size());
+
+ // We build scheduling units by walking a block's instruction list from bottom
+ // to top.
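+ // Walking bottom-up means every use of a register has already been seen (and
+ // recorded in Uses[]) by the time its def is reached, which is what the
+ // dependence construction below relies on.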
+
+ // Remember where a generic side-effecting instruction is as we proceed. If
+ // ChainMMO is null, this is assumed to have arbitrary side-effects. If
+ // ChainMMO is non-null, then Chain makes only a single memory reference.
+ SUnit *Chain = 0;
+ MachineMemOperand *ChainMMO = 0;
+
+ // Memory references to specific known memory locations are tracked so that
+ // they can be given more precise dependencies.
+ std::map<const Value *, SUnit *> MemDefs;
+ std::map<const Value *, std::vector<SUnit *> > MemUses;
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ unsigned SpecialAddressLatency =
+ TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency();
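+ // SpecialAddressLatency is extra latency credited to defs that feed the
+ // address operands of loads and stores, encouraging the scheduler to issue
+ // such defs earlier.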
+
+ // Walk the list of instructions, from bottom moving up.
+ for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ MII != MIE; --MII) {
+ MachineInstr *MI = prior(MII);
+ const TargetInstrDesc &TID = MI->getDesc();
+ assert(!TID.isTerminator() && !MI->isLabel() &&
+ "Cannot schedule terminators or labels!");
+ // Create the SUnit for this MI.
+ SUnit *SU = NewSUnit(MI);
+
+ // Assign the Latency field of SU using target-provided information.
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ ComputeLatency(SU);
+
+ // Add register-based dependencies (data, anti, and output).
+ for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI->getOperand(j);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+ std::vector<SUnit *> &UseList = Uses[Reg];
+ std::vector<SUnit *> &DefList = Defs[Reg];
+ // Optionally add output and anti dependencies.
+ // TODO: Using a latency of 1 here assumes there's no cost for
+ // reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &DefList = Defs[*Alias];
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(Reg)))
+ DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+ }
+ }
+
+ if (MO.isDef()) {
+ // Add any data dependencies.
+ unsigned DataLatency = SU->Latency;
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU != SU) {
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Do this for register aliases too.
+ if (SpecialAddressLatency != 0 && !UnitLatencies) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+ assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
+ if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+ (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+ UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg));
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ std::vector<SUnit *> &UseList = Uses[*Alias];
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU != SU)
+ UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
+ }
+ }
+
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ if (!UnitLatencies && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseTID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks. Check to see if the instruction is in
+ // the same region by checking to see if it has the same parent.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
+ UseList.clear();
+ if (!MO.isDead())
+ DefList.clear();
+ DefList.push_back(SU);
+ } else {
+ UseList.push_back(SU);
+ }
+ }
+
+ // Add chain dependencies.
+ // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+ // after stack slots are lowered to actual addresses.
+ // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+ // produce more precise dependence information.
+ if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
+ new_chain:
+ // This is the conservative case. Add dependencies on all memory
+ // references.
+ if (Chain)
+ Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ Chain = SU;
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+ PendingLoads.clear();
+ for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
+ E = MemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+ I->second = SU;
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ MemUses.begin(), E = MemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency));
+ I->second.clear();
+ }
+ // See if it is known to just have a single memory reference.
+ MachineInstr *ChainMI = Chain->getInstr();
+ const TargetInstrDesc &ChainTID = ChainMI->getDesc();
+ if (!ChainTID.isCall() &&
+ !ChainTID.hasUnmodeledSideEffects() &&
+ ChainMI->hasOneMemOperand() &&
+ !ChainMI->memoperands_begin()->isVolatile() &&
+ ChainMI->memoperands_begin()->getValue())
+ // We know that the Chain accesses one specific memory location.
+ ChainMMO = &*ChainMI->memoperands_begin();
+ else
+ // Unknown memory accesses. Assume the worst.
+ ChainMMO = 0;
+ } else if (TID.mayStore()) {
+ if (const Value *V = getUnderlyingObjectForInstr(MI)) {
+ // A store to a specific PseudoSourceValue. Add precise dependencies.
+ // Handle the def in MemDefs, if there is one.
+ std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
+ if (I != MemDefs.end()) {
+ I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ I->second = SU;
+ } else {
+ MemDefs[V] = SU;
+ }
+ // Handle the uses in MemUses, if there are any.
+ std::map<const Value *, std::vector<SUnit *> >::iterator J =
+ MemUses.find(V);
+ if (J != MemUses.end()) {
+ for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+ J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ J->second.clear();
+ }
+ // Add dependencies from all the PendingLoads, since without
+ // memoperands we must assume they alias anything.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+ // Add a general dependence too, if needed.
+ if (Chain)
+ Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ } else
+ // Treat all other stores conservatively.
+ goto new_chain;
+ } else if (TID.mayLoad()) {
+ if (TII->isInvariantLoad(MI)) {
+ // Invariant load, no chain dependencies needed!
+ } else if (const Value *V = getUnderlyingObjectForInstr(MI)) {
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
+ if (I != MemDefs.end())
+ I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ MemUses[V].push_back(SU);
+
+ // Add a general dependence too, if needed.
+ if (Chain && (!ChainMMO ||
+ (ChainMMO->isStore() || ChainMMO->isVolatile())))
+ Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ } else if (MI->hasVolatileMemoryRef()) {
+ // Treat volatile loads conservatively. Note that this includes
+ // cases where memoperand information is unavailable.
+ goto new_chain;
+ } else {
+ // A normal load. Depend on the general chain, as well as on
+ // all stores. In the absence of MachineMemOperand information,
+ // we can't even assume that the load doesn't alias well-behaved
+ // memory locations.
+ if (Chain)
+ Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+ for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
+ E = MemDefs.end(); I != E; ++I)
+ I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+ PendingLoads.push_back(SU);
+ }
+ }
+ }
+
+ for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ Defs[i].clear();
+ Uses[i].clear();
+ }
+ PendingLoads.clear();
+}
+
+void ScheduleDAGInstrs::FinishBlock() {
+ // Nothing to do.
+}
+
+void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes flagged together into this SUnit.
+ SU->Latency =
+ InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());
+
+ // Simplistic target-independent heuristic: assume that loads take
+ // extra time.
+ if (InstrItins.isEmpty())
+ if (SU->getInstr()->getDesc().mayLoad())
+ SU->Latency += 2;
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+ SU->getInstr()->dump();
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss);
+ return oss.str();
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
+ // For MachineInstr-based scheduling, we're rescheduling the instructions in
+ // the block, so start by removing them from the block.
+ while (Begin != InsertPos) {
+ MachineBasicBlock::iterator I = Begin;
+ ++Begin;
+ BB->remove(I);
+ }
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ BB->insert(InsertPos, SU->getInstr());
+ }
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (!Sequence.empty())
+ Begin = Sequence[0]->getInstr();
+
+ return BB;
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
new file mode 100644
index 0000000..00d6268
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -0,0 +1,184 @@
+//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGInstrs class, which implements
+// scheduling for a MachineInstr-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGINSTRS_H
+#define SCHEDULEDAGINSTRS_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <map>
+
+namespace llvm {
+ class MachineLoopInfo;
+ class MachineDominatorTree;
+
+ /// LoopDependencies - This class analyzes loop-oriented register
+ /// dependencies, which are used to guide scheduling decisions.
+ /// For example, loop induction variable increments should be
+ /// scheduled as soon as possible after the variable's last use.
+ ///
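+ /// Deps records, for each register live in to the loop header, a use of that
+ /// register inside the loop together with its instruction offset within its
+ /// block; BuildSchedGraph uses this to backschedule the corresponding def.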
+ class VISIBILITY_HIDDEN LoopDependencies {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ public:
+ typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+ LoopDeps;
+ LoopDeps Deps;
+
+ LoopDependencies(const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt) :
+ MLI(mli), MDT(mdt) {}
+
+ /// VisitLoop - Clear out any previous state and analyze the given loop.
+ ///
+ void VisitLoop(const MachineLoop *Loop) {
+ Deps.clear();
+ MachineBasicBlock *Header = Loop->getHeader();
+ SmallSet<unsigned, 8> LoopLiveIns;
+ for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+ LE = Header->livein_end(); LI != LE; ++LI)
+ LoopLiveIns.insert(*LI);
+
+ const MachineDomTreeNode *Node = MDT.getNode(Header);
+ const MachineBasicBlock *MBB = Node->getBlock();
+ assert(Loop->contains(MBB) &&
+ "Loop does not contain header!");
+ VisitRegion(Node, MBB, Loop, LoopLiveIns);
+ }
+
+ private:
+ void VisitRegion(const MachineDomTreeNode *Node,
+ const MachineBasicBlock *MBB,
+ const MachineLoop *Loop,
+ const SmallSet<unsigned, 8> &LoopLiveIns) {
+ unsigned Count = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I, ++Count) {
+ const MachineInstr *MI = I;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (LoopLiveIns.count(MOReg))
+ Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+ }
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (std::vector<MachineDomTreeNode*>::const_iterator I =
+ Children.begin(), E = Children.end(); I != E; ++I) {
+ const MachineDomTreeNode *ChildNode = *I;
+ MachineBasicBlock *ChildBlock = ChildNode->getBlock();
+ if (Loop->contains(ChildBlock))
+ VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
+ }
+ }
+ };
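+ // Usage sketch (illustrative only, not part of the interface contract):
+ // construct one LoopDependencies and call VisitLoop(L) before scheduling
+ // blocks of L. Afterwards, Deps maps each register live into the loop
+ // header to the first use of it encountered while walking the loop body in
+ // dominator-tree order, along with that use's instruction index in its
+ // block:
+ //
+ //   LoopDependencies LoopRegs(MLI, MDT);
+ //   LoopRegs.VisitLoop(L);
+ //   LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+ //   if (I != LoopRegs.Deps.end()) { /* Reg carries a loop dependence */ }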
+
+ /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
+ /// MachineInstrs.
+ class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ /// Defs, Uses - Remember where defs and uses of each physical register
+ /// are as we iterate upward through the instructions. This is allocated
+ /// here instead of inside BuildSchedGraph to avoid the need for it to be
+ /// initialized and destructed for each block.
+ std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister];
+
+ /// PendingLoads - Remember where unknown loads are after the most recent
+ /// unknown store, as we iterate. As with Defs and Uses, this is here
+ /// to minimize construction/destruction.
+ std::vector<SUnit *> PendingLoads;
+
+ /// LoopRegs - Track which registers are used for loop-carried dependencies.
+ ///
+ LoopDependencies LoopRegs;
+
+ /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
+ /// back-edge-aware scheduling.
+ ///
+ SmallSet<unsigned, 8> LoopLiveInRegs;
+
+ public:
+ MachineBasicBlock *BB; // Current basic block
+ MachineBasicBlock::iterator Begin; // The beginning of the range to
+ // be scheduled. The range extends
+ // to InsertPos.
+ unsigned InsertPosIndex; // The index in BB of InsertPos.
+
+ explicit ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt);
+
+ virtual ~ScheduleDAGInstrs() {}
+
+ /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+ const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
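+ // Note: the assert in NewSUnit guards against the SUnits vector
+ // reallocating during graph construction; dependence edges and other
+ // clients hold raw SUnit pointers, so a reallocation here would leave them
+ // dangling. Callers are therefore assumed to size SUnits appropriately up
+ // front (an assumption of this note, enforced only by the assert).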
+
+ /// Run - perform scheduling.
+ ///
+ void Run(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endindex);
+
+ /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
+ /// scheduling.
+ virtual void BuildSchedGraph();
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+ virtual MachineBasicBlock *EmitSchedule();
+
+ /// StartBlock - Prepare to perform scheduling in the given block.
+ ///
+ virtual void StartBlock(MachineBasicBlock *BB);
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// FinishBlock - Clean up after scheduling in the given block.
+ ///
+ virtual void FinishBlock();
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 0000000..594c24d
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,97 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getNodeLabel(const SUnit *Node,
+ const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+ if (BB->getBasicBlock())
+ ViewGraph(this, "dag." + MF.getFunction()->getName(),
+ "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' +
+ BB->getBasicBlock()->getName());
+ else
+ ViewGraph(this, "dag." + MF.getFunction()->getName(),
+ "Scheduling-Units Graph for " + MF.getFunction()->getName());
+#else
+ cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
new file mode 100644
index 0000000..9ea59ea
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -0,0 +1,22 @@
+add_llvm_library(LLVMSelectionDAG
+ CallingConvLower.cpp
+ DAGCombiner.cpp
+ FastISel.cpp
+ LegalizeDAG.cpp
+ LegalizeFloatTypes.cpp
+ LegalizeIntegerTypes.cpp
+ LegalizeTypes.cpp
+ LegalizeTypesGeneric.cpp
+ LegalizeVectorOps.cpp
+ LegalizeVectorTypes.cpp
+ ScheduleDAGSDNodes.cpp
+ ScheduleDAGSDNodesEmit.cpp
+ ScheduleDAGFast.cpp
+ ScheduleDAGList.cpp
+ ScheduleDAGRRList.cpp
+ SelectionDAGBuild.cpp
+ SelectionDAG.cpp
+ SelectionDAGISel.cpp
+ SelectionDAGPrinter.cpp
+ TargetLowering.cpp
+ )
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
new file mode 100644
index 0000000..7cd2b73
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -0,0 +1,148 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ unsigned Offset = AllocateStack(Size, Align);
+
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
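+// Worked example (illustrative): a byval argument whose attribute encodes
+// size 12 and alignment 4, analyzed with MinSize 16 and MinAlign 8, is
+// widened to Size = 16 and Align = 8 before AllocateStack, so it receives a
+// 16-byte stack slot at 8-byte alignment.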
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+ if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
+ for (; (Reg = *RegAliases); ++RegAliases)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
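+// Example (illustrative): for Reg = 37 the first statement sets bit 5 of
+// word 1 (37/32 == 1, 37&31 == 5); the loop then repeats the same marking
+// for every alias returned by TargetRegisterInfo::getAliasSet, i.e. the
+// sub- and super-registers that overlap Reg on the target.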
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) {
+ unsigned NumArgs = TheArgs->getNumValues()-1;
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = TheArgs->getValueType(i);
+ ISD::ArgFlagsTy ArgFlags =
+ cast<ARG_FLAGSSDNode>(TheArgs->getOperand(3+i))->getArgFlags();
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+ cerr << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) {
+ MVT VT = TheRet->getOperand(i*2+1).getValueType();
+ ISD::ArgFlagsTy ArgFlags =
+ cast<ARG_FLAGSSDNode>(TheRet->getOperand(i*2+2))->getArgFlags();
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){
+ cerr << "Return operand #" << i << " has unhandled type "
+ << VT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) {
+ unsigned NumOps = TheCall->getNumArgs();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = TheCall->getArg(i).getValueType();
+ ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i);
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+ cerr << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+ cerr << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
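+// This overload is convenient when the caller has already flattened the
+// arguments into parallel type/flag vectors rather than an ISD::CALL node
+// (for example, a fast-path instruction selector); that usage is an
+// assumption of this note, not something this file requires.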
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) {
+ for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) {
+ MVT VT = TheCall->getRetValType(i);
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (TheCall->isInreg())
+ Flags.setInReg();
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+ cerr << "Call result #" << i << " has unhandled type "
+ << VT.getMVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+ cerr << "Call result has unhandled type "
+ << VT.getMVTString() << "\n";
+ abort();
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000..4c1710d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,6203 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+
+namespace {
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Turn on alias analysis during testing"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class VISIBILITY_HIDDEN DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations;
+ bool LegalTypes;
+
+ // Worklist of all of the nodes that need to be simplified.
+ std::vector<SDNode*> WorkList;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the worklist, because they might now be further
+ /// simplified.
+ ///
+ void AddUsersToWorkList(SDNode *N) {
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ AddToWorkList(*UI);
+ }
+
+ /// visit - call the node-specific routine that knows how to fold each
+ /// particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+ /// AddToWorkList - Add to the work list, making sure its instance is at
+ /// the back (next to be processed).
+ void AddToWorkList(SDNode *N) {
+ removeFromWorkList(N);
+ WorkList.push_back(N);
+ }
+
+ /// removeFromWorkList - remove all instances of N from the worklist.
+ ///
+ void removeFromWorkList(SDNode *N) {
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+ WorkList.end());
+ }
+
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+
+ /// SimplifyDemandedBits - Check the specified integer node value to see if
+ /// it can be simplified or if things it uses can be simplified by bit
+ /// propagation. If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits());
+ return SimplifyDemandedBits(Op, Demanded);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+
+
+ /// combine - call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitSREM(SDNode *N);
+ SDValue visitUREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSDIVREM(SDNode *N);
+ SDValue visitUDIVREM(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitOR(SDNode *N);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBIT_CONVERT(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_ROUND_INREG(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
+
+ SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans = true);
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, MVT VT);
+ SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+
+ SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+ /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases);
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2) const;
+
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset) const;
+
+ /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for a better chain (aliasing node.)
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ /// getShiftAmountTy - Returns a type large enough to hold any valid
+ /// shift amount - before type legalization these can be huge.
+ MVT getShiftAmountTy() {
+ return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+ }
+
+public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ : DAG(D),
+ TLI(D.getTargetLoweringInfo()),
+ Level(Unrestricted),
+ OptLevel(OL),
+ LegalOperations(false),
+ LegalTypes(false),
+ AA(A) {}
+
+ /// Run - runs the dag combiner on all nodes in the work list
+ void Run(CombineLevel AtLevel);
+ };
+}
+
+
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class VISIBILITY_HIDDEN WorkListRemover :
+ public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+public:
+ explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ DC.removeFromWorkList(N);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Ignore updates.
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ unsigned Depth = 0) {
+ // No compile time optimizations on this type.
+ if (Op.getValueType() == MVT::ppcf128)
+ return 0;
+
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+ default: return 0;
+ case ISD::ConstantFP:
+ // Don't invert constant FP values after legalize. The negated constant
+ // isn't necessarily legal.
+ return LegalOperations ? 0 : 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!UnsafeFPMath) return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!UnsafeFPMath) return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (HonorSignDependentRoundingFPMath()) return 0;
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ }
+}
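+// Examples (informal, for single-use nodes and assuming the relevant FP-math
+// options permit the transforms): (fneg X) always reports 2, since negating
+// it merely strips the fneg; a ConstantFP reports 1 before operation
+// legalization, because the negated constant can be materialized directly;
+// and (fmul (fneg X), Y) reports 2 when sign-dependent rounding is not
+// honored, since the negation folds into the multiply's first operand.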
+
+ /// GetNegatedExpression - If isNegatibleForFree returned a non-zero value,
+ /// this function returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Unknown code");
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, Op.getValueType());
+ }
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(0));
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(UnsafeFPMath);
+
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->getValueAPF().isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0));
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!HonorSignDependentRoundingFPMath());
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1));
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+ if (N.getOpcode() == ISD::SELECT_CC &&
+ N.getOperand(2).getOpcode() == ISD::Constant &&
+ N.getOperand(3).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+ cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+ }
+ return false;
+}
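+// Example (illustrative): (select_cc a, b, 1, 0, cc) is recognized here just
+// like (setcc a, b, cc), with LHS = a, RHS = b, and CC = cc, so callers can
+// treat both forms uniformly when inverting or folding conditions.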
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDValue N) {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+ SDValue N0, SDValue N1) {
+ MVT VT = N0.getValueType();
+ if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+ if (isa<ConstantSDNode>(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N0.getOperand(1)),
+ cast<ConstantSDNode>(N1));
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ } else if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+
+ if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+ if (isa<ConstantSDNode>(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, VT,
+ cast<ConstantSDNode>(N1.getOperand(1)),
+ cast<ConstantSDNode>(N0));
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ } else if (N1.hasOneUse()) {
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+ SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+ N1.getOperand(0), N0);
+ AddToWorkList(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
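+// Examples (illustrative): (add (add x, 3), 5) reassociates to (add x, 8)
+// via FoldConstantArithmetic, while (add (add x, 3), y) becomes
+// (add (add x, y), 3) when the inner add has a single use, keeping the
+// constant outermost where later folds can see it.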
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG));
+ DOUT << " and " << NumTo-1 << " other values\n";
+ DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert(N->getValueType(i) == To[i].getValueType() &&
+ "Cannot combine value to value of different type!"));
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
+
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorkList(To[i].getNode());
+ AddUsersToWorkList(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ return SDValue(N, 0);
+}
+
+void
+DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
+ TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorkList(TLO.New.getNode());
+ AddUsersToWorkList(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty()) {
+ removeFromWorkList(TLO.Old.getNode());
+
+ // If the operands of this node are only used by the node, they will now
+ // be dead. Make sure to visit them first to delete dead nodes early.
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
+ DAG.DeleteNode(TLO.Old.getNode());
+ }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorkList(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG));
+ DOUT << '\n';
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
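+// For instance (an informal example), if only the low 8 bits of
+// (and x, 0xff) are demanded, TargetLowering::SimplifyDemandedBits can record
+// x as the replacement in TLO; CommitTargetLoweringOpt above then performs
+// the actual replace-all-uses and the worklist maintenance.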
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+ // Set the instance variables, so that the various visit routines may use them.
+ Level = AtLevel;
+ LegalOperations = Level >= NoIllegalOperations;
+ LegalTypes = Level >= NoIllegalTypes;
+
+ // Add all the dag nodes to the worklist.
+ WorkList.reserve(DAG.allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ WorkList.push_back(I);
+
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes to the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // The root of the dag may dangle to deleted nodes until the dag combiner is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // While the worklist isn't empty, inspect the node at the end of it and
+ // try to combine it.
+ while (!WorkList.empty()) {
+ SDNode *N = WorkList.back();
+ WorkList.pop_back();
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (N->use_empty() && N != &Dummy) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ DAG.DeleteNode(N);
+ continue;
+ }
+
+ SDValue RV = combine(N);
+
+ if (RV.getNode() == 0)
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG));
+ DOUT << '\n';
+ WorkListRemover DeadNodes(*this);
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ SDValue OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorkList(RV.getNode());
+ AddUsersToWorkList(RV.getNode());
+
+ // Add any uses of the old node to the worklist in case this node is the
+ // last one that uses them. They may become dead after this node is
+ // deleted.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty()) {
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ }
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+}
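+// Note on the HandleSDNode above: it pins whatever the root value becomes
+// while combining runs, so even if the original root node is folded away,
+// the final DAG.setRoot(Dummy.getValue()) re-installs its replacement rather
+// than a dangling node.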
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM: return visitSREM(N);
+ case ISD::UREM: return visitUREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SDIVREM: return visitSDIVREM(N);
+ case ISD::UDIVREM: return visitUDIVREM(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (RV.getNode() == 0) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level == Unrestricted, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+ // If N is a commutative binary node, try commuting it to enable more
+ // sdisel CSE.
+ if (RV.getNode() == 0 &&
+ SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ SDValue Ops[] = { N1, N0 };
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+ Ops, 2);
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+ /// otherwise return a null SDValue.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+ // Iterate through the token factors. The TFs list grows when new token
+ // factors are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+ SDValue Op = TF->getOperand(i);
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if ((CombinerAA || Op.hasOneUse()) &&
+ std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ // Clean up in case the token factor is removed.
+ AddToWorkList(Op.getNode());
+ Changed = true;
+ break;
+ }
+ // Fall thru
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()))
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDValue Result;
+
+ // If we've changed things around, replace the token factor.
+ if (Changed) {
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Don't add users to work list.
+ return CombineTo(N, Result, false);
+ }
+
+ return Result;
+}
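+// Example (illustrative): TokenFactor(TokenFactor(a, b), EntryToken, a), with
+// the inner token factor single-use, flattens to TokenFactor(a, b): the inner
+// node is inlined, the entry token is dropped, and the duplicate 'a' operand
+// is filtered out by SeenOps.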
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorkListRemover DeadNodes(*this);
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+ &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ MVT VT = N0.getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+ if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N00.getOperand(1))) {
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+ N00.getOperand(0), N01),
+ DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+ N00.getOperand(1), N01));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ }
+
+ return SDValue();
+}
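+// Worked example (illustrative): (add (shl (add x, 3), 2), y) is rewritten as
+// (add (add (shl x, 2), (shl 3, 2)), y); the inner (shl 3, 2) is a shift of
+// constants, which getNode is expected to constant-fold to 12.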
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (add c1, c2) -> c1+c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (add x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+ GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ GA->getOffset() +
+ (uint64_t)N1C->getSExtValue());
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N1C && N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), VT),
+ N0.getOperand(1));
+ // reassociate add
+ SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+ if (RADD.getNode() != 0)
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+ cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+ cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+ N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+ DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+ }
+
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
+ if (VT.isInteger() && !VT.isVector()) {
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
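+ // Example (illustrative): (add (and x, 0xf0), (and y, 0x0f)) becomes an OR
+ // here, because ComputeMaskedBits proves the two operands have no
+ // possibly-set bits in common.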
+
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
+ if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (N->hasNUsesOfValue(0, 1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+
+ // canonicalize constant to RHS.
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(), MVT::Flag));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ MVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sub x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, N->getValueType(0));
+ // fold (sub c1, c2) -> c1-c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), VT));
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ VT);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (mul x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mul x, -1) -> 0-x
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // fold (mul x, (1 << c)) -> x << c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+ unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(Log2Val, getShiftAmountTy())));
+ }
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ N1, N0.getOperand(1));
+ AddToWorkList(C3.getNode());
+ return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(0,0), Y(0,0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+ Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+ if (RMUL.getNode() != 0)
+ return RMUL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getSExtValue() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), N0);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> (X&15) >> 2
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+ N0, N1);
+ }
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+ (isPowerOf2_64(N1C->getSExtValue()) ||
+ isPowerOf2_64(-N1C->getSExtValue()))) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDValue();
+
+ int64_t pow2 = N1C->getSExtValue();
+ int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+ unsigned lg2 = Log2_64(abs2);
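+ // For example, for i32 and pow2 = 4 (lg2 = 2) the sequence below computes
+ // (x + ((x < 0) ? 3 : 0)) >> 2; e.g. x = -7 gives (-7 + 3) >> 2 = -1,
+ // matching truncating signed division.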
+
+ // Splat the sign bit into the register
+ SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy()));
+ AddToWorkList(SGN.getNode());
+
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+ DAG.getConstant(VT.getSizeInBits() - lg2,
+ getShiftAmountTy()));
+ SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+ AddToWorkList(SRL.getNode());
+ AddToWorkList(ADD.getNode()); // Divide by pow2
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+ DAG.getConstant(lg2, getShiftAmountTy()));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (pow2 > 0)
+ return SRA;
+
+ AddToWorkList(SRA.getNode());
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+ DAG.getConstant(0, VT), SRA);
+ }
+
+ // if integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence.
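+ // (BuildSDIV typically expands the divide into a multiply-high by a
+ // precomputed "magic" constant followed by shift and add fixups, in the
+ // style of the Hacker's Delight magic-number algorithm.)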
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
+ !TLI.isIntDivCheap()) {
+ SDValue Op = BuildSDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ MVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+ N1.getOperand(1),
+ DAG.getConstant(SHC->getAPIntValue()
+ .logBase2(),
+ ADDVT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+ // fold (udiv x, c) -> alternate
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ SDValue Op = BuildUDIV(N);
+ if (Op.getNode()) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (srem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (urem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+ DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ VT));
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+ }
+ }
+ }
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ if (N1C && !N1C->isNullValue()) {
+ SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+ AddToWorkList(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+ OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+ AddToWorkList(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (mulhs x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
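+ // (the double-width product of x and 1 is just sext(x), so its high half is
+ // a copy of x's sign bit)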
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+ DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ getShiftAmountTy()));
+ // fold (mulhs x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (mulhu x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getConstant(0, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ return SDValue();
+}
+
+/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+/// compute two values. LoOp and HiOp give the opcodes for the two computations
+/// that are being performed. Returns a non-null SDValue if a simplification
+/// was made, and a null SDValue otherwise.
+///
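+/// For example, if only the low result of an SMUL_LOHI node is used, the node
+/// can be replaced by a plain MUL of its operands.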
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, return the node as is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+ N->op_begin(), N->getNumOperands());
+ AddToWorkList(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+ SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+ if (Res.getNode()) return Res;
+
+ return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ MVT VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ORNode, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N1.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and c1, c2) -> c1&c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (N1C && N1C->isAllOnesValue())
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, VT);
+ // reassociate and
+ SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+ if (RAND.getNode() != 0)
+ return RAND;
+ // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask.trunc(N0Op0.getValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ if (N1C && N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->isUnindexed() && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !LN0->isVolatile()) {
+ MVT EVT = MVT::Other;
+ uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
+ EVT = MVT::getIntegerVT(ActiveBits);
+
+ MVT LoadedVT = LN0->getMemoryVT();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+ MVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to add an offset to the pointer to
+ // load the correct bytes. For little endian systems, we merely need to
+ // read fewer bytes from the same pointer.
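+ // For example, narrowing a big-endian i32 load masked with 255 down to an
+ // i8 zextload: LVTStoreBytes = 4 and EVTStoreBytes = 1, so PtrOff = 3 and
+ // the byte is loaded from BasePtr + 3; on little-endian targets the pointer
+ // is left unchanged.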
+ unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8;
+ unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8;
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ if (TLI.isBigEndian()) {
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorkList(NewPtr.getNode());
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
+ NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ EVT, LN0->isVolatile(), Alignment);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(), Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (or x, undef) -> -1
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(~0ULL, VT);
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+ // reassociate or
+ SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+ if (ROR.getNode() != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+ LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.getNode());
+ return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
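+ // For example, if X is known to have no bits set in 0xFF00 and Y none in
+ // 0x00FF, then (or (and X, 0x00FF), (and Y, 0xFF00)) becomes
+ // (and (or X, Y), 0xFFFF).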
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+ return SDValue(Rot, 0);
+
+ return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+
+ return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
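+// For example, for i32: (or (shl x, 8), (srl x, 24)) becomes (rotl x, 8)
+// (equivalently (rotr x, 24)), and the variable form
+// (or (shl x, y), (srl x, (sub 32, y))) becomes (rotl x, y).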
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+ // Must be a legal type. Expanded and promoted types won't work with rotates.
+ MVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return 0;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return 0;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return 0; // Not part of a rotate.
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return 0; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return 0; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return 0; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask , RHSMask );
+ }
+
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+ RHSShiftAmt.getOpcode() == ISD::Constant) {
+ uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != OpSizeInBits)
+ return 0;
+
+ SDValue Rot;
+ if (HasROTL)
+ Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
+ else
+ Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+
+ if (LHSMask.getNode()) {
+ APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+ Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ }
+ if (RHSMask.getNode()) {
+ APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+ Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ }
+
+ return Rot.getNode();
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
+ // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ if (HasROTR)
+ return DAG.getNode(ISD::ROTR, DL, VT,
+ LHSShiftArg, RHSShiftAmt).getNode();
+ else
+ return DAG.getNode(ISD::ROTL, DL, VT,
+ LHSShiftArg, LHSShiftAmt).getNode();
+ }
+ }
+ }
+
+ // Look for sign/zext/any-extended or truncate cases:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getAPIntValue() == OpSizeInBits) {
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+ LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (xor x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // reassociate xor
+ SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+ if (RXOR.getNode() != 0)
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
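+ // e.g. (xor (setlt a, b), 1) -> (setge a, b)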
+ if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = LHS.getValueType().isInteger();
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+
+ if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+ switch (N0.getOpcode()) {
+ default:
+ assert(0 && "Unhandled SetCC Equivalent!");
+ abort();
+ case ISD::SETCC:
+ return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getNode()->hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+ DAG.getConstant(1, V.getValueType()));
+ AddToWorkList(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (N1C && N1C->isAllOnesValue() &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+ }
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N00C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N00C->getAPIntValue(), VT));
+ if (N01C)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N01C->getAPIntValue(), VT));
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+ }
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
+ }
+
+ // Simplify the expression using non-local knowledge.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+ SDNode *LHS = N->getOperand(0).getNode();
+ if (!LHS->hasOneUse()) return SDValue();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
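+ // For example, (shl (and (srl X, c1), 255), 2) is rewritten below as
+ // (and (shl (srl X, c1), 2), 255 << 2), exposing the mask to later combines.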
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant as well.
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant.
+ // If it is not a shift, it pessimizes some common cases like:
+ //
+ // void foo(int *X, int i) { X[i & 1235] = 1; }
+ // int bar(int *X, int i) { return X[i & 255]; }
+ SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+ if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+ BinOpLHSVal->getOpcode() != ISD::SRA &&
+ BinOpLHSVal->getOpcode() != ISD::SRL) ||
+ !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ return SDValue();
+
+ MVT VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified by the
+ // logical operation, do not perform the transformation. The HighBitSet
+ // boolean indicates the value of the high bit of the constant which would
+ // cause it to be modified for this operation.
+ if (N->getOpcode() == ISD::SRA) {
+ bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+ if (BinOpRHSSignSet != HighBitSet)
+ return SDValue();
+ }
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+ N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+
+ // Create the new shift.
+ SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+ VT, LHS->getOperand(0), N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getSizeInBits();
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ // fold (shl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(VT.getSizeInBits())))
+ return DAG.getConstant(0, VT);
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ MVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 > OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
+ // (srl (and x, (shl -1, c1)), (sub c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL << c1, VT));
+ if (c2 > c1)
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ else
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL << N1C->getZExtValue(), VT));
+
+ return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+
+ // fold (sra c1, c2) -> c1 >>s c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+ // fold (sra x, (setge c, size(x))) -> undef
+ if (N1C && N1C->getZExtValue() >= VT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+ // sext_inreg.
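+ // For example, for i32: (sra (shl x, 24), 24) becomes
+ // (sign_extend_inreg x, i8).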
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
+ MVT EVT = MVT::getIntegerVT(LowBits);
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getValueType(EVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target, this sign_extend form is likely to
+ // result in better code.
+ if (N0.getOpcode() == ISD::SHL) {
+ // Get the two constants of the shifts, CN0 = m, CN = n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine what the truncate's result bitsize and type would be.
+ unsigned VTValSize = VT.getSizeInBits();
+ MVT TruncVT =
+ MVT::getIntegerVT(VTValSize - N1C->getZExtValue());
+ // Determine the residual right-shift amount.
+ signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), the truncated-to type is legal, sign_extend is legal
+ // on that type, and the truncate to that type is both legal and free,
+ // perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ MVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 > OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ MVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0), N1);
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> (xor x, 1) iff the low bit is the only bit
+ // of x that can be set.
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~KnownZero & Mask;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy()));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ MVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ MVT VT = N->getValueType(0);
+ MVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ // fold (select true, X, Y) -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold (select false, X, Y) -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 ||
+ (VT0.isInteger() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+ N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ SDValue XORNode;
+ if (VT == VT0)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ AddToWorkList(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+    // having to say they don't support SELECT_CC on every type the DAG knows
+    // about, since there is no way to mark an opcode illegal for all value types.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if the extension is possible and the
+// transformation is profitable.
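+// Any SETCC users of the load that can themselves be extended are collected
+// in ExtendNodes so the caller can rewrite them to use the extended value.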
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+      // Both unextended and extended values are live out. There had better be
+      // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getSizeInBits();
+ unsigned DestBits = VT.getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+      // bits, it is already sign extended enough; use it directly.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
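+    // e.g. with x : i32, (sext (truncate x to i8) to i32) -> (sext_in_reg x, i8).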
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (Op.getValueType().bitsLT(VT))
+ Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+ else if (Op.getValueType().bitsGT(VT))
+ Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
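+  // e.g. with x : i32, (zext (truncate x to i8) to i32) -> (and x, 255).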
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ if (Op.getValueType().bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ } else if (Op.getValueType().bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
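+  // The AND absorbs both casts: the mask constant is zero-extended to the
+  // destination type and the AND is performed there.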
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ if (NarrowLoad.getNode() != N0.getNode())
+ CombineTo(N0.getNode(), NarrowLoad);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ }
+ }
+
+  // fold (aext (truncate x)) -> x, (truncate x), or (aext x)
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+      return TruncOp; // x iff x size == aext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT EVT = LN0->getMemoryVT();
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode())
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ case ISD::XOR:
+    // If the LHS or RHS don't contribute bits to the or/xor, drop them.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+ if (SimplifyLHS.getNode())
+ return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, where N is a multiple of
+/// the number of bits in the narrower type, transform it into a narrower
+/// load from the original address plus N/8 bytes (adjusted for endianness).
+/// If the result is to be extended, also fold the extension to form an
+/// extending load.
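+/// For example, on a little-endian target
+///   (i16 (truncate (srl (i32 (load p)), 16)))
+/// becomes an i16 load from p+2.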
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+ MVT EVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // extending back to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))
+ return SDValue();
+ }
+
+ unsigned EVTBits = EVT.getSizeInBits();
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of EVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+ if (N0.getValueType().getSizeInBits() <= EVTBits)
+ return SDValue();
+ }
+ }
+ }
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() &&
+ cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT PtrType = N0.getOperand(1).getValueType();
+
+    // For big-endian targets, we need to adjust the pointer offset so we
+    // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = EVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load = (ExtType == ISD::NON_EXTLOAD)
+ ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+ LN0->isVolatile(), NewAlign)
+ : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
+ EVT, LN0->isVolatile(), NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
+
+ // Return the new loaded value.
+ return Load;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ MVT VT = N->getValueType(0);
+ MVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
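+  // e.g. (sext_in_reg (sext_in_reg x, i16), i8) -> (sext_in_reg x, i8).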
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getSizeInBits() < EVTBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0));
+ else if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ else
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate
+ return N0.getOperand(0);
+ }
+
+  // See if we can simplify the input to this truncate through knowledge that
+  // only the low bits are being used. For example, truncating
+  // (or (shl x, 8), y) to i8 gives (trunc y), since the shl contributes
+  // nothing to the low 8 bits.
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ return ReduceLoadWidth(N);
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
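+/// For example, an i64 build_pair of two adjacent i32 loads can become a
+/// single i64 load, provided neither load is volatile and the wider load
+/// does not require more alignment than the original load provides.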
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ SDNode *LD1 = getBuildPairElt(N, 0);
+ if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ return SDValue();
+ MVT LD1VT = LD1->getValueType(0);
+ SDNode *LD2 = getBuildPairElt(N, 1);
+ const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+ if (ISD::isNON_EXTLoad(LD2) &&
+ LD2->hasOneUse() &&
+      // If both are volatile this would reduce the number of volatile loads.
+      // If one is volatile it might be ok, but be conservative and bail out.
+ !cast<LoadSDNode>(LD1)->isVolatile() &&
+ !cast<LoadSDNode>(LD2)->isVolatile() &&
+ TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
+ LoadSDNode *LD = cast<LoadSDNode>(LD1);
+ unsigned Align = LD->getAlignment();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForMVT());
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ false, Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ MVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+ if (Res.getNode() != N) return Res;
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BIT_CONVERT)
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForMVT());
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+ LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->isVolatile(), OrigAlign);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ // This often reduces constant pool loads.
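+  // e.g. for f32 the sign bit mask is 0x80000000, so (bitconvert (fneg x))
+  // becomes (xor (bitconvert x), 0x80000000).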
+ if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+ SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+ N0.getOperand(0));
+ AddToWorkList(NewConv.getNode());
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(SignBit, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(~SignBit, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ MVT IntXVT = MVT::getIntegerVT(OrigXWidth);
+ if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+ SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ IntXVT, N0.getOperand(1));
+ AddToWorkList(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ AddToWorkList(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ }
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+ X, DAG.getConstant(SignBit, VT));
+ AddToWorkList(X.getNode());
+
+ SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+ Cst, DAG.getConstant(~SignBit, VT));
+ AddToWorkList(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+ if (CombineLD.getNode())
+ return CombineLD;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
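+/// For example, bitconverting a v2i32 build_vector of constants into v4i16
+/// splits each i32 element into two i16 pieces (ordered according to the
+/// target's endianness), while converting v4i16 into v2i32 merges pairs of
+/// elements.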
+SDValue DAGCombiner::
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
+ MVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Op = BV->getOperand(i);
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ DstEltVT, Op));
+ AddToWorkList(Ops.back().getNode());
+ }
+ MVT VT = MVT::getVectorVT(DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector whose elements are the
+    // same size.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+  // Now we know the input is an integer vector. If the output is an FP type,
+  // convert to an integer vector of the destination element size first, then
+  // bitconvert those elements to FP.
+ if (DstEltVT.isFloatingPoint()) {
+ assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+ MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+  // Okay, we know the src/dst types are both integers of differing sizes.
+  // Handle growing first.
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = TLI.isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+ EltIsUndef = false;
+
+ NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+ zextOrTrunc(SrcBitSize).zext(DstBitSize));
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ }
+
+ MVT VT = MVT::getVectorVT(DstEltVT, Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
+ bool isS2V = ISD::isScalarToVector(BV);
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue()).zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+ // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ Ops[0]);
+ OpVal = OpVal.lshr(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (TLI.isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+  // fold (fadd c1, c2) -> c1 + c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+ // fold (fadd A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
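+  // (isNegatibleForFree returning 2 indicates the negated form is cheaper, so
+  //  stripping the fneg here is profitable; see the comment in visitFMUL)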
+ if (isNegatibleForFree(N1, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if (isNegatibleForFree(N0, LegalOperations) == 2)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+ GetNegatedExpression(N0, DAG, LegalOperations));
+
+ // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+ // fold (fsub A, 0) -> A
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+ // fold (fsub 0, B) -> -B
+ if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ }
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+ // fold (fmul A, 0) -> 0
+ if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+  // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+ // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free; check to see if at least one is cheaper
+      // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), N1));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free; check to see if at least one is cheaper
+      // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1CFP) {
+ const APFloat& V = N1CFP->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ N0, N1.getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ MVT VT = N->getValueType(0);
+ MVT OpVT = N0.getValueType();
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128)
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ MVT VT = N->getValueType(0);
+ MVT OpVT = N0.getValueType();
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (N0C && OpVT != MVT::ppcf128)
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP && N0.getValueType() != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value-preserving truncation if both rounds are.
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+ N0.getNode()->getConstantOperandVal(1) == 1;
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(IsTrunc));
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ AddToWorkList(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ MVT VT = N->getValueType(0);
+ MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+  // If this fp_extend is only used by an fp_round, don't fold it; let the
+  // fp_round(fp_extend x) combine act on the pair instead.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getNode()->getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ if (isNegatibleForFree(N0, LegalOperations))
+ return GetNegatedExpression(N0, DAG, LegalOperations);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // constant pool values.
+ if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ MVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // constant pool values.
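+  // For example (again a rough sketch for an IEEE f32 bit pattern), this
+  // turns (fabs (bitconvert i32:x)) into (bitconvert (and x, 0x7fffffff)).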
+ if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ N0.getOperand(0).getValueType().isInteger() &&
+ !N0.getOperand(0).getValueType().isVector()) {
+ SDValue Int = N0.getOperand(0);
+ MVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+ DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+ AddToWorkList(Int.getNode());
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // never taken branch, fold to chain
+ if (N1C && N1C->isNullValue())
+ return Chain;
+ // unconditional branch
+ if (N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N1.getOperand(0);
+ SDValue Op1 = N1.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND &&
+ Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp0 = Op0.getOperand(0);
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDValue SetCC =
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETNE);
+
+ // Replace the uses of SRL with SETCC
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+ false);
+ if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode());
+
+ // fold br_cc true, dest -> br dest (unconditional branch)
+ if (SCCC && !SCCC->isNullValue())
+ return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ // fold br_cc false, dest -> unconditional fall through
+ if (SCCC && SCCC->isNullValue())
+ return N->getOperand(0);
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
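+/// For example (roughly): a load whose address is (add ptr, 4), where the
+/// add has other users, can become a pre-increment load that also produces
+/// the updated pointer; the other users of the add are then rewritten to use
+/// the pointer value produced by the new load.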
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ MVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+  // Don't create an indexed load / store with a zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == N)
+ continue;
+ if (Use->isPredecessorOf(N))
+ return false;
+
+ if (!((Use->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+ (Use->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
+ DOUT << '\n';
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Ptr.getNode());
+ DAG.DeleteNode(Ptr.getNode());
+
+ return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation folded the add / subtract into the new indexed
+/// load / store effectively and all of its uses are redirected to the
+/// new load / store.
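+/// For example (roughly): a load from ptr followed by a separate
+/// (add ptr, 4) can become a post-increment load that yields both the loaded
+/// value and the incremented pointer, letting the separate add be deleted.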
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (!LegalOperations)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ MVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+ E = Ptr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Op = *I;
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+ if (Ptr == Offset)
+ std::swap(BasePtr, Offset);
+ if (Ptr != BasePtr)
+ continue;
+      // Don't create an indexed load / store with a zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->isNullValue())
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr.
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+ EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+ SDNode *Use = *II;
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode::use_iterator III = Use->use_begin(),
+ EEE = Use->use_end(); III != EEE; ++III) {
+ SDNode *UseUse = *III;
+ if (!((UseUse->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
+ (UseUse->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
+ DOUT << '\n';
+ WorkListRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+ &DeadNodes);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+ &DeadNodes);
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0),
+ &DeadNodes);
+ removeFromWorkList(Op);
+ DAG.DeleteNode(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// InferAlignment - If we can infer some alignment information from this
+/// pointer, return it.
+static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) {
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+  int FrameIdx = 1 << 31;  // Sentinel meaning no frame index was found.
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ // FIXME: Handle FI+CST.
+ const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
+ if (MFI.isFixedObjectIndex(FrameIdx)) {
+ int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
+
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = MinAlign(ObjectOffset, StackAlign);
+
+ // Finally, the frame object itself may have a known alignment. Factor
+ // the alignment + offset into a new alignment. For example, if we know
+ // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
+ // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
+ // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ return std::max(Align, FIInfoAlign);
+ }
+ }
+
+ return 0;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (Align > LD->getAlignment())
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->isVolatile(), Align);
+ }
+ }
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (N->hasNUsesOfValue(0, 0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG));
+ DOUT << "\n";
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+
+ if (N->use_empty()) {
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+ if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG));
+ DOUT << " and 2 other values\n";
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)),
+ &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ removeFromWorkList(N);
+ DAG.DeleteNode(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ !LD->isVolatile()) {
+ if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+      // Replace the chain to avoid the dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+ BetterChain, Ptr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->getMemoryVT(),
+ LD->isVolatile(),
+ LD->getAlignment());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
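+/// For example (a rough sketch, assuming an 8-bit op is legal and profitable
+/// on the target): an i32 sequence "x = load p; store (or x, 0xff00), p" can
+/// be narrowed to an i8 load / or / store of just the byte at offset 1 on a
+/// little-endian target.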
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (ST->isVolatile())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ MVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr)
+ return SDValue();
+
+    // Find the type to which to narrow the load / op / store.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ MVT NewVT = MVT::getIntegerVT(NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = MVT::getIntegerVT(NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+    // If the lowest changed bit does not fall on a NewBW-bit boundary, start
+    // at the previous boundary.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ if (NewAlign <
+ TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT()))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ false, NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (Align > ST->getAlignment())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), Align);
+ }
+ }
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
+ if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ unsigned OrigAlign = ST->getAlignment();
+ MVT SVT = Value.getOperand(0).getValueType();
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(SVT.getTypeForMVT());
+ if (Align <= OrigAlign &&
+ ((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+ return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
+ }
+
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x3f800000, Ptr'
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+ if (Value.getOpcode() != ISD::TargetConstantFP) {
+ SDValue Tmp;
+ switch (CFP->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unknown FP type");
+ case MVT::f80: // We don't do this for these yet.
+ case MVT::f128:
+ case MVT::ppcf128:
+ break;
+ case MVT::f32:
+ if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), MVT::i32);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->getAlignment());
+ }
+ break;
+ case MVT::f64:
+ if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), MVT::i64);
+ return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->getAlignment());
+ } else if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
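+        // For example, storing the f64 constant 1.0 (bit pattern
+        // 0x3FF0000000000000) becomes stores of i32 0x00000000 and
+        // i32 0x3FF00000, with the order depending on endianness.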
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+
+ SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(),
+ isVolatile, ST->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ SVOffset += 4;
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+ Ptr, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ St0, St1);
+ }
+
+ break;
+ }
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ // Replace the chain to avoid dependency.
+ SDValue ReplStore;
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getSrcValue(),ST->getSrcValueOffset(),
+ ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), ST->getAlignment());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+ MVT::Other, Chain, ReplStore);
+
+ // Don't add users to work list.
+ return CombineTo(N, Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger()) {
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ SDValue Shorter =
+ GetDemandedBits(Value,
+ APInt::getLowBitsSet(Value.getValueSizeInBits(),
+ ST->getMemoryVT().getSizeInBits()));
+ AddToWorkList(Value.getNode());
+ if (Shorter.getNode())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueSizeInBits(),
+ ST->getMemoryVT().getSizeInBits())))
+ return SDValue(N, 0);
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && !ST->isVolatile() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+ }
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+
+ // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+ // vector with the inserted element.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ InVec.getValueType(), &Ops[0], Ops.size());
+ }
+ // If the invec is an UNDEF and if EltNo is a constant, create a new
+ // BUILD_VECTOR with undef elements and the inserted element.
+ if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
+ isa<ConstantSDNode>(EltNo)) {
+ MVT VT = InVec.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
+
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ InVec.getValueType(), &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ SDValue InVec = N->getOperand(0);
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ MVT EltVT = InVec.getValueType().getVectorElementType();
+ SDValue InOp = InVec.getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp);
+ return InOp;
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations) return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ SDValue EltNo = N->getOperand(1);
+
+ if (isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ bool NewLoad = false;
+ bool BCNumEltsChanged = false;
+ MVT VT = InVec.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ MVT LVT = EVT;
+
+ if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+ MVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ EVT = BCVT.getVectorElementType();
+ NewLoad = true;
+ }
+
+ LoadSDNode *LN0 = NULL;
+ const ShuffleVectorSDNode *SVN = NULL;
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ InVec.getOperand(0).getValueType() == EVT &&
+ ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+ } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+      // Select the input vector, guarding against an out-of-range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
+ InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+ if (InVec.getOpcode() == ISD::BIT_CONVERT)
+ InVec = InVec.getOperand(0);
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems;
+ }
+ }
+
+ if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+ return SDValue();
+
+ unsigned Align = LN0->getAlignment();
+ if (NewLoad) {
+ // Check the resultant load doesn't need a higher alignment than the
+ // original load.
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT());
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+ return SDValue();
+
+ Align = NewAlign;
+ }
+
+ SDValue NewPtr = LN0->getBasePtr();
+ if (Elt) {
+ unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+ MVT PtrType = NewPtr.getValueType();
+ if (TLI.isBigEndian())
+ PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+ NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ }
+
+ return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->isVolatile(), Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ MVT VT = N->getValueType(0);
+ MVT EltType = VT.getVectorElementType();
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
+ SDValue VecIn1, VecIn2;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ // Ignore undef inputs.
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ // If this input is something other than a EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // If the input vector type disagrees with the result of the build_vector,
+ // we can't make a shuffle.
+ SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+ if (ExtractedFromVec.getValueType() != VT) {
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+
+ // Otherwise, remember this. We allow up to two distinct input vectors.
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (VecIn1.getNode() == 0) {
+ VecIn1 = ExtractedFromVec;
+ } else if (VecIn2.getNode() == 0) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDValue(0, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.getNode()) {
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // If extracting from the first vector, just use the index directly.
+ SDValue Extract = N->getOperand(i);
+ SDValue ExtVal = Extract.getOperand(1);
+ if (Extract.getOperand(0) == VecIn1) {
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ if (ExtIndex > VT.getVectorNumElements())
+ return SDValue();
+
+ Mask.push_back(ExtIndex);
+ continue;
+ }
+
+ // Otherwise, use InIdx + VecSize
+ unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ Mask.push_back(Idx+NumInScalars);
+ }
+
+    // Bail out if the result vector type isn't legal and only legal types
+    // are allowed.
+ if (!TLI.isTypeLegal(VT) && LegalTypes)
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDValue Ops[2];
+ Ops[0] = VecIn1;
+ Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+ // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+ // inputs come from at most two distinct vectors, turn this into a shuffle
+ // node.
+
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  // FIXME: The code below is currently dead; this early return disables the
+  // remaining shuffle simplifications.
+  return SDValue();
+
+ MVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType().getVectorNumElements() == NumElts &&
+ "Vector shuffle must be normalized in DAG");
+
+ // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+ // If it is a splat, check if the argument vector is a build_vector with
+ // all scalar elements the same.
+ if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+ SDNode *V = N0.getNode();
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BIT_CONVERT) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElems = V->getNumOperands();
+ unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
+ if (NumElems > BaseIdx) {
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
+/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (N->getOpcode() == ISD::AND) {
+ if (RHS.getOpcode() == ISD::BIT_CONVERT)
+ RHS = RHS.getOperand(0);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
+ return SDValue();
+ else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ MVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ MVT EVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ RVT, &ZeroOps[0], ZeroOps.size());
+ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ // After legalize, the target may be depending on adds and other
+ // binary ops to provide legal ways to construct constants or other
+ // things. Simplifying them may result in a loss of legality.
+ if (LegalOperations) return SDValue();
+
+ MVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+
+ MVT EltType = VT.getVectorElementType();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Shuffle = XformToShuffleWithZero(N);
+ if (Shuffle.getNode()) return Shuffle;
+
+ // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+ // this operation.
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+ RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ // If these two elements can't be folded, bail out.
+ if ((LHSOp.getOpcode() != ISD::UNDEF &&
+ LHSOp.getOpcode() != ISD::Constant &&
+ LHSOp.getOpcode() != ISD::ConstantFP) ||
+ (RHSOp.getOpcode() != ISD::UNDEF &&
+ RHSOp.getOpcode() != ISD::Constant &&
+ RHSOp.getOpcode() != ISD::ConstantFP))
+ break;
+
+ // Can't fold divide by zero.
+ if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+ N->getOpcode() == ISD::FDIV) {
+ if ((RHSOp.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+ (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ break;
+ }
+
+ Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
+ EltType, LHSOp, RHSOp));
+ AddToWorkList(Ops.back().getNode());
+ assert((Ops.back().getOpcode() == ISD::UNDEF ||
+ Ops.back().getOpcode() == ISD::Constant ||
+ Ops.back().getOpcode() == ISD::ConstantFP) &&
+ "Scalar binop didn't fold!");
+ }
+
+ if (Ops.size() == LHS.getNumOperands()) {
+ MVT VT = LHS.getValueType();
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+ SDValue N1, SDValue N2){
+  assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorkList(SETCC.getNode());
+ return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+ SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select. Callers of this should assume that TheSelect is deleted if this
+/// returns true. As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD &&
+ // Do not let this transformation reduce the number of volatile loads.
+ !cast<LoadSDNode>(LHS)->isVolatile() &&
+ !cast<LoadSDNode>(RHS)->isVolatile() &&
+ // Token chains must be identical.
+ LHS.getOperand(0) == RHS.getOperand(0)) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // If this is an EXTLOAD, the VT's must match.
+ if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+ // FIXME: this conflates two src values, discarding one. This is not
+ // the right thing to do, but nothing uses srcvalues now. When they do,
+ // turn SrcValue into a list of locations.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) {
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ }
+ } else {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) {
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+ }
+
+ if (Addr.getNode()) {
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ LLD->getChain(),
+ Addr,LLD->getSrcValue(),
+ LLD->getSrcValueOffset(),
+ LLD->isVolatile(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ LLD->getChain(), Addr, LLD->getSrcValue(),
+ LLD->getSrcValueOffset(),
+ LLD->getMemoryVT(),
+ LLD->isVolatile(),
+ LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3,
+ ISD::CondCode CC, bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ MVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && !SCCC->isNullValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->isNullValue())
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->getValueAPF().isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+  // types and we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float) and if the ConstantFP is not legal, because if
+ // it is legal, we may not need to store the FP constant in a constant pool.
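+  // For example (roughly), "(a < b) ? 1.0f : 2.0f" becomes a load from a
+  // two-element constant pool array {2.0f, 1.0f} at offset
+  // ((a < b) ? 4 : 0), since element 0 holds the false value.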
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load, they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ const Type *FPTy = Elts[0]->getType();
+ const TargetData &TD = *TLI.getTargetData();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false,
+ Alignment);
+
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, size(X)-1), A)
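+  // For example, with a 32-bit X this gives (and (sra X, 31), A).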
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ N0.getValueType().isInteger() &&
+ N2.getValueType().isInteger() &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ MVT XType = N0.getValueType();
+ MVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+      // (and (sra X, size(X)-1), A) -> (and (srl X, C2), A) iff A is a
+      // single-bit constant.
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits()-ShCtV-1;
+ SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+ XType, N0, ShCt);
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getAPIntValue() == 1)
+ return SDValue();
+
+ // Get a SetCC of the condition
+ // FIXME: Should probably make sure that setcc is legal if we ever have a
+ // target where it isn't.
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
+
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ }
+
+ // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+ // otherwise, go ahead with the folds.
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ MVT XType = N0.getValueType();
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+ SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+ return Res;
+ }
+
+ // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::CTLZ, XType))) {
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+ return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+ DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ getShiftAmountTy()));
+ }
+ // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+ SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+ XType, DAG.getConstant(0, XType), N0);
+ SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+ return DAG.getNode(ISD::SRL, DL, XType,
+ DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ }
+ // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+ SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ }
+ }
+
+ // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+ N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
+ MVT XType = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
+ N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+ if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+ MVT XType = N0.getValueType();
+ if (SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level == Unrestricted, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
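+// For illustration only (not part of this change): on i32 the guide cited
+// above turns (sdiv X, 3) into roughly
+//   T = mulhs X, 0x55555556   // high 32 bits of X * ((2^32 + 2) / 3)
+//   Q = add T, (srl X, 31)    // add one when X is negative
+// so no divide instruction is emitted.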
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if base is known not to alias with anything
+/// but itself. Provides base object and offset as results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0;
+
+ // If it's adding a simple constant, then integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // If it's any of the following then it can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base) ||
+ isa<ConstantPoolSDNode>(Base) ||
+ isa<GlobalAddressSDNode>(Base);
+}
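+// Example (illustration only): for Ptr == (add FrameIndex#1, 8) this returns
+// Base == FrameIndex#1, Offset == 8, and true, since a frame index cannot
+// alias anything other than itself.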
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2) const {
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+ bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+
+ // If they have the same base address, then...
+ if (Base1 == Base2)
+ // Check to see if the addresses overlap.
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+ // If we know both bases then they can't alias.
+ if (KnownBase1 && KnownBase2) return false;
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+ int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+ int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
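+// Worked example (illustration only): two accesses off the same base, one at
+// Offset1 == 0 with Size1 == 4 and one at Offset2 == 4 with Size2 == 4, do
+// not alias because (0 + 4) <= 4; the same accesses at offsets 0 and 2 would
+// overlap and so would be reported as aliasing.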
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset) const {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ Size = LD->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LD->getSrcValue();
+ SrcValueOffset = LD->getSrcValueOffset();
+ return true;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ Size = ST->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = ST->getSrcValue();
+ SrcValueOffset = ST->getSrcValueOffset();
+ } else {
+ assert(0 && "FindAliasInfo expected a memory operand");
+ }
+
+ return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ std::set<SDNode *> Visited; // Visited node set.
+
+ // Get alias information for node.
+ SDValue Ptr;
+ int64_t Size = 0;
+ const Value *SrcValue = 0;
+ int SrcValueOffset = 0;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+
+ // Don't bother if we've been here before.
+ if (Visited.find(Chain.getNode()) != Visited.end()) continue;
+ Visited.insert(Chain.getNode());
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry token is the ideal chain operand, but it is handled in FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDValue OpPtr;
+ int64_t OpSize = 0;
+ const Value *OpSrcValue = 0;
+ int OpSrcValueOffset = 0;
+ bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset);
+
+ // If the chain is an alias, then stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ // Clean up old chain.
+ AddToWorkList(Chain.getNode());
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+ // We have to check each of the operands of the token factor, so we queue
+ // them up. Adding the operands to the queue (stack) in reverse order
+ // maintains the original order and increases the likelihood that getNode
+ // will find a matching token factor (CSE.)
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ // Eliminate the token factor if we can.
+ AddToWorkList(Chain.getNode());
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ if (Aliases.size() == 0) {
+ // If no operands then chain to entry token.
+ return DAG.getEntryNode();
+ } else if (Aliases.size() == 1) {
+ // If a single operand then chain to it. We don't need to revisit it.
+ return Aliases[0];
+ }
+
+ // Construct a custom tailored token factor.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
+
+ // Make sure the old chain gets cleaned up.
+ if (NewChain != OldChain) AddToWorkList(OldChain.getNode());
+
+ return NewChain;
+}
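+// For illustration (not part of this patch): if a load's chain is a
+// TokenFactor of several stores and only one of those stores may alias the
+// load, GatherAllAliases returns just that store and the load is re-chained
+// directly to it; if none alias, the load is chained to the entry node and
+// can be scheduled independently of the stores.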
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+ /// run - This is the main entry point to this class.
+ ///
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 0000000..6becff3
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1033 @@
+//===-- FastISel.cpp - Implementation of the FastISel class --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/DebugLoc.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SelectionDAGBuild.h"
+using namespace llvm;
+
+unsigned FastISel::getRegForValue(Value *V) {
+ MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT::SimpleValueType VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1 to a legal type though, because it's common and easy.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ if (ValueMap.count(V))
+ return ValueMap[V];
+ unsigned Reg = LocalValueMap[V];
+ if (Reg != 0)
+ return Reg;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType()));
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+ if (!Reg) {
+ const APFloat &Flt = CF->getValueAPF();
+ MVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+ }
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (!SelectOperator(CE, CE->getOpcode())) return 0;
+ Reg = LocalValueMap[CE];
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg != 0)
+ LocalValueMap[V] = Reg;
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ if (ValueMap.count(V))
+ return ValueMap[V];
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return Reg;
+ }
+
+ unsigned &AssignedReg = ValueMap[I];
+ if (AssignedReg == 0)
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
+ TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
+ Reg, RegClass, RegClass);
+ }
+ return AssignedReg;
+}
+
+unsigned FastISel::getRegForGEPIndex(Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return 0;
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT))
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
+ ISD::SIGN_EXTEND, IdxN);
+ else if (IdxVT.bitsGT(PtrVT))
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
+ ISD::TRUNCATE, IdxN);
+ return IdxN;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
+ MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(VT);
+ else
+ return false;
+ }
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, CI->getZExtValue());
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, CF);
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op1);
+ if (ResultReg == 0)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectGetElementPtr(User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ const Type *Ty = I->getOperand(0)->getType();
+ MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT();
+ for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end();
+ OI != E; ++OI) {
+ Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
+ // FIXME: This can be optimized by combining the add with a
+ // subsequent one.
+ N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->getZExtValue() == 0) continue;
+ uint64_t Offs =
+ TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+ unsigned IdxN = getRegForGEPIndex(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, N);
+ return true;
+}
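+// Rough sketch of the arithmetic above (illustrative, not from this patch):
+// on a 32-bit target, "getelementptr i32* %p, i32 %i" with a non-constant %i
+// emits approximately
+//   IdxN = %i              // already pointer-sized, so no extension
+//   Tmp  = mul IdxN, 4     // element size of i32
+//   N    = add %p, Tmp
+// while constant indices and struct fields fold into a single constant add.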
+
+bool FastISel::SelectCall(User *I) {
+ Function *F = cast<CallInst>(I)->getCalledFunction();
+ if (!F) return false;
+
+ unsigned IID = F->getIntrinsicID();
+ switch (IID) {
+ default: break;
+ case Intrinsic::dbg_stoppoint: {
+ DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
+ if (DIDescriptor::ValidDebugInfo(SPI->getContext(), CodeGenOpt::None)) {
+ DICompileUnit CU(cast<GlobalVariable>(SPI->getContext()));
+ unsigned Line = SPI->getLine();
+ unsigned Col = SPI->getColumn();
+ unsigned Idx = MF.getOrCreateDebugLocID(CU.getGV(), Line, Col);
+ setCurDebugLoc(DebugLoc::get(Idx));
+ }
+ return true;
+ }
+ case Intrinsic::dbg_region_start: {
+ DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I);
+ if (DIDescriptor::ValidDebugInfo(RSI->getContext(), CodeGenOpt::None) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ unsigned ID =
+ DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext()));
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ BuildMI(MBB, DL, II).addImm(ID);
+ }
+ return true;
+ }
+ case Intrinsic::dbg_region_end: {
+ DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I);
+ if (DIDescriptor::ValidDebugInfo(REI->getContext(), CodeGenOpt::None) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ unsigned ID = 0;
+ DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext()));
+ if (!Subprogram.isNull() && !Subprogram.describes(MF.getFunction())) {
+ // This is end of an inlined function.
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ ID = DW->RecordInlinedFnEnd(Subprogram);
+ if (ID)
+ // Returned ID is 0 if this is unbalanced "end of inlined
+ // scope". This could happen if optimizer eats dbg intrinsics
+ // or "beginning of inlined scope" is not recognized due to
+ // missing location info. In such cases, do ignore this region.end.
+ BuildMI(MBB, DL, II).addImm(ID);
+ } else {
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ ID = DW->RecordRegionEnd(cast<GlobalVariable>(REI->getContext()));
+ BuildMI(MBB, DL, II).addImm(ID);
+ }
+ }
+ return true;
+ }
+ case Intrinsic::dbg_func_start: {
+ DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
+ Value *SP = FSI->getSubprogram();
+ if (!DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::None))
+ return true;
+
+ // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what
+ // (most?) gdb expects.
+ DebugLoc PrevLoc = DL;
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+ if (!Subprogram.describes(MF.getFunction())) {
+ // This is the beginning of an inlined function.
+
+ // If llvm.dbg.func.start is seen in a new block before any
+ // llvm.dbg.stoppoint intrinsic then the location info is unknown.
+ // FIXME: Why is DebugLoc reset at the beginning of each block?
+ if (PrevLoc.isUnknown())
+ return true;
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ setCurDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID(
+ CompileUnit.getGV(), Line, 0)));
+
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
+ unsigned LabelID = DW->RecordInlinedFnStart(Subprogram,
+ DICompileUnit(PrevLocTpl.CompileUnit),
+ PrevLocTpl.Line,
+ PrevLocTpl.Col);
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ BuildMI(MBB, DL, II).addImm(LabelID);
+ }
+ } else {
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ MF.setDefaultDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID(
+ CompileUnit.getGV(), Line, 0)));
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ // llvm.dbg.func_start also defines beginning of function scope.
+ DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram()));
+ }
+ }
+
+ return true;
+ }
+ case Intrinsic::dbg_declare: {
+ DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+ Value *Variable = DI->getVariable();
+ if (DIDescriptor::ValidDebugInfo(Variable, CodeGenOpt::None) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ // Determine the address of the declared object.
+ Value *Address = DI->getAddress();
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI) break;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI == StaticAllocaMap.end()) break; // VLAs.
+ int FI = SI->second;
+
+ // Determine the debug global variable.
+ GlobalValue *GV = cast<GlobalVariable>(Variable);
+
+ // Build the DECLARE instruction.
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE);
+ MachineInstr *DeclareMI
+ = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV);
+ DIVariable DV(cast<GlobalVariable>(GV));
+ if (!DV.isNull()) {
+ // This is a local variable
+ DW->RecordVariableScope(DV, DeclareMI);
+ }
+ }
+ return true;
+ }
+ case Intrinsic::eh_exception: {
+ MVT VT = TLI.getValueType(I->getType());
+ switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
+ default: break;
+ case TargetLowering::Expand: {
+ assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!");
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Reg, RC, RC);
+ assert(InsertedCopy && "Can't copy address registers!");
+ InsertedCopy = InsertedCopy;
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ break;
+ }
+ case Intrinsic::eh_selector_i32:
+ case Intrinsic::eh_selector_i64: {
+ MVT VT = TLI.getValueType(I->getType());
+ switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
+ default: break;
+ case TargetLowering::Expand: {
+ MVT VT = (IID == Intrinsic::eh_selector_i32 ?
+ MVT::i32 : MVT::i64);
+
+ if (MMI) {
+ if (MBB->isLandingPad())
+ AddCatchInfo(*cast<CallInst>(I), MMI, MBB);
+ else {
+#ifndef NDEBUG
+ CatchInfoLost.insert(cast<CallInst>(I));
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) MBB->addLiveIn(Reg);
+ }
+
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Reg, RC, RC);
+ assert(InsertedCopy && "Can't copy address registers!");
+ InsertedCopy = InsertedCopy;
+ UpdateValueMap(I, ResultReg);
+ } else {
+ unsigned ResultReg =
+ getRegForValue(Constant::getNullValue(I->getType()));
+ UpdateValueMap(I, ResultReg);
+ }
+ return true;
+ }
+ }
+ break;
+ }
+ }
+ return false;
+}
+
+bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) {
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal. Or as a special case,
+ // it may be i1 if we're doing a truncate because that's
+ // easy and somewhat common.
+ if (!TLI.isTypeLegal(DstVT))
+ if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the source operand is legal. Or as a special case,
+ // it may be i1 if we're doing zero-extension because that's
+ // easy and somewhat common.
+ if (!TLI.isTypeLegal(SrcVT))
+ if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // If the operand is i1, arrange for the high bits in the register to be zero.
+ if (SrcVT == MVT::i1) {
+ SrcVT = TLI.getTypeToTransformTo(SrcVT);
+ InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg);
+ if (!InputReg)
+ return false;
+ }
+ // If the result is i1, truncate to the target's type for i1 first.
+ if (DstVT == MVT::i1)
+ DstVT = TLI.getTypeToTransformTo(DstVT);
+
+ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+ DstVT.getSimpleVT(),
+ Opcode,
+ InputReg);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::SelectBitCast(User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+ DstVT == MVT::Other || !DstVT.isSimple() ||
+ !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ ResultReg = createResultReg(DstClass);
+
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Op0, DstClass, SrcClass);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+
+ // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ ISD::BIT_CONVERT, Op0);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(Instruction *I) {
+ return SelectOperator(I, I->getOpcode());
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
+ MachineFunction::iterator NextMBB =
+ next(MachineFunction::iterator(MBB));
+
+ if (MBB->isLayoutSuccessor(MSucc)) {
+ // The unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+ }
+ MBB->addSuccessor(MSucc);
+}
+
+bool
+FastISel::SelectOperator(User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::Sub: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::Mul: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc);
+ return true;
+ }
+
+ // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::PHI:
+ // PHI nodes are already emitted.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ )
+ : MBB(0),
+ ValueMap(vm),
+ MBBMap(bm),
+ StaticAllocaMap(am),
+#ifndef NDEBUG
+ CatchInfoLost(cil),
+#endif
+ MF(mf),
+ MMI(mmi),
+ DW(dw),
+ MRI(MF.getRegInfo()),
+ MFI(*MF.getFrameInfo()),
+ MCP(*MF.getConstantPool()),
+ TM(MF.getTarget()),
+ TD(*TM.getTargetData()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ unsigned /*Op1*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType,
+ unsigned /*Op0*/, unsigned /*Op1*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+ unsigned Op0, uint64_t Imm,
+ MVT::SimpleValueType ImmType) {
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0)
+ return 0;
+ return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
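+// Example use (illustrative): FastEmit_ri_(MVT::i32, ISD::ADD, Op0, Imm,
+// MVT::i32) first asks the target for an add-register-immediate; if the
+// target cannot encode the immediate, the constant is materialized with
+// FastEmit_i and a register-register add is emitted instead.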
+
+/// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries
+/// to emit an instruction with a floating-point immediate operand using
+/// FastEmit_rf. If that fails, it materializes the immediate into a register
+/// and tries FastEmit_rr instead.
+unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+ unsigned Op0, ConstantFP *FPImm,
+ MVT::SimpleValueType ImmType) {
+ // First check if immediate type is legal. If not, we can't use the rf form.
+ unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+ if (ResultReg != 0)
+ return ResultReg;
+
+ // Materialize the constant in a register.
+ unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
+ if (MaterialReg == 0) {
+ // If the target doesn't have a way to directly enter a floating-point
+ // value into a register, use an alternate approach.
+ // TODO: The current approach only supports floating-point constants
+ // that can be constructed by conversion from integer values. This should
+ // be replaced by code that creates a load from a constant-pool entry,
+ // which will require some target-specific work.
+ const APFloat &Flt = FPImm->getValueAPF();
+ MVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (!isExact)
+ return 0;
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::Constant, IntVal.getZExtValue());
+ if (IntegerReg == 0)
+ return 0;
+ MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
+ ISD::SINT_TO_FP, IntegerReg);
+ if (MaterialReg == 0)
+ return 0;
+ }
+ return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
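+// Example (illustration only): a constant such as 42.0 converts exactly to
+// the integer 42, so when the target cannot materialize the FP immediate
+// directly the fallback above emits the integer 42 followed by SINT_TO_FP;
+// a value such as 0.5 has no exact integer form, so the emission fails and
+// returns 0.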
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(MBB, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, unsigned Op1) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, unsigned Op1, uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addImm(Imm);
+ else {
+ BuildMI(MBB, DL, II).addImm(Imm);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT,
+ unsigned Op0, uint32_t Idx) {
+ const TargetRegisterClass* RC = MRI.getRegClass(Op0);
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::EXTRACT_SUBREG);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+/// with all but the least significant bit set to zero.
+unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000..2cd67e6
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3091 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge setcc and branch instructions into brcc's.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ CodeGenOpt::Level OptLevel;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// legalized. We use this to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SDValue LastCALLSEQ_END;
+
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
+
+ enum LegalizeAction {
+ Legal, // The target natively supports this operation.
+ Promote, // This operation should be executed in a larger type.
+ Expand // Try to expand this to other ops, otherwise use a libcall.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// value type, where the two bits correspond to the LegalizeAction enum.
+ /// This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+ /// getTypeAction - Return how we should legalize values of this type, either
+ /// it is already legal or we need to expand it into multiple registers of
+ /// smaller integer type, or we need to promote it to a larger type.
+ LegalizeAction getTypeAction(MVT VT) const {
+ return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ ///
+ bool isTypeLegal(MVT VT) const {
+ return getTypeAction(VT) == Legal;
+ }
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - We know that the specified value has a legal type.
+ /// Recursively ensure that the operands have legal types, then return the
+ /// result.
+ SDValue LegalizeOp(SDValue O);
+
+ /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+
+ SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ SDValue ExpandDBG_STOPPOINT(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+
+ void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const {
+ MVT EltVT = NVT.getVectorElementType();
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
+ CodeGenOpt::Level ol)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+ if (Node->getOpcode() == ISD::CALLSEQ_END)
+ return Node;
+ if (Node->use_empty())
+ return 0; // No CallSeqEnd
+
+ // The chain is usually at the end.
+ SDValue TheChain(Node, Node->getNumValues()-1);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Sometimes it's at the beginning.
+ TheChain = SDValue(Node, 0);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Otherwise, hunt for it.
+ for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+ if (Node->getValueType(i) == MVT::Other) {
+ TheChain = SDValue(Node, i);
+ break;
+ }
+
+ // Otherwise, we walked into a node without a chain.
+ if (TheChain.getValueType() != MVT::Other)
+ return 0;
+ }
+ }
+
+ for (SDNode::use_iterator UI = Node->use_begin(),
+ E = Node->use_end(); UI != E; ++UI) {
+
+ // Make sure to only follow users of our token chain.
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+ if (User->getOperand(i) == TheChain)
+ if (SDNode *Result = FindCallEndFromCallStart(User))
+ return Result;
+ }
+ return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ assert(Node && "Didn't find callseq_start for a call??");
+ if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
+/// see if any uses can reach Dest. If none of the operands can get to Dest,
+/// legalize them, legalize ourselves, and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo. This avoids retraversing them an exponential number of times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+ if (N == Dest) return true; // N certainly leads to Dest :)
+
+ // If we've already processed this node and it does lead to Dest, there is no
+ // need to reprocess it.
+ if (NodesLeadingTo.count(N)) return true;
+
+ // If the first result of this node has already been legalized, then it
+ // cannot lead to Dest.
+ if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+ // Okay, this node has not already been legalized. Check and legalize all
+ // operands. If none lead to Dest, then we can legalize this node.
+ bool OperandsLeadToDest = false;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OperandsLeadToDest |= // If an operand leads to Dest, so do we.
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo);
+
+ if (OperandsLeadToDest) {
+ NodesLeadingTo.insert(N);
+ return true;
+ }
+
+ // Okay, this node looks safe, legalize it and return false.
+ LegalizeOp(SDValue(N, 0));
+ return false;
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool Extend = false;
+ DebugLoc dl = CFP->getDebugLoc();
+
+ // If a FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ MVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ MVT OrigVT = VT;
+ MVT SVT = VT;
+ while (SVT != MVT::f32) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1);
+ if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ const Type *SType = SVT.getTypeForMVT();
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend)
+ return DAG.getExtLoad(ISD::EXTLOAD, dl,
+ OrigVT, DAG.getEntryNode(),
+ CPIdx, PseudoSourceValue::getConstantPool(),
+ 0, VT, false, Alignment);
+ return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false, Alignment);
+}
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static
+SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ MVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ int SVOffset = ST->getSrcValueOffset();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ MVT intVT = MVT::getIntegerVT(VT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
+ SVOffset, ST->isVolatile(), Alignment);
+ } else {
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ MVT StoredVT = ST->getMemoryVT();
+ MVT RegVT =
+ TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
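+ // NumRegs rounds up; e.g. copying 10 stored bytes with 4-byte registers
+ // takes 3 register-sized chunks, the last one partial.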
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, NULL, 0, StoredVT);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but the last copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getSrcValue(), SVOffset + Offset,
+ ST->isVolatile(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ NULL, 0, MemVT);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getSrcValue(), SVOffset + Offset,
+ MemVT, ST->isVolatile(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ }
+ }
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ MVT NewStoredVT =
+ (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1);
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
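+ // For example, an unaligned i32 store splits into Lo = Val (stored via a
+ // truncating i16 store) and Hi = Val >> 16.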
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+ ST->getSrcValue(), SVOffset, NewStoredVT,
+ ST->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+ ST->getSrcValue(), SVOffset + IncrementSize,
+ NewStoredVT, ST->isVolatile(), Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+}
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static
+SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ int SVOffset = LD->getSrcValueOffset();
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ MVT VT = LD->getValueType(0);
+ MVT LoadedVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, LD->isVolatile(),
+ LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+ if (VT.isFloatingPoint() && LoadedVT != VT)
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+ } else {
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do an (aligned) load from the stack slot.
+ MVT RegVT = TLI.getRegisterType(intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but the last copy using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + Offset, LD->isVolatile(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ NULL, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getSrcValue(), SVOffset + Offset,
+ MemVT, LD->isVolatile(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ NULL, 0, MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ NULL, 0, LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ MVT NewLoadedVT;
+ NewLoadedVT = MVT::getIntegerVT(NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
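+ // (A NON_EXTLOAD cannot load the narrower NewLoadedVT into the wider VT, so
+ // the hi half needs some extending load; zero-extension is a safe choice.)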
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (TLI.isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
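+ // For example, an unaligned i32 load is reassembled as (Hi << 16) | Lo,
+ // where Lo was zero-extended from its 16 loaded bits.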
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ SDValue Ops[] = { Result, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ DebugLoc dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could instead load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction), then
+ // permute it into place, if the index is a constant and the resulting
+ // shuffle is supported by the target.
+ MVT VT = Tmp1.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+ MVT IdxVT = Tmp3.getValueType();
+ MVT PtrVT = TLI.getPointerTy();
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+
+ // Truncate or zero extend offset to target pointer type.
+ unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
+ // Scale the index by the element size to get a byte offset.
+ unsigned EltSize = EltVT.getSizeInBits()/8;
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
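+ // StackPtr2 now addresses element Idx of the spilled vector; e.g. for a
+ // <4 x i32> vector it is the slot base plus Idx * 4 bytes.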
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
+ PseudoSourceValue::getFixedStack(SPFI), 0, EltVT);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+SDValue SelectionDAGLegalize::
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers, in which case the inserted value may be wider than the
+ // element type.
+ MVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
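+ // For example, inserting into element 2 of a 4-element vector produces
+ // the mask <0, 1, 4, 3>.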
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
+ &ShufOps[0]);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal. Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
+ if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return Op;
+
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert(getTypeAction(Node->getValueType(i)) == Legal &&
+ "Unexpected illegal type!");
+
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+ Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ SDValue Result = Op;
+ bool isCustom = false;
+
+ // Figure out the correct action; the way to query this varies by opcode.
+ TargetLowering::LegalizeAction Action;
+ bool SimpleFinishLegalizing = true;
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::VAARG:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
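+ // The indices above follow the operand layouts: SELECT_CC is
+ // (lhs, rhs, tval, fval, cc), SETCC is (lhs, rhs, cc), and BR_CC is
+ // (chain, cc, lhs, rhs, dest).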
+ MVT OpVT = Node->getOperand(CompareOperand).getValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALL:
+ // FIXME: Legalization for calls requires custom-lowering the call before
+ // legalizing the operands! (I haven't looked into precisely why.)
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ case ISD::FPOWI:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FORMAL_ARGUMENTS:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::BUILD_VECTOR:
+ // A weird case: legalization for BUILD_VECTOR never legalizes the
+ // operands!
+ // FIXME: This really sucks... changing it isn't semantically incorrect,
+ // but it massively pessimizes the code for floating-point BUILD_VECTORs
+ // because ConstantFP operands get legalized into constant pool loads
+ // before the BUILD_VECTOR code can see them. It doesn't usually bite,
+ // though, because BUILD_VECTORS usually get lowered into other nodes
+ // which get legalized properly.
+ SimpleFinishLegalizing = false;
+ break;
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SmallVector<SDValue, 8> Ops, ResultVals;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::BR:
+ case ISD::BRIND:
+ case ISD::BR_JT:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ case ISD::RET:
+ // Branches tweak the chain to include LastCALLSEQ_END
+ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
+ LastCALLSEQ_END);
+ Ops[0] = LegalizeOp(Ops[0]);
+ LastCALLSEQ_END = DAG.getEntryNode();
+ break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Ops[1].getValueType().isVector())
+ Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+ break;
+ }
+
+ Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(),
+ Ops.size());
+ switch (Action) {
+ case TargetLowering::Legal:
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Result.getValue(i));
+ break;
+ case TargetLowering::Custom:
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) {
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ if (e == 1)
+ ResultVals.push_back(Tmp1);
+ else
+ ResultVals.push_back(Tmp1.getValue(i));
+ }
+ break;
+ }
+
+ // FALL THROUGH
+ case TargetLowering::Expand:
+ ExpandNode(Result.getNode(), ResultVals);
+ break;
+ case TargetLowering::Promote:
+ PromoteNode(Result.getNode(), ResultVals);
+ break;
+ }
+ if (!ResultVals.empty()) {
+ for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
+ if (ResultVals[i] != SDValue(Node, i))
+ ResultVals[i] = LegalizeOp(ResultVals[i]);
+ AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
+ }
+ return ResultVals[Op.getResNo()];
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to legalize this operator!");
+ abort();
+ case ISD::CALL:
+ // The only option for this is to custom lower it.
+ Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG);
+ assert(Tmp3.getNode() && "Target didn't custom lower this node!");
+ // A call within a calling sequence must be legalized to something
+ // other than the normal CALLSEQ_END. Violating this gets Legalize
+ // into an infinite loop.
+ assert ((!IsLegalizingCall ||
+ Node->getOpcode() != ISD::CALL ||
+ Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) &&
+ "Nested CALLSEQ_START..CALLSEQ_END not supported.");
+
+ // The number of incoming and outgoing values should match, unless the final
+ // outgoing value is a flag.
+ assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() ||
+ (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 &&
+ Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) ==
+ MVT::Flag)) &&
+ "Lowering call/formal_arguments produced unexpected # results!");
+
+ // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to
+ // remember that we legalized all of them, so they don't get relegalized.
+ for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) {
+ if (Tmp3.getNode()->getValueType(i) == MVT::Flag)
+ continue;
+ Tmp1 = LegalizeOp(Tmp3.getValue(i));
+ if (Op.getResNo() == i)
+ Tmp2 = Tmp1;
+ AddLegalizedOperand(SDValue(Node, i), Tmp1);
+ }
+ return Tmp2;
+ case ISD::BUILD_VECTOR:
+ switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = ExpandBUILD_VECTOR(Result.getNode());
+ break;
+ }
+ break;
+ case ISD::CALLSEQ_START: {
+ SDNode *CallEnd = FindCallEndFromCallStart(Node);
+
+ // Recursively legalize all of the inputs of the call end that do not lead
+ // to this call start. This ensures that any libcalls that need to be inserted
+ // are inserted *before* the CALLSEQ_START.
+ {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+ for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+ LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+ NodesLeadingTo);
+ }
+
+ // Now that we legalized all of the inputs (which may have inserted
+ // libcalls) create the new CALLSEQ_START node.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ // Merge in the last call to ensure that this call starts after the last
+ // call ended.
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ }
+
+ // Do not try to legalize the target-specific arguments (#1+).
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+
+ // Remember that the CALLSEQ_START is legalized.
+ AddLegalizedOperand(Op.getValue(0), Result);
+ if (Node->getNumValues() == 2) // If this has a flag result, remember it.
+ AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+ // Now that the callseq_start and all of the non-call nodes above this call
+ // sequence have been legalized, legalize the call itself. During this
+ // process, no libcalls can/will be inserted, guaranteeing that no calls
+ // can overlap.
+ assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+ // Note that we are legalizing this call!
+ LastCALLSEQ_END = SDValue(CallEnd, 0);
+ IsLegalizingCall = true;
+
+ // Legalize the call, starting from the CALLSEQ_END.
+ LegalizeOp(LastCALLSEQ_END);
+ assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ return Result;
+ }
+ case ISD::CALLSEQ_END:
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
+ // causes this node to be legalized as well and ensures libcalls are handled
+ // correctly.
+ if (LastCALLSEQ_END.getNode() != Node) {
+ LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
+ }
+
+ // Otherwise, the call start has been legalized and everything is going
+ // according to plan. Just legalize ourselves normally here.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Do not try to legalize the target-specific arguments (#1+), except for
+ // an optional flag input.
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ } else {
+ Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+ if (Tmp1 != Node->getOperand(0) ||
+ Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Ops.back() = Tmp2;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ }
+ assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+ // This finishes up call legalization.
+ IsLegalizingCall = false;
+
+ // If the CALLSEQ_END node has a flag, remember that we legalized it.
+ AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+ if (Node->getNumValues() == 2)
+ AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+ return Result.getValue(Op.getResNo());
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ MVT VT = Node->getValueType(0);
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp3 = Result.getValue(0);
+ Tmp4 = Result.getValue(1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForMVT());
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG,
+ TLI);
+ Tmp3 = Result.getOperand(0);
+ Tmp4 = Result.getOperand(1);
+ Tmp3 = LegalizeOp(Tmp3);
+ Tmp4 = LegalizeOp(Tmp4);
+ }
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+ if (Tmp1.getNode()) {
+ Tmp3 = LegalizeOp(Tmp1);
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ }
+ break;
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another vector type.
+ assert(VT.isVector() && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ break;
+ }
+ }
+ // Since loads produce two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp3);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp4);
+ return Op.getResNo() ? Tmp4 : Tmp3;
+ } else {
+ MVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ MVT NVT = MVT::getIntegerVT(NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
+ NVT, isVolatile, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(SrcVT.isExtended() && !SrcVT.isVector() &&
+ "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ MVT RoundVT = MVT::getIntegerVT(RoundWidth);
+ MVT ExtraVT = MVT::getIntegerVT(ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+ Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+ Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset + IncrementSize,
+ ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
+ Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
+ Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), SVOffset + IncrementSize,
+ ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(LD->getMemoryVT().getTypeForMVT());
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG,
+ TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
+ }
+ }
+ }
+ break;
+ case TargetLowering::Expand:
+ // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+ if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Result = DAG.getNode(ISD::FP_EXTEND, dl,
+ Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
+ break;
+ }
+ assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(), SrcVT,
+ LD->isVolatile(), LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
+ }
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+
+ if (!ST->isTruncatingStore()) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ getTypeAction(MVT::i32) == Legal) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ MVT::i32);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ } else if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (getTypeAction(MVT::i64) == Legal) {
+ Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), MVT::i64);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(4));
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+ isVolatile, MinAlign(Alignment, 4U));
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ break;
+ }
+ }
+ }
+
+ {
+ Tmp3 = LegalizeOp(ST->getValue());
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+
+ MVT VT = Tmp3.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG,
+ TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.getNode()) Result = Tmp1;
+ break;
+ case TargetLowering::Promote:
+ assert(VT.isVector() && "Unknown legal promote case!");
+ Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getSrcValue(), SVOffset, isVolatile,
+ Alignment);
+ break;
+ }
+ break;
+ }
+ } else {
+ Tmp3 = LegalizeOp(ST->getValue());
+
+ MVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits());
+ Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, NVT, isVolatile, Alignment);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(StVT.isExtended() && !StVT.isVector() &&
+ "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ MVT RoundVT = MVT::getIntegerVT(RoundWidth);
+ MVT ExtraVT = MVT::getIntegerVT(ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, RoundVT,
+ isVolatile, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+ SVOffset + IncrementSize, ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
+ SVOffset, RoundVT, isVolatile, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset + IncrementSize, ExtraVT, isVolatile,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+ Tmp2 != ST->getBasePtr())
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG,
+ TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Result, DAG);
+ break;
+ case TargetLowering::Expand:
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+ Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ }
+ }
+ }
+ break;
+ }
+ }
+ assert(Result.getValueType() == Op.getValueType() &&
+ "Bad legalization!");
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op)
+ Result = LegalizeOp(Result);
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+ // Scale the index by the element size to get a byte offset.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ assert((Tmp2.getValueType() == MVT::f32 ||
+ Tmp2.getValueType() == MVT::f64) &&
+ "Ugly special-cased code!");
+ // Get the sign bit of the RHS.
+ SDValue SignBit;
+ MVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+ if (isTypeLegal(IVT)) {
+ SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+ } else {
+ assert(isTypeLegal(TLI.getPointerTy()) &&
+ (TLI.getPointerTy() == MVT::i32 ||
+ TLI.getPointerTy() == MVT::i64) &&
+ "Legal type for load?!");
+ SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
+ SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0);
+ if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
+ LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
+ LoadPtr, DAG.getIntPtrConstant(4));
+ SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
+ Ch, LoadPtr, NULL, 0, MVT::i32);
+ }
+ SignBit =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+ SignBit, DAG.getConstant(0, SignBit.getValueType()),
+ ISD::SETLT);
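+ // SignBit is now a boolean value: true exactly when the sign bit of Tmp2
+ // is set.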
+ // Get the absolute value of the result.
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+ AbsVal);
+}
+
+SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC,
+ MVT::Other);
+ bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other);
+
+ const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node);
+ GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit());
+ if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) {
+ DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit()));
+
+ unsigned Line = DSP->getLine();
+ unsigned Col = DSP->getColumn();
+
+ if (OptLevel == CodeGenOpt::None) {
+ // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it
+ // won't hurt anything.
+ if (useDEBUG_LOC) {
+ return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0),
+ DAG.getConstant(Line, MVT::i32),
+ DAG.getConstant(Col, MVT::i32),
+ DAG.getSrcValue(CU.getGV()));
+ } else {
+ unsigned ID = DW->RecordSourceLine(Line, Col, CU);
+ return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID);
+ }
+ }
+ }
+ return Node->getOperand(0);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ DebugLoc dl = Node->getDebugLoc();
+ MVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign =
+ TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (Align > StackAlign)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP,
+ DAG.getConstant(-(uint64_t)Align, VT));
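+ // Masking with -(uint64_t)Align clears the low log2(Align) bits, rounding
+ // SP down to the requested alignment.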
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine assumes LHS and RHS
+/// have already been legalized by LegalizeSetCCOperands. It expands SETCC with
+/// an illegal condition code into an AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC,
+ DebugLoc dl) {
+ MVT OpVT = LHS.getValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: assert(0 && "Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default: assert(0 && "Don't know how to expand this condition!"); abort();
+ case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ // FIXME: Implement more expansions.
+ }
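+ // For example, SETUEQ becomes (SETCC LHS, RHS, SETEQ) OR
+ // (SETCC LHS, RHS, SETUO), and SETONE becomes (SETCC LHS, RHS, SETNE) AND
+ // (SETCC LHS, RHS, SETO).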
+
+ SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ break;
+ }
+ }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
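+/// For example, storing an f64 SrcOp to an 8-byte slot and reloading the same
+/// bytes as an i64 reinterprets the value's bits; this is the kind of
+/// store/load round trip used when a conversion has to go through memory.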
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+ MVT SlotVT,
+ MVT DestVT,
+ DebugLoc dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+ getTypeForMVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ unsigned DestAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT());
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // wider than SlotVT.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, SlotVT, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+ false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0,
+ Node->getValueType(0).getVectorElementType());
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue SplatValue = Node->getOperand(0);
+ DebugLoc dl = Node->getDebugLoc();
+ MVT VT = Node->getValueType(0);
+ MVT OpVT = SplatValue.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+
+ // FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
+ // and use a bitmask instead of a list of elements.
+ // FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
+ std::map<SDValue, std::vector<unsigned> > Values;
+ Values[SplatValue].push_back(0);
+ bool isConstant = true;
+ if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+ SplatValue.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ Values[V].push_back(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ isOnlyLowElement = false;
+ if (SplatValue != V)
+ SplatValue = SDValue(0, 0);
+
+ // If this isn't a constant element or an undef, we can't use a constant
+ // pool load.
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+ V.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+ }
+
+ if (isOnlyLowElement) {
+ // If the low element is an undef too, then the whole thing is undef.
+ if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+ // Otherwise, turn this into a scalar_to_vector node.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+ }
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ std::vector<Constant*> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ const Type *OpNTy = OpVT.getTypeForMVT();
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment);
+ }
+
+ if (SplatValue.getNode()) { // Splat of one value?
+ // Build the shuffle constant vector: <0, 0, 0, 0>
+ SmallVector<int, 8> ZeroVec(NumElems, 0);
+
+ // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+ if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue LowValVec =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT),
+ &ZeroVec[0]);
+ }
+ }
+
+ // If there are only two unique elements, we may be able to turn this into a
+ // vector shuffle.
+ if (Values.size() == 2) {
+ // Get the two values in deterministic order.
+ SDValue Val1 = Node->getOperand(1);
+ SDValue Val2;
+ std::map<SDValue, std::vector<unsigned> >::iterator MI = Values.begin();
+ if (MI->first != Val1)
+ Val2 = MI->first;
+ else
+ Val2 = (++MI)->first;
+
+ // If Val1 is an undef, make sure it ends up as Val2, to ensure that our
+ // vector shuffle has the undef vector on the RHS.
+ if (Val1.getOpcode() == ISD::UNDEF)
+ std::swap(Val1, Val2);
+
+ // Build the shuffle constant vector: e.g. <0, 4, 0, 4>
+ SmallVector<int, 8> ShuffleMask(NumElems, -1);
+
+ // Set elements of the shuffle mask for Val1.
+ std::vector<unsigned> &Val1Elts = Values[Val1];
+ for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i)
+ ShuffleMask[Val1Elts[i]] = 0;
+
+ // Set elements of the shuffle mask for Val2.
+ std::vector<unsigned> &Val2Elts = Values[Val2];
+ for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i)
+ if (Val2.getOpcode() != ISD::UNDEF)
+ ShuffleMask[Val2Elts[i]] = NumElems;
+
+ // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) &&
+ TLI.isShuffleMaskLegal(ShuffleMask, VT)) {
+ Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1);
+ Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2);
+ return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]);
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+ Idx, SV, Offset));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall.  The node's operands
+// become the call's arguments, and the legalized result value of the call is
+// returned.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ MVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForMVT();
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Lower the call, producing the result value and the new output chain.
+ const Type *RetTy = Node->getValueType(0).getTypeForMVT();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ CallingConv::C, false, Callee, Args, DAG,
+ Node->getDebugLoc());
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo.first;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+ SDValue Op0,
+ MVT DestVT,
+ DebugLoc dl) {
+ if (Op0.getValueType() == MVT::i32) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+ // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // word offset constant for Hi/Lo address computation
+ SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ // set up Hi and Lo (into buffer) address based on endian
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(), StackSlot, WordOff);
+ if (TLI.isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed map to unsigned space
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+ Op0Mapped, Lo, NULL, 0);
+ // initial hi portion of constructed double
+ SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0);
+ // load the constructed double
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0);
+ // FP constant to bias correct the final result
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ // subtract the bias
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // final result
+ SDValue Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else if (DestVT.bitsLT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (TLI.isLittleEndian()) FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+ SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment);
+ else {
+ FudgeInReg =
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ MVT::f32, false, Alignment));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+ MVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ MVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use.  Sign- or zero-extend the
+ // input to the desired type (matching the signedness of the request), then
+ // run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+ MVT DestVT,
+ bool isSigned,
+ DebugLoc dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ MVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operand.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+ MVT VT = Op.getValueType();
+ MVT SHVT = TLI.getShiftAmountTy();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT()) {
+ default: assert(0 && "Unhandled Expand type in BSWAP!"); abort();
+ case MVT::i16:
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ case MVT::i32:
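+ // Swap the outer byte pair with 24-bit shifts and the inner pair with 8-bit
+ // shifts, mask off the bits that spill into neighboring bytes, then OR the
+ // four pieces back together.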
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
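+ // Same approach as i32: shift each of the eight bytes to its mirrored
+ // position, mask away the bits that land outside the target byte, and OR
+ // the pieces back together pairwise.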
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+ DebugLoc dl) {
+ switch (Opc) {
+ default: assert(0 && "Cannot expand this yet!");
+ case ISD::CTPOP: {
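+ // Population count via the parallel-sum trick: add adjacent 1-bit fields,
+ // then adjacent 2-bit fields, and so on, doubling the field width each
+ // iteration until a single field holds the full count.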
+ static const uint64_t mask[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+ MVT VT = Op.getValueType();
+ MVT ShVT = TLI.getShiftAmountTy();
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
+ unsigned EltSize = VT.isVector() ?
+ VT.getVectorElementType().getSizeInBits() : len;
+ SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
+ Tmp2));
+ }
+ return Op;
+ }
+ case ISD::CTLZ: {
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ MVT VT = Op.getValueType();
+ MVT ShVT = TLI.getShiftAmountTy();
+ unsigned len = VT.getSizeInBits();
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ }
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // see also http://www.hackersdelight.org/HDcode/ntz.cc
+ MVT VT = Op.getValueType();
+ SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getConstant(1, VT)));
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(VT.getSizeInBits(), VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+ }
+ }
+}
+
+void SelectionDAGLegalize::ExpandNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+ break;
+ case ISD::FLT_ROUNDS_:
+ Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::DECLARE:
+ case ISD::DBG_LABEL:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::MEMBARRIER:
+ case ISD::VAEND:
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::DBG_STOPPOINT:
+ Results.push_back(ExpandDBG_STOPPOINT(Node));
+ break;
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ MVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, VT));
+ else if (VT.isFloatingPoint())
+ Results.push_back(DAG.getConstantFP(0, VT));
+ else
+ assert(0 && "Unknown value type!");
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to 'abort()' call
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy,
+ false, false, false, false, CallingConv::C, false,
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ Args, DAG, dl);
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FP_ROUND:
+ case ISD::BIT_CONVERT:
+ Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
+ MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ unsigned BitsDiff = Node->getValueType(0).getSizeInBits() -
+ ExtraVT.getSizeInBits();
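+ // Shift the value left so the sign bit of ExtraVT becomes the sign bit of
+ // the full type, then arithmetic-shift it back down to replicate that bit
+ // across the upper bits.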
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_ROUND_INREG: {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_UINT: {
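+ // Expand FP_TO_UINT as a select of two FP_TO_SINTs: inputs below 2^(N-1)
+ // (N being the width of the result) convert directly; larger inputs have
+ // 2^(N-1) subtracted first and the top bit of the result restored with an
+ // XOR.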
+ SDValue True, False;
+ MVT VT = Node->getOperand(0).getValueType();
+ MVT NVT = Node->getValueType(0);
+ const uint64_t zero[] = {0, 0};
+ APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+ Tmp1 = DAG.getConstantFP(apf, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ Node->getOperand(0),
+ Tmp1, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+ DAG.getNode(ISD::FSUB, dl, VT,
+ Node->getOperand(0), Tmp1));
+ False = DAG.getNode(ISD::XOR, dl, NVT, False,
+ DAG.getConstant(x, NVT));
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VAARG: {
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ MVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(TLI.getTargetData()->
+ getTypeAllocSize(VT.getTypeForMVT()),
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
+ // Load the actual argument out of the pointer VAList
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::VACOPY: {
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(2), VS, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS: {
+ // Use extract/insert/build vector for now. We might try to be
+ // more clever later.
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumOperands = Node->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue SubOp = Node->getOperand(i);
+ MVT VVT = SubOp.getNode()->getValueType(0);
+ MVT EltVT = VVT.getVectorElementType();
+ unsigned NumSubElem = VVT.getVectorNumElements();
+ for (unsigned j=0; j < NumSubElem; ++j) {
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
+ DAG.getIntPtrConstant(j)));
+ }
+ }
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Ops[0], Ops.size());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ MVT VT = Node->getValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(0),
+ DAG.getIntPtrConstant(Idx)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Node->getOperand(1),
+ DAG.getIntPtrConstant(Idx - NumElems)));
+ }
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ MVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy()));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS: {
+ // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+ MVT VT = Node->getValueType(0);
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, ISD::SETUGT);
+ Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128));
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_PPCF128));
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal.
+ bool isLegal = false;
+ for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+ E = TLI.legal_fpimm_end(); I != E; ++I) {
+ if (CFP->isExactlyValue(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (isLegal)
+ Results.push_back(SDValue(Node, 0));
+ else
+ Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ break;
+ }
+ case ISD::EHSELECTION: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::EXCEPTIONADDR: {
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ assert(Reg && "Can't expand to unknown register!");
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ }
+ case ISD::SUB: {
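+ // Expand a - b into a + (~b + 1) using the target's XOR and ADD operations.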
+ MVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ MVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X-X/Y*Y
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ } else if (isSigned) {
+ Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128);
+ } else {
+ Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128);
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ MVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+ ISD::SMUL_LOHI;
+ MVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+ "If this wasn't legal, it shouldn't have been created!");
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::MUL: {
+ MVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ // See if multiply or divide can be lowered using two-result operations.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, form a preference by checking which forms of plain
+ // MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+ Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ MVT OType = Node->getValueType(1);
+
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Results.push_back(Cmp);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
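+ // Unsigned add overflows exactly when the sum is less than an operand;
+ // unsigned subtract borrows exactly when the result is greater than the LHS.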
+ Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
+ Node->getOpcode() == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT));
+ break;
+ }
+ case ISD::BUILD_PAIR: {
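+ // Zero-extend the low half, any-extend the high half and shift it into the
+ // top bits, then OR the two together.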
+ MVT PairTy = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits()/2,
+ TLI.getShiftAmountTy()));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
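+ // Lower SELECT into SELECT_CC: if the condition is itself a SETCC, reuse
+ // its operands and condition code; otherwise compare the condition against
+ // zero with SETNE.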
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
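+ // Compute the address of the jump-table entry as Table + Index * EntrySize,
+ // load the entry (sign-extending it to pointer width), and branch to it
+ // indirectly.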
+ MVT PTy = TLI.getPointerTy();
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize();
+ Index = DAG.getNode(ISD::MUL, dl, PTy,
+ Index, DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+
+ MVT MemVT = MVT::getIntegerVT(EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ PseudoSourceValue::getJumpTable(), 0, MemVT);
+ Addr = LD;
+ if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+ Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp2,
+ DAG.getConstant(0, Tmp2.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+
+ // If we expanded the SETCC into an AND/OR, return the new node
+ if (Tmp2.getNode() == 0) {
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ MVT VT = Node->getValueType(0);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ SDValue CC = Node->getOperand(4);
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, dl);
+
+ assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, dl);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ Results.push_back(SDValue(Node, i));
+ break;
+ }
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT OVT = Node->getValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP) {
+ OVT = Node->getOperand(0).getValueType();
+ }
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Perform the larger operation.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1);
+ if (Node->getOpcode() == ISD::CTTZ) {
+ //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+ ISD::SETEQ);
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+ DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+ } else if (Node->getOpcode() == ISD::CTLZ) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BSWAP: {
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ assert(OVT.isVector() && "Don't know how to promote scalar logic ops");
+ // Bit convert each of the values to the new type.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ // Bit convert the result back the original type.
+ Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1));
+ break;
+ case ISD::SELECT:
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector()) {
+ ExtOp = ISD::BIT_CONVERT;
+ TruncOp = ISD::BIT_CONVERT;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 8> Mask;
+ cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+ // First step, figure out the appropriate operation to use.
+ // SETCC may not be supported for every legal data type; in practice this
+ // mostly affects floating point.
+ MVT NewInTy = Node->getOperand(0).getValueType();
+ MVT OldVT = NewInTy; OldVT = OldVT;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
+
+ assert(NewInTy.isInteger() == OldVT.isInteger() &&
+ "Fell off of the edge of the integer world");
+ assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() &&
+ "Fell off of the edge of the floating point world");
+
+ // If the target supports SETCC of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy))
+ break;
+ }
+ if (NewInTy.isInteger())
+ assert(0 && "Cannot promote Legal Integer SETCC yet");
+ else {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Node->getOperand(1));
+ }
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Node->getOperand(2)));
+ break;
+ }
+ }
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize(bool TypesNeedLegalizing,
+ CodeGenOpt::Level OptLevel) {
+ // Construct a SelectionDAGLegalize instance and run it over the DAG.
+ SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 0000000..c3c1bea
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1388 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(MVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to soften the result of this operator!");
+ abort();
+
+ case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP:
+ R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
+ return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+ TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
+ SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
+ NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT LVT = LHS.getValueType();
+ MVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
+ DAG.getConstant(RSize - 1,
+ TLI.getShiftAmountTy()));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Adjust the sign bit to the LHS's width: shift right and truncate if the
+ // RHS is wider, or any-extend and shift left if it is narrower.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy()));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
+ DAG.getConstant(LSize - 1,
+ TLI.getShiftAmountTy()));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
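+    // An ordinary load: just load the value as an integer of the new type.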
+ NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
+ NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getSrcValue(), L->getSrcValueOffset(), NVT,
+ L->isVolatile(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD,
+ L->getMemoryVT(), L->getChain(),
+ L->getBasePtr(), L->getOffset(),
+ L->getSrcValue(), L->getSrcValueOffset(),
+ L->getMemoryVT(),
+ L->isVolatile(), L->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ MVT SVT = N->getOperand(0).getValueType();
+ MVT RVT = N->getValueType(0);
+ MVT NVT = MVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If the input is not legal, e.g. i1 -> fp, then it needs to be promoted to
+ // a larger type, e.g. i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
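+ // For example, an i8 input has no dedicated conversion libcall, so the loop
+ // below typically settles on i32, and the operand is extended to that width
+ // before the call is made.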
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to soften this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
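+/// Each comparison is turned into a call to a soft-float comparison routine
+/// whose integer result is then compared against zero; some predicates need
+/// two such calls, whose results are ORed together.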
+void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl) {
+ SDValue LHSInt = GetSoftenedFloat(NewLHS);
+ SDValue RHSInt = GetSoftenedFloat(NewRHS);
+ MVT VT = NewLHS.getValueType();
+
+ assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ default: assert(false && "Do not know how to soften this setcc!");
+ }
+ }
+
+ MVT RetVT = MVT::i32; // FIXME: is this the correct return type?
+ SDValue Ops[2] = { LHSInt, RHSInt };
+ NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = TLI.getCmpLibcallCC(LC1);
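+ // Some predicates require a second libcall; evaluate it as well and OR the
+ // two boolean results together.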
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ MVT SVT = N->getOperand(0).getValueType();
+ MVT RVT = N->getValueType(0);
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+ MVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+ MVT RVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If SoftenSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), ST->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to expand the result of this operator!");
+ abort();
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ assert(NVT.getSizeInBits() == integerPartWidth &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
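+ // The constant's bit pattern is a pair of 64-bit doubles; rebuild Lo and Hi
+ // from its two raw words.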
+ Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[1])), NVT);
+ Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+ &C.getRawData()[0])), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ DAG.getCondCode(ISD::SETEQ));
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
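+ // The extended value is represented exactly by the high part; the low part
+ // is zero.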
+ Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getMemoryVT(),
+ LD->isVolatile(), LD->getAlignment());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ SDValue Src = N->getOperand(0);
+ MVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ DebugLoc dl = N->getDebugLoc();
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
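+ // Hi now holds the complete ppcf128 value built from the Lo/Hi halves.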
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
+ const uint64_t *Parts = 0;
+
+ switch (SrcVT.getSimpleVT()) {
+ default:
+ assert(false && "Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APInt(128, 2, Parts)),
+ MVT::ppcf128));
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
+ Lo, Hi, DAG.getCondCode(ISD::SETLT));
+ GetPairElements(Lo, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
+ == TargetLowering::Custom)
+ Res = TLI.LowerOperation(SDValue(N, 0), DAG);
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ cerr << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+ #endif
+ assert(0 && "Do not know how to expand this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ MVT VT = NewLHS.getValueType();
+ assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
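+ // If the high doubles are equal, the comparison is decided by the low
+ // doubles; otherwise it is decided by the high doubles alone.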
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ MVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+ N->getOperand(0), DAG.getValueType(MVT::f64));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ MVT RVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+ APFloat APF = APFloat(APInt(128, 2, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ DAG.getNode(ISD::FSUB, dl,
+ MVT::ppcf128,
+ N->getOperand(0),
+ Tmp)),
+ DAG.getConstant(0x80000000, MVT::i32)),
+ DAG.getNode(ISD::FP_TO_SINT, dl,
+ MVT::i32, N->getOperand(0)),
+ DAG.getCondCode(ISD::SETGE));
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If ExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getMemoryVT(),
+ ST->isVolatile(), ST->getAlignment());
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 0000000..eb9342c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,2382 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
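+// As a rough illustration, an expanded i64 add becomes something like an ADDC
+// of the two low halves followed by an ADDE of the high halves that consumes
+// the carry, while a promoted i8 add is simply performed in i32 with the extra
+// high bits ignored or re-extended as needed.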
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to promote this operator!");
+ abort();
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SDIV(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_UDIV(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getSrcValue(), N->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
+ N->getMemoryVT(), N->getChain(), N->getBasePtr(),
+ Op2, Op3, N->getSrcValue(), N->getAlignment());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ MVT InVT = InOp.getValueType();
+ MVT NInVT = TLI.getTypeToTransformTo(InVT);
+ MVT OutVT = N->getValueType(0);
+ MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case Legal:
+ break;
+ case PromoteInteger:
+ if (NOutVT.bitsEq(NInVT))
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl,
+ NOutVT, GetPromotedInteger(InOp));
+ break;
+ case SoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case ExpandInteger:
+ case ExpandFloat:
+ break;
+ case ScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ case SplitVector: {
+ // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ MVT::getIntegerVT(NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+ }
+ case WidenVector:
+ if (OutVT.bitsEq(NInVT))
+ // The input is widened to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+ }
+
+ // Otherwise, lower the bit-convert to a store/load from the stack.
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+ // Result is an extending load from the stack slot.
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ MVT OVT = N->getValueType(0);
+ MVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
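+ // Byte-swapping the wider value leaves the original bytes in the most
+ // significant positions; shift them back down by the number of extra bits.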
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(N->getValueType(0)),
+ JoinIntegers(N->getOperand(0), N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = N->getDebugLoc();
+ // Zero extend things like i1, sign extend everything else. It shouldn't
+ // matter in theory which one we pick, but this tends to give better code?
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+ "can only promote integers");
+ MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ MVT OVT = N->getValueType(0);
+ MVT NVT = Op.getValueType();
+ Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
+ return DAG.getNode(ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), NVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ MVT OVT = N->getValueType(0);
+ MVT NVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.set(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ MVT OldVT = N->getValueType(0);
+ SDValue OldVec = N->getOperand(0);
+ if (getTypeAction(OldVec.getValueType()) == WidenVector)
+ OldVec = GetWidenedVector(N->getOperand(0));
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (OldElts == 1) {
+ assert(!isTypeLegal(OldVec.getValueType()) &&
+ "Legal one-element vector of a type needing promotion!");
+ // It is tempting to follow GetScalarizedVector by a call to
+ // GetPromotedInteger, but this would be wrong because the
+ // scalarized value may not yet have been processed.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT),
+ GetScalarizedVector(OldVec));
+ }
+
+ // Convert to a vector half as long with an element type of twice the width,
+ // for example <4 x i16> -> <2 x i32>.
+ assert(!(OldElts & 1) && "Odd length vectors not supported!");
+ MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits());
+ assert(OldVT.isSimple() && NewVT.isSimple());
+
+ SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::getVectorVT(NewVT, OldElts / 2),
+ OldVec);
+
+ // Extract the element at OldIdx / 2 from the new vector.
+ SDValue OldIdx = N->getOperand(1);
+ SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx,
+ DAG.getConstant(1, TLI.getPointerTy()));
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx);
+
+ // Select the appropriate half of the element: Lo if OldIdx was even,
+ // Hi if it was odd.
+ SDValue Lo = Elt;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt,
+ DAG.getConstant(OldVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Pick the half indicated by the low bit of the original index, then extend
+ // to the promoted type.
+ SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx);
+ SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we're promoting a UINT to a larger size, check to see if the new node
+ // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since
+ // we can use that instead. This allows us to generate better code for
+ // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not
+ // legal, such as PowerPC.
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (e.g. because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ ISD::AssertZext : ISD::AssertSext, dl,
+ NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT(), N->isVolatile(),
+ N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// PromoteIntRes_Overflow - Promote the overflow flag of an overflowing
+/// arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Simply change the return type of the boolean result.
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1));
+ MVT ValueVTs[] = { N->getValueType(0), NVT };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ DAG.getVTList(ValueVTs, 2), Ops, 2);
+
+ // Modified the arithmetic result - switch anything that used the old value
+ // to use the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
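+ // For example, with i8 promoted to i32: 100 + 100 computes 200; sign-extending
+ // its low 8 bits (0xC8) gives -56, which differs from 200, so the original
+ // i8 addition overflowed.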
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ MVT OVT = N->getOperand(0).getValueType();
+ MVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),LHS,RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType());
+ assert(isTypeLegal(SVT) && "Illegal SetCC type!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+
+ // Convert to the expected type.
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(),
+ TLI.getTypeToTransformTo(N->getValueType(0)),
+ GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ // The input value must be properly sign extended.
+ SDValue Res = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(),
+ Res.getValueType(), Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ // The input value must be properly zero extended.
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ SDValue Res = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Res;
+
+ switch (getTypeAction(N->getOperand(0).getValueType())) {
+ default: assert(0 && "Unknown type action!");
+ case Legal:
+ case ExpandInteger:
+ Res = N->getOperand(0);
+ break;
+ case PromoteInteger:
+ Res = GetPromotedInteger(N->getOperand(0));
+ break;
+ }
+
+ // Truncate to NVT instead of VT
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ MVT OVT = N->getOperand(0).getValueType();
+ MVT NVT = LHS.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT RegVT = TLI.getRegisterType(VT);
+ unsigned NumRegs = TLI.getNumRegisters(VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(),
+ TLI.getPointerTy()));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ assert(ResNo == 1 && "Only boolean result promotion currently supported!");
+ return PromoteIntRes_Overflow(N);
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ cerr << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+ #endif
+ assert(0 && "Do not know how to promote this operator's operand!");
+ abort();
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions, but zero extend is cheaper on
+ // many machines (an AND instead of two shifts), so prefer it.
+ switch (CCCode) {
+ default: assert(0 && "Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+ // legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ MVT SVT = TLI.getSetCCResultType(MVT::Other);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond,
+ N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
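+ // For example, an i32 BUILD_PAIR from two i16 halves, with i16 promoted to
+ // i32: Res = zext_from_i16(Lo) | (Hi << 16).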
+ MVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ DebugLoc dl = N->getDebugLoc();
+
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+ // type does not have a strange size (e.g. it is not i1).
+ MVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+ // Promote the element operands. Their type does not need to match the
+ // vector element type. Check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+ "can only promote integer arguments");
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2));
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ N->getOperand(1), Idx);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+ SDValue NewOps[6];
+ DebugLoc dl = N->getDebugLoc();
+ NewOps[0] = N->getOperand(0);
+ for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+ SDValue Flag = GetPromotedInteger(N->getOperand(i));
+ NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+ }
+ return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps,
+ array_lengthof(NewOps));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ GetPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ DebugLoc dl = N->getDebugLoc();
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ SExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
+ SVOffset, N->getMemoryVT(),
+ isVolatile, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return DAG.UpdateNodeOperands(SDValue(N, 0),
+ ZExtPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to expand the result of this operator!");
+ abort();
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
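+/// For illustration, when expanding an i64 shift into i32 halves (NVTBits == 32),
+/// a constant shift X << 40 yields Lo = 0 and Hi = InL << 8, while a logical
+/// X >> 40 yields Lo = InH >> 8 and Hi = 0.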
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ MVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ MVT ShTy = N->getOperand(1).getValueType();
+
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt > VTBits) {
+ Lo = Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getNode(ISD::SHL, dl,
+ NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = InL;
+ } else if (Amt == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::ADDC,
+ TLI.getTypeToExpandTo(NVT))) {
+ // Emit this X << 1 as X+X.
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDValue LoOps[2] = { InL, InL };
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRL, dl,
+ NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt > VTBits) {
+ Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt > NVTBits) {
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(Amt-NVTBits, ShTy));
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getConstant(Amt, ShTy)),
+ DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getConstant(NVTBits-Amt, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bits of the shift amount. If we
+/// can tell this, we know that the amount is >= NVTBits or < NVTBits, without
+/// knowing the actual shift amount.
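+/// For example, when NVTBits is 32 only the low 5 bits of the amount select a
+/// bit position within a part: if any higher bit is known to be one the amount
+/// is >= 32, and if all higher bits are known zero it is < 32.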
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ MVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+
+ // If we don't know anything about the high bits, exit.
+ if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (KnownOne.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, ShTy));
+
+ switch (N->getOpcode()) {
+ default: assert(0 && "Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+#if 0
+ // FIXME: This code is broken for shifts with a zero amount!
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if ((KnownZero & HighBitMask) == HighBitMask) {
+ // Compute 32-amt.
+ SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
+ DAG.getConstant(NVTBits, ShTy),
+ Amt);
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: assert(0 && "Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(Op1, NVT, InH, Amt),
+ DAG.getNode(Op2, NVT, InL, Amt2));
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ MVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+ // Amount needed to fill the other part when the shift is short (< NVTBits),
+ // and the leftover amount when it is long (>= NVTBits).
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+
+ SDValue Lo1, Hi1, Lo2, Hi2;
+ switch (N->getOpcode()) {
+ default: assert(0 && "Unknown shift");
+ case ISD::SHL:
+ // ShAmt < NVTBits
+ Lo1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // Low part from Lo part.
+ Hi1 = DAG.getNode(ISD::OR, dl, NVT, // High part gets bits of both parts.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // ShAmt >= NVTBits
+ Lo2 = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi2 = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // High part from Lo part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+ return true;
+ case ISD::SRL:
+ // ShAmt < NVTBits
+ Hi1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // High part from Hi part.
+ Lo1 = DAG.getNode(ISD::OR, dl, NVT, // Low part gets bits of both parts.
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // ShAmt >= NVTBits
+ Hi2 = DAG.getConstant(0, NVT); // High part is zero.
+ Lo2 = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Low part from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+ return true;
+ case ISD::SRA:
+ // ShAmt < NVTBits
+ Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // High part from Hi part.
+ Lo1 = DAG.getNode(ISD::OR, dl, NVT, // Low part gets bits of both parts.
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // ShAmt >= NVTBits
+ Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ Lo2 = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Low part from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+ return true;
+ }
+
+ return false;
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ MVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Flag, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+ } else {
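+ // No legal ADDC/SUBC: compute the carry or borrow explicitly with a setcc.
+ // For example, when adding i32 low parts 0xFFFFFFFF and 0x1, Lo wraps to 0;
+ // since Lo is unsigned-less-than either input, a carry of 1 is selected and
+ // added into the high sum.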
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ }
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+
+ // Legalized the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+ MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo, make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+ MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero, make it explicit.
+ Hi = DAG.getConstant(0, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
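+ // Split the constant into two NVT-sized pieces; for example the i64 constant
+ // 0x123456789ABCDEF0 becomes Lo = 0x9ABCDEF0 and Hi = 0x12345678 when NVT is
+ // i32.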
+ Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctlz(HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo) + NVTBits)
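+ // For example, for the i64 value 0x0000000000010000 expanded into i32 halves,
+ // Hi is zero, so the result is ctlz(Lo) + 32 = 15 + 32 = 47.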
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ // cttz(HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi) + NVTBits)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, NVT), ISD::SETNE);
+
+ SDValue LoTZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
+ SDValue HiTZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoTZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiTZ,
+ DAG.getConstant(NVT.getSizeInBits(), NVT)));
+ Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ MVT EVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ EVT, isVolatile, Alignment);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ MVT NEVT = MVT::getIntegerVT(ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize, NEVT,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
+ MVT EVT = N->getMemoryVT();
+ unsigned EBytes = EVT.getStoreSizeInBits()/8;
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
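+ // For example, expanding an i64 extload from i48 memory into i32 parts:
+ // EBytes = 6, IncrementSize = 4 and ExcessBits = 16, so the first load below
+ // reads bits [47:16] of the value and the second reads the low 16 bits; the
+ // shifts afterwards redistribute these into the final Lo and Hi.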
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits),
+ isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize,
+ MVT::getIntegerVT(ExcessBits),
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
+ NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ }
+ }
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = NVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ return;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
+ return;
+ }
+ }
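+ // General case: writing each input as Hi*2^IB + Lo with IB = InnerBitSize,
+ // the product modulo 2^(2*IB) is LL*RL + ((LL*RH + LH*RL) << IB); the LH*RH
+ // term only affects bits above the result and is dropped. The code below
+ // computes LL*RL with a double-width multiply and folds the cross terms into
+ // the high part.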
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(NVT, NVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
+ return;
+ }
+ }
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
+ // Next check to see if the target supports this shift_PARTS operation or if
+ // it will custom expand it.
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+
+ SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
+ MVT PartVT = LHSL.getValueType();
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(PartVT, PartVT), Ops, 3);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ assert(0 && "Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of the low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(MVT::getIntegerVT(ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+ TLI.getPointerTy()));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits =
+ EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(MVT::getIntegerVT(ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
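+ // For example, when an i64 result is expanded into i32 halves:
+ // Lo = trunc(Op) and Hi = trunc(Op >> 32).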
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl,
+ N->getOperand(0).getValueType(), N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is a zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits));
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ cerr << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+ #endif
+ assert(0 && "Do not know how to expand this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ MVT VT = NewLHS.getValueType();
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
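+ // Equality of the pair reduces to checking
+ // (LHSLo ^ RHSLo) | (LHSHi ^ RHSHi) against zero, with a shortcut when the
+ // right-hand side is the constant -1.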
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ return;
+ }
+
+ // If this is a comparison of the sign bit, just look at the top part.
+ // X > -1, X < 0
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+ // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL);
+ SDValue Tmp1, Tmp2;
+ Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!Tmp1.getNode())
+ Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC);
+ Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!Tmp2.getNode())
+ Tmp2 = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+ if ((Tmp1C && Tmp1C->isNullValue()) ||
+ (Tmp2C && Tmp2C->isNullValue() &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+ // If the low part is known false, the result is just the high part.
+ // For LE / GE, if high part is known false, ignore the low part.
+ // For LT / GT, if high part is known true, ignore the low part.
+ NewLHS = Tmp2;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ, false,
+ DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+ NewLHS, Tmp1, Tmp2);
+ NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+ // result against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+ // result against zero to select between true and false values.
+ if (NewRHS.getNode() == 0) {
+ NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If IntegerExpandSetCCOperands returned a scalar, use it.
+ if (NewRHS.getNode() == 0) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ MVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ MVT VT = N->getOperand(1).getValueType();
+ MVT NVT = TLI.getTypeToTransformTo(VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ unsigned Alignment = N->getAlignment();
+ bool isVolatile = N->isVolatile();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ N->getMemoryVT(), isVolatile, Alignment);
+ } else if (TLI.isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ MVT NEVT = MVT::getIntegerVT(ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize, NEVT,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ MVT EVT = N->getMemoryVT();
+ unsigned EBytes = EVT.getStoreSizeInBits()/8;
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
+ SVOffset, HiVT, isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
+ SVOffset+IncrementSize,
+ MVT::getIntegerVT(ExcessBits),
+ isVolatile, MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ MVT SrcVT = Op.getValueType();
+ MVT DstVT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
+
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ assert(false && "Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(Hi.getValueType()),
+ Hi, DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)),
+ TLI.getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ if (TLI.isBigEndian()) std::swap(Zero, Four);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr, NULL, 0, MVT::f32,
+ false, Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 0000000..00d71e1
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1074 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive, sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may not hold momentarily when processing a node:
+ // the node being processed may be put in a map before being marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes. By iterating
+ // over the DAG we only consider non-deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (I->getNodeId() == NewNode)
+ NewNodes.push_back(I);
+
+ for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+ SDValue Res(I, i);
+ bool Failed = false;
+
+ unsigned Mapped = 0;
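+ // Mapped collects one bit per map containing this value, so the checks
+ // below can distinguish "not mapped", "mapped once" and "mapped more than
+ // once".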
+ if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ SDValue NewVal = ReplacedValues[Res];
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ while (I != ReplacedValues.end()) {
+ NewVal = I->second;
+ I = ReplacedValues.find(NewVal);
+ }
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (SplitVectors.find(Res) != SplitVectors.end())
+ Mapped |= 64;
+ if (WidenedVectors.find(Res) != WidenedVectors.end())
+ Mapped |= 128;
+
+ if (I->getNodeId() != Processed) {
+ if (Mapped != 0) {
+ cerr << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+ if (Mapped > 1) {
+ cerr << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ cerr << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ cerr << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ cerr << " ReplacedValues";
+ if (Mapped & 2)
+ cerr << " PromotedIntegers";
+ if (Mapped & 4)
+ cerr << " SoftenedFloats";
+ if (Mapped & 8)
+ cerr << " ScalarizedVectors";
+ if (Mapped & 16)
+ cerr << " ExpandedIntegers";
+ if (Mapped & 32)
+ cerr << " ExpandedFloats";
+ if (Mapped & 64)
+ cerr << " SplitVectors";
+ if (Mapped & 128)
+ cerr << " WidenedVectors";
+ cerr << "\n";
+ abort();
+ }
+ }
+ }
+
+ // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves, and 'Unanalyzed' otherwise.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (I->getNumOperands() == 0) {
+ I->setNodeId(ReadyToProcess);
+ Worklist.push_back(I);
+ } else {
+ I->setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ if (IgnoreNodeResults(N))
+ goto ScanOperands;
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ MVT ResultVT = N->getValueType(i);
+ switch (getTypeAction(ResultVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case Legal:
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case PromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case SoftenFloat:
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case ScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case SplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case WidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ MVT OpVT = N->getOperand(i).getValueType();
+ switch (getTypeAction(OpVT)) {
+ default:
+ assert(false && "Unknown action!");
+ case Legal:
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case PromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case ExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case SoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case ExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case ScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case SplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case WidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't add all users
+ // to the worklist etc.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now. However
+ // there is no need to remember the replacement - morphing will make sure
+ // it is never used non-trivially.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value. Do the
+ // replacement without recording it in ReplacedValues. This does not
+ // expunge From but that is fine - it is not really a new node.
+ ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this user has not been analyzed yet: this is the first of its
+ // operands to become ready. Its new NodeId is the number of operands it has
+ // minus 1 (one operand, N, has just been processed).
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef XDEBUG
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can cause unreachable nodes to be around with their flags set
+ // to new.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ if (!IgnoreNodeResults(I))
+ for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(I->getValueType(i))) {
+ cerr << "Result type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+ !isTypeLegal(I->getOperand(i).getValueType())) {
+ cerr << "Operand type " << i << " illegal!\n";
+ Failed = true;
+ }
+
+ if (I->getNodeId() != Processed) {
+ if (I->getNodeId() == NewNode)
+ cerr << "New node not analyzed?\n";
+ else if (I->getNodeId() == Unanalyzed)
+ cerr << "Unanalyzed node not noticed?\n";
+ else if (I->getNodeId() > 0)
+ cerr << "Operand not processed?\n";
+ else if (I->getNodeId() == ReadyToProcess)
+ cerr << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ I->dump(&DAG); cerr << "\n";
+ abort();
+ }
+ }
+#endif
+
+ return Changed;
+}
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes. Correct any processed operands (this may change the node) and
+/// calculate the NodeId. If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Remove any stale map entries.
+ ExpungeNode(N);
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting of nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ SmallVector<SDValue, 8> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ for (unsigned j = 0; j < i; ++j)
+ NewOps.push_back(N->getOperand(j));
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0],
+ NewOps.size()).getNode();
+ if (M != N) {
+ // The node morphed into a different node. Normally for this to happen
+ // the original node would have to be marked NewNode. However this can
+ // in theory momentarily not be the case while ReplaceValueWith is doing
+ // its stuff. Mark the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ ExpungeNode(N);
+ }
+ }
+
+ // Calculate the NodeId.
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+ if (N->getNodeId() != NewNode)
+ return;
+
+ // If N is not remapped by ReplacedValues then there is nothing to do.
+ unsigned i, e;
+ for (i = 0, e = N->getNumValues(); i != e; ++i)
+ if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+ break;
+
+ if (i == e)
+ return;
+
+ // Remove N from all maps - this is expensive but rare.
+
+ for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+ E = PromotedIntegers.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+ E = SoftenedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+ E = ScalarizedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+ E = WidenedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+ E = ReplacedValues.end(); I != E; ++I)
+ RemapValue(I->second);
+
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+ if (I != ReplacedValues.end()) {
+ // Use path compression to speed up future lookups if values get multiply
+ // replaced with other values.
+ RemapValue(I->second);
+ N = I->second;
+ assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+ }
+}
+
+namespace {
+ /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+ /// updates to nodes and recomputes their ready state.
+ class VISIBILITY_HIDDEN NodeUpdateListener :
+ public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : DTL(dtl), NodesToAnalyze(nta) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Node updates can mean pretty much anything. It is possible that an
+ // operand was set to something already processed (for example), in which
+ // case this node could become ready. Recompute its flags.
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith. Updates the
+/// DAG causing any uses of From to use To instead, but without expunging From
+/// or recording the replacement in ReplacedValues. Do not call directly unless
+/// you really know what you are doing!
+void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ AnalyzeNewValue(To); // Expunges To.
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe to
+ // skip. Note that this is not a morphing node - otherwise it would still
+ // be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new node
+ // instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+}
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode()->getNodeId() == ReadyToProcess &&
+ "Only the node being processed may be remapped!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
+ AnalyzeNewValue(To); // Expunges To.
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Do the replacement.
+ ReplaceValueWithHelper(From, To);
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedIntegers[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = SoftenedFloats[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = ScalarizedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ assert(Entry.first.getNode() == 0 && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't split");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ assert(Entry.first.getNode() == 0 && "Node already split");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = WidenedVectors[Op];
+ assert(OpEntry.getNode() == 0 && "Node already widened!");
+ OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getSizeInBits();
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ MVT::getIntegerVT(BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ MVT EltNVT = MVT::getIntegerVT(EltWidth);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ MVT::getVectorVT(EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ MVT DestVT) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is false, the node has legal result types but an illegal
+/// operand, and VT is the type of that illegal operand. If LegalizeResult is
+/// true, the node has an illegal result type, and VT is the type of that
+/// illegal result.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ return true;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
+void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) {
+ if (!InVT.isVector()) {
+ LoVT = HiVT = TLI.getTypeToTransformTo(InVT);
+ } else {
+ MVT NewEltVT = InVT.getVectorElementType();
+ unsigned NumElements = InVT.getVectorNumElements();
+ if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector.
+ NumElements >>= 1;
+ LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements);
+ } else { // Non-power-of-two vectors.
+ unsigned NewNumElts_Lo = 1 << Log2_32(NumElements);
+ unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo;
+ LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo);
+ HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi);
+ }
+ }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Pair.getDebugLoc();
+ MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1));
+}
+
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT,
+ SDValue Index) {
+ DebugLoc dl = Index.getDebugLoc();
+ // Make sure the index type is big enough to compute in.
+ if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+ Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EltSize, Index.getValueType()));
+ return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result DebugLoc
+ DebugLoc dlHi = Hi.getDebugLoc();
+ DebugLoc dlLo = Lo.getDebugLoc();
+ MVT LVT = Lo.getValueType();
+ MVT HVT = Hi.getValueType();
+ MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits());
+
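+ // Zero extend Lo into the low bits of the result, shift Hi into the high
+ // bits, and OR the two halves together.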
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ if (NumOps == 0) {
+ return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
+}
+
+/// MakeLibCall - Generate a libcall taking the given operands as arguments and
+/// returning a result of type RetVT.
+SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForMVT();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForMVT();
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, CallingConv::C, false, Callee, Args, DAG, dl);
+ return CallInfo.first;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type. A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) {
+ DebugLoc dl = Bool.getDebugLoc();
+ ISD::NodeType ExtendCode;
+ switch (TLI.getBooleanContents()) {
+ default:
+ assert(false && "Unknown BooleanContent!");
+ case TargetLowering::UndefinedBooleanContent:
+ // Extend to VT by adding rubbish bits.
+ ExtendCode = ISD::ANY_EXTEND;
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // Extend to VT by adding zero bits.
+ ExtendCode = ISD::ZERO_EXTEND;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent: {
+ // Extend to VT by copying the sign bit.
+ ExtendCode = ISD::SIGN_EXTEND;
+ break;
+ }
+ }
+ return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ MVT LoVT, MVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// SplitInteger - Return the lower and upper halves of Op's bits in a value
+/// type half the size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+/// only uses types natively supported by the target. Returns "true" if it made
+/// any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 0000000..75c8924
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,736 @@
+//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared among the files that implement the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class VISIBILITY_HIDDEN DAGTypeLegalizer {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+ // about the state of the node. This enum lists the possible states.
+ enum NodeIdFlags {
+ /// ReadyToProcess - All operands have been processed, so this node is ready
+ /// to be handled.
+ ReadyToProcess = 0,
+
+ /// NewNode - This is a new node, not seen before, that was created in the
+ /// process of legalizing some other node.
+ NewNode = -1,
+
+ /// Unanalyzed - This node's ID needs to be set to the number of its
+ /// unprocessed operands.
+ Unanalyzed = -2,
+
+ /// Processed - This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
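In other words, the NodeId acts as a countdown of unprocessed operands: a node becomes ReadyToProcess (and is pushed onto the worklist) once the count reaches zero. A simplified, hypothetical sketch of that bookkeeping with a toy node type in place of SDNode (the negative sentinel values are omitted here):

    #include <cstdio>
    #include <vector>

    // Toy node: NodeId plays the role SDNode's NodeId plays above.
    struct Node {
      int NodeId;                // > 0: unprocessed operand count; 0: ReadyToProcess
      std::vector<Node*> Users;  // nodes that use this one as an operand
    };

    // When a node finishes processing, tell each user that one more of its
    // operands is done; users whose counts hit zero become ready.
    static void MarkProcessed(Node *N, std::vector<Node*> &Worklist) {
      for (Node *U : N->Users)
        if (--U->NodeId == 0 /* ReadyToProcess */)
          Worklist.push_back(U);
    }

    int main() {
      Node A = { 0, {} }, B = { 0, {} }, C = { 2, {} };  // C uses A and B
      A.Users.push_back(&C);
      B.Users.push_back(&C);
      std::vector<Node*> Worklist;
      MarkProcessed(&A, Worklist);  // C still has one operand pending
      MarkProcessed(&B, Worklist);  // now C becomes ready
      std::printf("ready nodes: %u\n", (unsigned)Worklist.size());  // prints 1
      return 0;
    }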
+private:
+ enum LegalizeAction {
+ Legal, // The target natively supports this type.
+ PromoteInteger, // Replace this integer type with a larger one.
+ ExpandInteger, // Split this integer type into two of half the size.
+ SoftenFloat, // Convert this float type to a same size integer type.
+ ExpandFloat, // Split this float type into two of half the size.
+ ScalarizeVector, // Replace this one-element vector with its element type.
+ SplitVector, // This vector type should be split into smaller vectors.
+ WidenVector // This vector type should be widened into a larger vector.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// simple value type, where the two bits correspond to the LegalizeAction
+ /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// getTypeAction - Return how we should legalize values of this type.
+ LegalizeAction getTypeAction(MVT VT) const {
+ switch (ValueTypeActions.getTypeAction(VT)) {
+ default:
+ assert(false && "Unknown legalize action!");
+ case TargetLowering::Legal:
+ return Legal;
+ case TargetLowering::Promote:
+ // Promote can mean
+ // 1) For integers, use a larger integer type (e.g. i8 -> i32).
+ // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
+ if (!VT.isVector())
+ return PromoteInteger;
+ else
+ return WidenVector;
+ case TargetLowering::Expand:
+ // Expand can mean
+ // 1) split scalar in half, 2) convert a float to an integer,
+ // 3) scalarize a single-element vector, 4) split a vector in two.
+ if (!VT.isVector()) {
+ if (VT.isInteger())
+ return ExpandInteger;
+ else if (VT.getSizeInBits() ==
+ TLI.getTypeToTransformTo(VT).getSizeInBits())
+ return SoftenFloat;
+ else
+ return ExpandFloat;
+ } else if (VT.getVectorNumElements() == 1) {
+ return ScalarizeVector;
+ } else {
+ return SplitVector;
+ }
+ }
+ }
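The Expand branch above distinguishes four situations purely from the shape of the type. Below is a standalone sketch of that branch, with a toy type descriptor standing in for MVT and a caller-supplied width standing in for TLI.getTypeToTransformTo (both are assumptions made only for this illustration; the Promote branch is not modeled):

    #include <cstdio>

    struct ToyVT {
      bool IsVector, IsInteger;
      unsigned Bits, NumElts;
    };

    enum ToyAction { ExpandInteger, SoftenFloat, ExpandFloat,
                     ScalarizeVector, SplitVector };

    // Mirrors the TargetLowering::Expand case of getTypeAction above.
    static ToyAction classifyExpand(const ToyVT &VT, unsigned TransformedBits) {
      if (!VT.IsVector) {
        if (VT.IsInteger)               return ExpandInteger;
        if (VT.Bits == TransformedBits) return SoftenFloat;   // e.g. f64 -> i64
        return ExpandFloat;                                   // e.g. ppcf128 -> 2 x f64
      }
      return VT.NumElts == 1 ? ScalarizeVector : SplitVector;
    }

    int main() {
      ToyVT I64   = { false, true,  64,  1 };
      ToyVT F64   = { false, false, 64,  1 };
      ToyVT V1I32 = { true,  true,  32,  1 };
      ToyVT V8I32 = { true,  true,  256, 8 };
      std::printf("%d %d %d %d\n",
                  classifyExpand(I64,   32),    // ExpandInteger
                  classifyExpand(F64,   64),    // SoftenFloat
                  classifyExpand(V1I32, 64),    // ScalarizeVector
                  classifyExpand(V8I32, 128));  // SplitVector
      return 0;
    }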
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ bool isTypeLegal(MVT VT) const {
+ return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal;
+ }
+
+ /// IgnoreNodeResults - Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant;
+ }
+
+ /// PromotedIntegers - For integer nodes that are below legal width, this map
+ /// indicates what promoted value to use.
+ DenseMap<SDValue, SDValue> PromotedIntegers;
+
+ /// ExpandedIntegers - For integer nodes that need to be expanded, this map
+ /// indicates the Lo/Hi pair that is the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+
+ /// SoftenedFloats - For floating point nodes converted to integers of
+ /// the same size, this map indicates the converted value to use.
+ DenseMap<SDValue, SDValue> SoftenedFloats;
+
+ /// ExpandedFloats - For float nodes that need to be expanded, this map
+ /// indicates the Lo/Hi pair that is the expanded version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+
+ /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+ /// scalar value of type 'ty' to use.
+ DenseMap<SDValue, SDValue> ScalarizedVectors;
+
+ /// SplitVectors - For vector nodes that need to be split, this map indicates
+ /// the Lo/Hi pair that makes up the split version of the input.
+ DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+
+ /// WidenedVectors - For vector nodes that need to be widened, indicates
+ /// the widened value to use.
+ DenseMap<SDValue, SDValue> WidenedVectors;
+
+ /// ReplacedValues - For values that have been replaced with another,
+ /// indicates the replacement value to use.
+ DenseMap<SDValue, SDValue> ReplacedValues;
+
+ /// Worklist - This defines a worklist of nodes to process. In order to be
+ /// pushed onto this worklist, all operands of a node must have already been
+ /// processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// run - This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ ExpungeNode(Old);
+ ExpungeNode(New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void ExpungeNode(SDNode *N);
+ void PerformExpensiveChecks();
+ void RemapValue(SDValue &N);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT);
+ bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult);
+ SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index);
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+ SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT,
+ const SDValue *Ops, unsigned NumOps, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteTargetBoolean(SDValue Bool, MVT VT);
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void ReplaceValueWithHelper(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+ /// larger integer type, this returns the promoted value. The low bits of the
+ /// promoted value corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ SDValue &PromotedOp = PromotedIntegers[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+ /// final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ MVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
+ /// final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ MVT OldVT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT);
+ }
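The "rubbish upper bits" contract from GetPromotedInteger is easy to reproduce with ordinary integers: after an i8 is promoted into a 32-bit register, only the low 8 bits are meaningful until they are re-extended. A plain-C++ sketch of what ZExtPromotedInteger and SExtPromotedInteger re-establish (the 8-to-32-bit promotion is just an example):

    #include <cassert>
    #include <cstdint>

    // Pretend an i8 value has been promoted into a 32-bit register: only the
    // low 8 bits are meaningful, the upper 24 bits are "rubbish".
    static int32_t zextInReg8(int32_t Promoted) {
      return Promoted & 0xFF;                    // what ZExtPromotedInteger yields
    }
    static int32_t sextInReg8(int32_t Promoted) {
      return ((Promoted & 0xFF) ^ 0x80) - 0x80;  // portable SIGN_EXTEND_INREG on 8 bits
    }

    int main() {
      int32_t Promoted = static_cast<int32_t>(0xDEADBE80u); // low byte is 0x80 (-128 as i8)
      assert(zextInReg8(Promoted) == 0x80);
      assert(sextInReg8(Promoted) == -128);
      return 0;
    }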
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SDIV(SDNode *N);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UDIV(SDNode *N);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedInteger - Given a processed operand Op which was expanded into
+ /// two integers of half the size, this returns the two halves. The low bits
+ /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, unsigned Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ SDValue &SoftenedOp = SoftenedFloats[Op];
+ RemapValue(SoftenedOp);
+ assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Result Float to Integer Conversion.
+ void SoftenFloatResult(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Operand Float to Integer Conversion.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetExpandedFloat - Given a processed operand Op which was expanded into
+ /// two floating point values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetScalarizedVector - Given a processed one-element vector Op which was
+ /// scalarized to its element type, this returns the element. For example,
+ /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ SDValue &ScalarizedOp = ScalarizedVectors[Op];
+ RemapValue(ScalarizedOp);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_ShiftOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetSplitVector - Given a processed vector Op which was split into smaller
+ /// vectors, this method returns the smaller vectors. The first elements of
+ /// Op coincide with the elements of Lo; the remaining elements of Op coincide
+ /// with the elements of Hi: Op is what you would get by concatenating Lo and
+ /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
+ /// this method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+ void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
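The contract is that only the leading lanes of a widened value are meaningful; the trailing lanes may hold anything. A tiny array-based sketch of that contract (the padding constant is arbitrary by design):

    #include <array>
    #include <cassert>
    #include <cstdint>

    // "Widen" a 2-lane value to 4 lanes: the first two lanes are preserved,
    // the last two carry rubbish that callers must not rely on.
    static std::array<uint32_t, 4> widenV2ToV4(const std::array<uint32_t, 2> &In) {
      std::array<uint32_t, 4> Out = { In[0], In[1], 0xDEADBEEFu, 0xDEADBEEFu };
      return Out;
    }

    int main() {
      std::array<uint32_t, 2> V = { 7, 9 };
      std::array<uint32_t, 4> W = widenV2ToV4(V);
      assert(W[0] == 7 && W[1] == 9);  // only the original lanes are meaningful
      return 0;
    }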
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GenWidenVectorLoads - Helper function to generate a set of loads that
+ /// load a vector with a resulting wider type. It takes
+ ///   LdChain:    list of chains for the loads we have generated.
+ ///   Chain:      incoming chain for the load vector.
+ ///   BasePtr:    base pointer to load from.
+ ///   SV:         memory disambiguation source value.
+ ///   SVOffset:   memory disambiguation offset.
+ ///   Alignment:  alignment of the memory.
+ ///   isVolatile: whether the load is volatile.
+ ///   LdWidth:    width of memory that we want to load.
+ ///   ResType:    the wider result type for the resulting vector.
+ ///   dl:         DebugLoc to be applied to new nodes.
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
+ SDValue BasePtr, const Value *SV,
+ int SVOffset, unsigned Alignment,
+ bool isVolatile, unsigned LdWidth,
+ MVT ResType, DebugLoc dl);
+
+ /// GenWidenVectorStores - Helper function to generate a set of stores that
+ /// store a widened vector into non-widened memory. It takes
+ ///   StChain:    list of chains for the stores we have generated.
+ ///   Chain:      incoming chain for the stored vector.
+ ///   BasePtr:    base pointer to store to.
+ ///   SV:         memory disambiguation source value.
+ ///   SVOffset:   memory disambiguation offset.
+ ///   Alignment:  alignment of the memory.
+ ///   isVolatile: whether the store is volatile.
+ ///   ValOp:      value to store.
+ ///   StWidth:    width of memory that we want to store.
+ ///   dl:         DebugLoc to be applied to new nodes.
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
+ SDValue BasePtr, const Value *SV,
+ int SVOffset, unsigned Alignment,
+ bool isVolatile, SDValue ValOp,
+ unsigned StWidth, DebugLoc dl);
+
+ /// ModifyToType - Modifies a vector input (widens or narrows it) to a
+ /// vector of type WidenVT. The input vector must have the same element
+ /// type as WidenVT.
+ SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which rely only on the fact that the illegal type is
+ // split into two not necessarily identical types. As such, they can be used
+ // for splitting vectors as well as for expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
+ void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which rely only on the fact that the illegal type is
+ // split into two identical types of half the size, and that the Lo/Hi part
+ // is stored first in memory on little/big-endian machines, followed by the
+ // Hi/Lo part. As such, they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ // Generic Result Expansion.
+ void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BIT_CONVERT (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 0000000..e8ff3fc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,453 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+
+void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT OutVT = N->getValueType(0);
+ MVT NOutVT = TLI.getTypeToTransformTo(OutVT);
+ SDValue InOp = N->getOperand(0);
+ MVT InVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case Legal:
+ case PromoteInteger:
+ break;
+ case SoftenFloat:
+ // Convert the integer operand instead.
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case ExpandInteger:
+ case ExpandFloat:
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case SplitVector:
+ // Convert the split parts of the input if it was split in two.
+ GetSplitVector(InOp, Lo, Hi);
+ if (Lo.getValueType() == Hi.getValueType()) {
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ }
+ break;
+ case ScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ case WidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+ InOp = GetWidenedVector(InOp);
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ InVT.getVectorNumElements()/2);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT());
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+ MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
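The fall-back path above is the classic spill-and-reload trick: store the whole value to a stack slot, load the two halves at offsets 0 and IncrementSize, and swap them on big-endian targets. The same idea in self-contained C++, with memcpy standing in for the stack slot (this assumes 64-bit IEEE-754 doubles; it is an illustration, not the DAG code):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Reinterpret an f64 as two i32 halves by round-tripping through memory,
    // mirroring the store-then-two-loads sequence above.
    static void expandBitConvertF64(double In, uint32_t &Lo, uint32_t &Hi) {
      unsigned char Slot[sizeof(double)];
      std::memcpy(Slot, &In, sizeof(double));  // "store to the stack slot"

      uint32_t First, Second;
      std::memcpy(&First, Slot, 4);            // load at offset 0
      std::memcpy(&Second, Slot + 4, 4);       // load at offset IncrementSize

      // On a little-endian host the low half is stored first; on big-endian
      // the halves come back swapped, just like the std::swap(Lo, Hi) above.
      uint16_t Probe = 1;
      bool LittleEndian = *reinterpret_cast<unsigned char*>(&Probe) == 1;
      Lo = LittleEndian ? First : Second;
      Hi = LittleEndian ? Second : First;
    }

    int main() {
      uint32_t Lo, Hi;
      expandBitConvertF64(1.0, Lo, Hi);
      std::printf("Lo=0x%08x Hi=0x%08x\n", Lo, Hi);  // IEEE-754 1.0: Lo=0, Hi=0x3ff00000
      return 0;
    }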
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ MVT OldVT = N->getValueType(0);
+ MVT NewVT = TLI.getTypeToTransformTo(OldVT);
+
+ SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::getVectorVT(NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ // Make sure the type of Idx is big enough to hold the new values.
+ if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
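The index doubling above is easier to see on concrete arrays: element Idx of a <3 x i64> viewed as a <6 x i32> occupies positions 2*Idx and 2*Idx+1, with the halves swapped on big-endian targets. A small sketch, assuming a little-endian layout purely for the illustration:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Extract element Idx of a "<3 x i64>" by viewing it as "<6 x i32>" and
    // taking elements 2*Idx and 2*Idx+1, as ExpandRes_EXTRACT_VECTOR_ELT does.
    static void extractExpandedElt(const uint64_t Vec[3], unsigned Idx,
                                   uint32_t &Lo, uint32_t &Hi) {
      uint32_t Widened[6];
      std::memcpy(Widened, Vec, sizeof(Widened));  // the BIT_CONVERT step
      Lo = Widened[2 * Idx];                       // little-endian: low half first
      Hi = Widened[2 * Idx + 1];
    }

    int main() {
      const uint64_t Vec[3] = { 0x1111111122222222ULL, 0x3333333344444444ULL,
                                0x5555555566666666ULL };
      uint32_t Lo, Hi;
      extractExpandedElt(Vec, 1, Lo, Hi);
      assert(Lo == 0x44444444u && Hi == 0x33333333u);  // halves of element 1
      return 0;
    }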
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ DebugLoc dl = N->getDebugLoc();
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset+IncrementSize,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0).isVector()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+ MVT OVT = N->getOperand(0).getValueType();
+ MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2);
+
+ if (isTypeLegal(NVT)) {
+ SDValue Parts[2];
+ GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+ }
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ MVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ MVT OldVT = N->getOperand(0).getValueType();
+ MVT NewVT = TLI.getTypeToTransformTo(OldVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ std::vector<SDValue> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::getVectorVT(NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ MVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Val = N->getOperand(1);
+ MVT OldEVT = Val.getValueType();
+ MVT NewEVT = TLI.getTypeToTransformTo(OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ DebugLoc dl = N->getDebugLoc();
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType());
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ int SVOffset = St->getSrcValueOffset();
+ unsigned Alignment = St->getAlignment();
+ bool isVolatile = St->isVolatile();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
+ SVOffset + IncrementSize,
+ isVolatile, MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // A MERGE_VALUES node can produce any number of values. We know that the
+ // first illegal one needs to be expanded into Lo/Hi.
+ unsigned i;
+
+ // The string of legal results gets turned into input operands, which have
+ // the same type.
+ for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+
+ // The first illegal result must be the one that needs to be expanded.
+ GetSplitOp(N->getOperand(i), Lo, Hi);
+
+ // Legalize the rest of the results into the input operands whether they are
+ // legal or not.
+ unsigned e = N->getNumValues();
+ for (++i; i != e; ++i)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL);
+ Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 0000000..df9af21
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,335 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR, or
+// operations that happen to take a vector but are custom-lowered, like
+// ISD::CALL; legalizing such operations never produces nodes with illegal
+// types, so it is okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ TargetLowering& TLI;
+ bool Changed; // Keep track of whether anything changed
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ // Adds a node to the translation cache
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ // Legalizes the given node
+ SDValue LegalizeOp(SDValue Op);
+ // Assuming the node is legal, "legalize" the results
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ // Implements unrolling a generic vector operation, i.e. turning it into
+ // scalar operations.
+ SDValue UnrollVectorOp(SDValue Op);
+ // Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+ // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+ // isn't legal.
+ SDValue ExpandFNEG(SDValue Op);
+ // Implements vector promotion; this is essentially just bitcasting the
+ // operands to a different type and bitcasting the result back to the
+ // original type.
+ SDValue PromoteVectorOp(SDValue Op);
+
+ public:
+ bool Run();
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+ SDValue Result =
+ DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size());
+
+ bool HasVectorValue = false;
+ for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+ J != E;
+ ++J)
+ HasVectorValue |= J->isVector();
+ if (!HasVectorValue)
+ return TranslateLegalizeResults(Op, Result);
+
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ case ISD::VSETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ case TargetLowering::Promote:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom: {
+ SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.getNode()) {
+ Result = Tmp1;
+ break;
+ }
+ // FALL THROUGH
+ }
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::FNEG)
+ Result = ExpandFNEG(Op);
+ else if (Node->getOpcode() == ISD::VSETCC)
+ Result = UnrollVSETCC(Op);
+ else
+ Result = UnrollVectorOp(Op);
+ break;
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+ // Vector "promotion" is basically just bitcasting and doing the operation
+ // in a different type. For example, x86 promotes ISD::AND on v2i32 to
+ // v1i64.
+ MVT VT = Op.getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+ SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+ return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return UnrollVectorOp(Op);
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ MVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ MVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 8> Ops(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getIntPtrConstant(i));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+ LHSElem, RHSElem, CC);
+ Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+ DAG.getConstant(APInt::getAllOnesValue
+ (EltVT.getSizeInBits()), EltVT),
+ DAG.getConstant(0, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+/// UnrollVectorOp - We know that the given vector has a legal type, however
+/// the operation it performs is not legal, and the target has requested that
+/// the operation be expanded. "Unroll" the vector, splitting out the scalars
+/// and operating on each element individually.
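+/// For example, a v4i32 UDIV that the target asked to have expanded becomes
+/// four scalar i32 UDIVs whose results are recombined with a BUILD_VECTOR.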
+SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) {
+ MVT VT = Op.getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+ unsigned NE = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+ for (unsigned i = 0; i != NE; ++i) {
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ SDValue Operand = Op.getOperand(j);
+ MVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ MVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperandEltVT,
+ Operand,
+ DAG.getConstant(i, MVT::i32));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (Op.getOpcode()) {
+ default:
+ Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT,
+ &Operands[0], Operands.size()));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0],
+ DAG.getShiftAmountOperand(Operands[1])));
+ break;
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size());
+}
+
+}
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 0000000..68967cc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2151 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in multiple vectors of a smaller type. For example,
+// implementing <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to scalarize the result of this operator!");
+ abort();
+
+ case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
+ case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCOS:
+ case ISD::FNEG:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ LHS.getValueType(), LHS, ShiftAmt);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+ MVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+ MVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+ Op0, DAG.getValueType(NewVT),
+ DAG.getValueType(Op0.getValueType()),
+ N->getOperand(3),
+ N->getOperand(4),
+ cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
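+ // For example, scalarizing an INSERT_VECTOR_ELT of an i32 value into a
+ // <1 x i8> vector simply yields the value truncated to i8.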
+ SDValue Op = N->getOperand(1);
+ MVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(),
+ N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(),
+ N->getChain(), N->getBasePtr(),
+ DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->getAlignment());
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ MVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ MVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ MVT NVT = N->getValueType(0).getVectorElementType();
+ MVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+ DebugLoc dl = N->getDebugLoc();
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2));
+
+ // VSETCC always returns a sign-extended value, while SETCC may not. The
+ // SETCC result type may not match the vector element type. Correct these.
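+ // For instance, a <1 x i8> VSETCC whose scalar SETCC yields an i32: if the
+ // target's boolean is not already all-ones, sign-extend the low bit
+ // in-register, then truncate the i32 down to i8.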
+ if (NVT.bitsLE(SVT)) {
+ // The SETCC result type is bigger than the vector element type.
+ // Ensure the SETCC result is sign-extended.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res,
+ DAG.getValueType(MVT::i1));
+ // Truncate to the final type.
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+ } else {
+ // The SETCC result type is smaller than the vector element type.
+ // If the SetCC result is not sign-extended, chop it down to MVT::i1.
+ if (TLI.getBooleanContents() !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res);
+ // Sign extend to the final type.
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to scalarize this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT:
+ Res = ScalarizeVecOp_BIT_CONVERT(N); break;
+
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N); break;
+
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break;
+
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
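+/// For example, a concatenation of four <1 x float> values becomes a
+/// BUILD_VECTOR of their four scalarized f32 elements.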
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ return GetScalarizedVector(N->getOperand(0));
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(N->getChain(), dl,
+ GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(),
+ N->getSrcValue(), N->getSrcValueOffset(),
+ N->getMemoryVT().getVectorElementType(),
+ N->isVolatile(), N->getAlignment());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+ N->isVolatile(), N->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization; we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n");
+ SDValue Lo, Hi;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to split the result of this operator!");
+ abort();
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break;
+ case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ MVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue InOp = N->getOperand(0);
+ MVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ case Legal:
+ case PromoteInteger:
+ case SoftenFloat:
+ case ScalarizeVector:
+ break;
+ case ExpandInteger:
+ case ExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
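+ // For example, on a 64-bit target, bitcasting an expanded i128 to a v4i32
+ // that splits into two v2i32 halves: bitcast each expanded i64 piece
+ // directly to v2i32.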
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case SplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits());
+ MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits());
+ if (TLI.isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumSubvectors = N->getNumOperands() / 2;
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ MVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ // Split the input.
+ SDValue VLo, VHi;
+ MVT InVT = N->getOperand(0).getValueType();
+ switch (getTypeAction(InVT)) {
+ default: assert(0 && "Unexpected type action!");
+ case Legal: {
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case SplitVector:
+ GetSplitVector(N->getOperand(0), VLo, VHi);
+ break;
+ case WidenVector: {
+ // If the result needs to be split and the input needs to be widened,
+ // the two types must have different lengths. Use the widened result
+ // and extract from it to do the split.
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ }
+
+ SDValue STyOpLo = DAG.getValueType(VLo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(VHi.getValueType());
+
+ Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ MVT IdxVT = Idx.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT LoVT, HiVT;
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ // The indices are not guaranteed to be a multiple of the new vector
+ // size unless the original vector type was split in two.
+ assert(LoVT == HiVT && "Non power-of-two vectors not supported!");
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
+ DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getIntPtrConstant(IdxVal - LoNumElts));
+ return;
+ }
+
+ // Spill the vector to the stack.
+ MVT VecVT = Vec.getValueType();
+ MVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
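+ // (For example, an i32 element value destined for a spilled v8i16 is written
+ // back as only its low 16 bits.)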
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ unsigned Alignment =
+ TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT());
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
+ MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ MVT LoVT, HiVT;
+ DebugLoc dl = LD->getDebugLoc();
+ GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ const Value *SV = LD->getSrcValue();
+ int SVOffset = LD->getSrcValueOffset();
+ MVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ MVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
+ SV, SVOffset, LoMemVT, isVolatile, Alignment);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ SVOffset += IncrementSize;
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
+ SV, SVOffset, HiMemVT, isVolatile, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ // Split the input.
+ MVT InVT = N->getOperand(0).getValueType();
+ switch (getTypeAction(InVT)) {
+ default: assert(0 && "Unexpected type action!");
+ case Legal: {
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case SplitVector:
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ break;
+ case WidenVector: {
+ // If the result needs to be split and the input needs to be widened,
+ // the two types must have different lengths. Use the widened result
+ // and extract from it to do the split.
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?");
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ MVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+ assert(NewVT == Inputs[1].getValueType() &&
+ "Non power-of-two vectors not supported!");
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
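+ // For example, when splitting a shuffle of two <8 x i32> vectors, each half
+ // of the output may draw on any of the four <4 x i32> pieces; a half that
+ // needs more than two of them is assembled element by element instead.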
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ MVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input], DAG.getIntPtrConstant(Idx)));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+ Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size());
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ }
+
+ Ops.clear();
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ MVT LoVT, HiVT;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+ SDValue LL, LH, RL, RH;
+ GetSplitVector(N->getOperand(0), LL, LH);
+ GetSplitVector(N->getOperand(1), RL, RH);
+
+ Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n");
+ SDValue Res = SDValue();
+
+ if (Res.getNode() == 0) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to split this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ MVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc dl = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ assert(Lo.getValueType() == Hi.getValueType() &&
+ "Returns legal non-power-of-two vector type?");
+ MVT InVT = Lo.getValueType();
+
+ MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
+ // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal. For now, assume the index
+ // is a constant.
+ MVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+ }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ MVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx);
+ else
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType()));
+ }
+
+ // Store the vector to the stack.
+ MVT EltVT = VecVT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0);
+
+ // Load back the required element.
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ DebugLoc dl = N->getDebugLoc();
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ int SVOffset = N->getSrcValueOffset();
+ MVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getAlignment();
+ bool isVol = N->isVolatile();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ MVT LoMemVT, HiMemVT;
+ GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ LoMemVT, isVol, Alignment);
+ else
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ isVol, Alignment);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getSrcValue(), SVOffset+IncrementSize,
+ HiMemVT,
+ isVol, MinAlign(Alignment, IncrementSize));
+ else
+ Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize,
+ isVol, MinAlign(Alignment, IncrementSize));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to widen the result of this operator!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break;
+ case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::BSWAP:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FPOWI:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::SUB:
+ case ISD::XOR: Res = WidenVecRes_Binary(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: Res = WidenVecRes_Shift(N); break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break;
+
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCOS:
+ case ISD::FNEG:
+ case ISD::FSIN:
+ case ISD::FSQRT: Res = WidenVecRes_Unary(N); break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ MVT InVT = InOp.getValueType();
+ MVT InEltVT = InVT.getVectorElementType();
+ MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == WidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getNode(Opcode, dl, WidenVT, InOp);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
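+ // For instance, widening a v2i32 -> v2f32 conversion to v4f32: concatenate
+ // the v2i32 input with an undef v2i32 to form a v4i32 and convert that.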
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(Opcode, dl, WidenVT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
+ &Ops[0], NumConcat));
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ // Extract the input and convert the shortened input vector.
+ return DAG.getNode(Opcode, dl, WidenVT,
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0)));
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ MVT EltVT = WidenVT.getVectorElementType();
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ MVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == WidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+ MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ MVT InVT = InOp.getValueType();
+ MVT VT = N->getValueType(0);
+ MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (getTypeAction(InVT)) {
+ default:
+ assert(false && "Unknown type action!");
+ break;
+ case Legal:
+ break;
+ case PromoteInteger:
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ break;
+ case SoftenFloat:
+ case ExpandInteger:
+ case ExpandFloat:
+ case ScalarizeVector:
+ case SplitVector:
+ break;
+ case WidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ // The input widens to the same size. Convert to the widened value.
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ if (WidenSize % InSize == 0) {
+ // Determine new input vector type. The new input vector type will use
+ // the same element type (if it's a vector) or use the input type as a
+ // vector. It is the same size as the type to widen to.
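+ // For instance, an i64 scalar bit-converted to a result widened to v4i32 is
+ // first built into a v2i64 (the i64 plus an undef) and then bit-converted
+ // to v4i32, assuming v2i64 is legal on the target.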
+ MVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ MVT InEltVT = InVT.getVectorElementType();
+ NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = MVT::getVectorVT(InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ NewInVT, &Ops[0], NewNumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+ }
+ }
+
+ // This should occur rarely. Lower the bit-convert to a store/load
+ // from the stack. Create the stack frame object. Make sure it is aligned
+ // for both the source and destination types.
+ SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // Build a widened vector, padding the extra elements with undef.
+ MVT VT = N->getValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ NewOps.reserve(WidenNumElts);
+ for (unsigned i = NumElts; i < WidenNumElts; ++i)
+ NewOps.push_back(DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ MVT InVT = N->getOperand(0).getValueType();
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ DebugLoc dl = N->getDebugLoc();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != WidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to the correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(InVT)) {
+ // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
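+ // For example, concatenating two <2 x i32> operands that widen to <4 x i32>:
+ // shuffle the two widened operands with mask {0, 1, 4, 5} to take the low
+ // half of each.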
+ SmallVector<int, 16> MaskOps(WidenNumElts);
+ for (unsigned i=0; i < WidenNumElts/2; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ &MaskOps[0]);
+ }
+ }
+ }
+
+ // Fall back to extracting the elements and building a vector.
+ MVT EltVT = WidenVT.getVectorElementType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ MVT InVT = InOp.getValueType();
+ MVT InEltVT = InVT.getVectorElementType();
+ MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == WidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i) {
+ Ops[i] = UndefVal;
+ }
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ // Extract the input and convert the shortened input vector.
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getIntPtrConstant(0));
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ MVT EltVT = WidenVT.getVectorElementType();
+ DTyOp = DAG.getValueType(EltVT);
+ STyOp = DAG.getValueType(InEltVT);
+
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ MVT InVT = InOp.getValueType();
+
+ ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
+ if (CIdx) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ // Check if we can just return the input vector after widening.
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+ }
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ MVT EltVT = VT.getVectorElementType();
+ MVT IdxVT = Idx.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ if (CIdx) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(IdxVal+i, IdxVT));
+ } else {
+ Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
+ for (i=1; i < NumElts; ++i) {
+ SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(i, IdxVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
+ }
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0));
+ MVT LdVT = LD->getMemoryVT();
+ DebugLoc dl = N->getDebugLoc();
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ const Value *SV = LD->getSrcValue();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+  SmallVector<SDValue, 16> LdChain;  // Chain for the series of loads
+ if (ExtType != ISD::NON_EXTLOAD) {
+    // For extension loads, we cannot play the trick of chopping the value
+    // into legal vector types and bit-casting it to the right type. Instead,
+    // we unroll the load and build the result vector element by element.
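+    // Illustration (assuming a v4i32 widened type): an extending load of
+    // v2i16 to v2i32 becomes two scalar i16-to-i32 extending loads at byte
+    // offsets 0 and 2, followed by a BUILD_VECTOR whose last two elements
+    // are UNDEF.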
+ MVT EltVT = WidenVT.getVectorElementType();
+ MVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+ LdEltVT, isVolatile, Align);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+ SVOffset + Offset, LdEltVT, isVolatile, Align);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+ } else {
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+ unsigned int LdWidth = LdVT.getSizeInBits();
+ Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset,
+ Align, isVolatile, LdWidth, WidenVT, dl);
+ }
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0],
+ LdChain.size());
+
+  // We modified the chain - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ MVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ MVT CondEltVT = CondVT.getVectorElementType();
+ MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == WidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT WidenVT = TLI.getTypeToTransformTo(VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
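+  // Illustration: widening a 2-element shuffle with mask <1, 2> to 4 elements
+  // remaps the second-operand index 2 to 4 and pads with -1, giving
+  // <1, 4, -1, -1>.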
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ MVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "cannot widen non-vector type");
+ MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts);
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately. If not,
+  // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ return DAG.getNode(ISD::VSETCC, N->getDebugLoc(),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
+ DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG);
+ cerr << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ cerr << "WidenVectorOperand op #" << ResNo << ": ";
+ N->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to widen this operator's operand!");
+ abort();
+
+ case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+  // Since the result is legal and the input is illegal, it is unlikely
+  // that we can fix the input to a legal type, so unroll the conversion
+  // into some scalar code and create a nasty build vector.
+ MVT VT = N->getValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+ MVT InVT = InOp.getValueType();
+ MVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ for (unsigned i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getIntPtrConstant(i)));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+ MVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ MVT InWidenVT = InOp.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check if we can convert between two legal vector types and extract.
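+  // Illustration (assuming v2i64 is legal): an original v2i32-to-i64
+  // bit-convert whose operand was widened to v4i32 becomes a v4i32-to-v2i64
+  // BIT_CONVERT followed by an extract of element 0.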
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ if (InWidenSize % Size == 0 && !VT.isVector()) {
+ unsigned NewNumElts = InWidenSize / Size;
+ MVT NewVT = MVT::getVectorVT(VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack. Create the stack
+ // frame object. Make sure it is aligned for both the source and destination
+ // types.
+ SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+  // If the input vector is not legal, it is likely that we will not find a
+  // legal vector of the same size. Replace the concatenation with a
+  // nasty build vector built from individually extracted elements.
+ MVT VT = N->getValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ MVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == WidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ MVT EltVT = InOp.getValueType().getVectorElementType();
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+ EltVT, InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value, but we only want to store the original
+  // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ const Value *SV = ST->getSrcValue();
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ DebugLoc dl = N->getDebugLoc();
+
+ MVT StVT = ST->getMemoryVT();
+ MVT ValVT = ValOp.getValueType();
+  // The widened vector type must be bigger than the type
+  // we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore()) {
+    // For truncating stores, we cannot play the trick of chopping the value
+    // into legal vector types and bit-casting it to the right type. Instead,
+    // we unroll the store one element at a time.
+ MVT StEltVT = StVT.getVectorElementType();
+ MVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getIntPtrConstant(0));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset, StEltVT,
+ isVolatile, Align));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr, DAG.getIntPtrConstant(Offset));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                                DAG.getIntPtrConstant(i));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+ SVOffset + Offset, StEltVT,
+ isVolatile, MinAlign(Align, Offset)));
+ }
+ }
+ else {
+ assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
+ // Store value
+ GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
+ Align, isVolatile, ValOp, StVT.getSizeInBits(), dl);
+ }
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other,&StChain[0],StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+
+// Utility function to find a legal vector type and its associated element
+// type, given a preferred element width; the resulting vector type must have
+// the same size as VecVT.
+// TLI:   Target lowering used to determine legal types.
+// Width: Preferred width of the element type to load or store.
+// VecVT: Vector value type whose size we must match.
+// Returns NewVecVT and NewEltVT - the vector type and its associated
+// element type.
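+// Illustration (assuming i64 and v2i64 are legal): Width = 96 and
+// VecVT = v4i32 yield NewEltVT = i64 and NewVecVT = v2i64, since 64 is the
+// largest power of 2 not exceeding 96 and v2i64 matches the 128-bit size of
+// v4i32.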
+static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
+ MVT VecVT,
+ MVT& NewEltVT, MVT& NewVecVT) {
+ unsigned EltWidth = Width + 1;
+ if (TLI.isTypeLegal(VecVT)) {
+    // We start with the preferred width, round it down to a power of 2, and
+    // look for a legal vector type of that width. If none is found, we keep
+    // halving the width. Since the incoming type is legal, this process must
+    // terminate: a vector of the smallest loadable type should always be legal.
+ do {
+ assert(EltWidth > 0);
+ EltWidth = 1 << Log2_32(EltWidth - 1);
+ NewEltVT = MVT::getIntegerVT(EltWidth);
+ unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+ NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+ } while (!TLI.isTypeLegal(NewVecVT) ||
+ VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+ } else {
+    // The incoming vector type is illegal and is the result of widening
+    // a vector to a power of 2. In this case, we will use the preferred
+    // width as long as it is a multiple of the incoming vector length.
+    // The legalization process will eventually make this into a legal type
+    // and remove the illegal bit-converts (which would turn into stack
+    // conversions if they were allowed to exist).
+ do {
+ assert(EltWidth > 0);
+ EltWidth = 1 << Log2_32(EltWidth - 1);
+ NewEltVT = MVT::getIntegerVT(EltWidth);
+ unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+ NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+ } while (!TLI.isTypeLegal(NewEltVT) ||
+ VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+ }
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ SDValue Chain,
+ SDValue BasePtr,
+ const Value *SV,
+ int SVOffset,
+ unsigned Alignment,
+ bool isVolatile,
+ unsigned LdWidth,
+ MVT ResType,
+ DebugLoc dl) {
+  // The strategy assumes that we can efficiently load power-of-2 widths.
+  // The routine chops the load into a series of power-of-2-wide loads,
+  // inserts the pieces into a legal vector, and then bit-converts the result
+  // into the vector type we want. This avoids unnecessary stack conversions.
+
+  // TODO: If LdWidth is a legal integer width, the alignment matches LdWidth,
+  // and the load is non-volatile, we could use a single wider load instead.
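+  // Illustration (assuming v2i64 and v4i32 are legal): a 96-bit load widened
+  // to v4i32 becomes an i64 load placed into a v2i64 via SCALAR_TO_VECTOR,
+  // a bit-convert to v4i32, an i32 load at offset 8 inserted at index 2, and
+  // a final bit-convert to the result type.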
+
+  // Find the vector type that we can load from.
+ MVT NewEltVT, NewVecVT;
+ unsigned NewEltVTWidth;
+ FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+ NewEltVTWidth = NewEltVT.getSizeInBits();
+
+ SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
+ isVolatile, Alignment);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can do it all with a single load.
+ if (LdWidth == NewEltVTWidth) {
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+ }
+
+ unsigned Idx = 1;
+ LdWidth -= NewEltVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ unsigned Increment = NewEltVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ if (LdWidth < NewEltVTWidth) {
+      // The type we are currently using is too large; switch to a smaller
+      // power of 2 that fits the remaining width.
+ unsigned oNewEltVTWidth = NewEltVTWidth;
+ FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+ NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the insertion index and the vector based on the new load type.
+ Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+ VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ }
+
+ SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV,
+ SVOffset+Offset, isVolatile,
+ MinAlign(Alignment, Offset));
+ LdChain.push_back(LdOp.getValue(1));
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp,
+ DAG.getIntPtrConstant(Idx++));
+
+ LdWidth -= NewEltVTWidth;
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+}
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+ SDValue Chain,
+ SDValue BasePtr,
+ const Value *SV,
+ int SVOffset,
+ unsigned Alignment,
+ bool isVolatile,
+ SDValue ValOp,
+ unsigned StWidth,
+ DebugLoc dl) {
+  // Break the store into a series of power-of-2-wide stores. At each step we
+  // bit-convert the vector to a vector whose element width is the largest
+  // power of 2 that fits in the width still to be stored. This avoids
+  // requiring a stack-based conversion.
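+  // Illustration (assuming v2i64 and v4i32 are legal): storing 96 bits of a
+  // widened v4i32 value becomes an i64 store of element 0 of the value
+  // bit-converted to v2i64, followed by an i32 store of element 2 of the
+  // original v4i32 at byte offset 8.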
+
+ // Find a width of the element type we can store with
+ MVT WidenVT = ValOp.getValueType();
+ MVT NewEltVT, NewVecVT;
+
+ FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+ unsigned NewEltVTWidth = NewEltVT.getSizeInBits();
+
+ SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
+ DAG.getIntPtrConstant(0));
+ SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset,
+ isVolatile, Alignment);
+ StChain.push_back(StOp);
+
+ // Check if we are done
+ if (StWidth == NewEltVTWidth) {
+ return;
+ }
+
+ unsigned Idx = 1;
+ StWidth -= NewEltVTWidth;
+ unsigned Offset = 0;
+
+ while (StWidth > 0) {
+ unsigned Increment = NewEltVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getIntPtrConstant(Increment));
+
+ if (StWidth < NewEltVTWidth) {
+      // The type we are currently using is too large; switch to a smaller
+      // power of 2 that fits the remaining width.
+ unsigned oNewEltVTWidth = NewEltVTWidth;
+ FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+ NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the extraction index and the vector based on the new store type.
+ Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+ VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ }
+
+ EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
+ DAG.getIntPtrConstant(Idx++));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+ SVOffset + Offset, isVolatile,
+ MinAlign(Alignment, Offset)));
+ StWidth -= NewEltVTWidth;
+ }
+}
+
+/// Modifies a vector input (widens or narrows it) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
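+/// For example, a v2i32 input with NVT = v4i32 is widened by concatenating it
+/// with a single UNDEF v2i32 operand.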
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) {
+  // Note that InOp might have been widened, so it might already have
+  // the right width or it might need to be narrowed.
+ MVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ DebugLoc dl = InOp.getDebugLoc();
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getIntPtrConstant(0));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ MVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(Idx));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..185222a
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 0000000..af73b28
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,635 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct VISIBILITY_HIDDEN FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other node
+  /// that modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleBottomUp();
+
+ /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph();
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ --PredSU->NumSuccsLeft;
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getFlaggedNode())
+ return NULL;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getValueType(i);
+ if (VT == MVT::Flag)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ MVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Flag)
+ return NULL;
+ }
+
+ if (TryUnfold) {
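+    // Unfolding splits an instruction with a folded memory operand back into
+    // a separate load and the register form of the operation, so the compute
+    // part can be duplicated without duplicating the memory access.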
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = NewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = NewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPred = *I;
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
+ SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = TID.getNumDefs();
+ for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. duplication or inserting copies) to make it
+/// possible.
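+/// For example, if scheduling the node would clobber a physical register (or
+/// an alias of one) whose pending definition is still live, the node is
+/// delayed so that it is not placed between that definition and its use.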
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ unsigned Reg = I->getReg();
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
+ if (RegAdded.insert(Reg))
+ LRegs.push_back(Reg);
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg);
+ *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
+ if (RegAdded.insert(*Alias))
+ LRegs.push_back(*Alias);
+ }
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ if (!Node->isMachineOpcode())
+ continue;
+ const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+ if (!TID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+ if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
+ if (RegAdded.insert(*Reg))
+ LRegs.push_back(*Reg);
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(*Reg);
+ *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias))
+ LRegs.push_back(*Alias);
+ }
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ if (!CurSU) {
+        // Try duplicating the node that produces these
+        // "expensive to copy" values to break the dependency. If even
+        // that doesn't work, insert cross-class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is null, then it must be possible
+        // to copy the value directly. Do not try to duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DOUT << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n";
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DOUT << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n";
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ if (!CurSU) {
+ assert(false && "Unable to resolve live physical register dependencies!");
+ abort();
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+  // Reverse the order since we scheduled bottom-up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+
+#ifndef NDEBUG
+ // Verify that all SUnits were scheduled.
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ cerr << "*** List scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ cerr << "*** List scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ cerr << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(!AnyNotSched);
+ assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 0000000..c432534
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,268 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ tdListDAGScheduler("list-td", "Top-down list scheduler",
+ createTDListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation. This supports
+/// top-down scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once those results become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+public:
+ ScheduleDAGList(MachineFunction &mf,
+ SchedulingPriorityQueue *availqueue,
+ ScheduleHazardRecognizer *HR)
+ : ScheduleDAGSDNodes(mf),
+ AvailableQueue(availqueue), HazardRec(HR) {
+ }
+
+ ~ScheduleDAGList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void ReleaseSucc(SUnit *SU, const SDep &D);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ // Build the scheduling graph.
+ BuildSchedGraph();
+
+ AvailableQueue->initNodes(SUnits);
+
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+ --SuccSU->NumPredsLeft;
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, *I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall but no other problem; just
+      // advance the current cycle and try again.
+ DOUT << "*** Advancing cycle, no work to do\n";
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DOUT << "*** Emitting noop\n";
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with a
+/// new hazard recognizer. This scheduler takes ownership of the hazard
+/// recognizer and deletes it when done.
+ScheduleDAGSDNodes *
+llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGList(*IS->MF,
+ new LatencyPriorityQueue(),
+ IS->CreateTargetHazardRecognizer());
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..c97e2a8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,1533 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ tdrListrDAGScheduler("list-tdrr",
+ "Top-down register reduction list scheduling",
+ createTDRRListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+ /// it is top-down.
+ bool isBottomUp;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other node
+  /// that modifies the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf,
+ bool isbottomup,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
+ AvailableQueue(availqueue), Topo(SUnits) {
+ }
+
+ ~ScheduleDAGRRList() {
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+ /// AddPred - Adds a predecessor edge to SUnit SU.
+ /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+ /// RemovePred - Removes a predecessor edge from SUnit SU.
+ /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void CapturePred(SDep *PredEdge);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ void ScheduleNodeTopDown(SUnit*, unsigned);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void ListScheduleTopDown();
+ void ListScheduleBottomUp();
+
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ /// Updates the topological ordering if required.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = NewSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ /// Updates the topological ordering if required.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// ForceUnitLatencies - Return true, since register-pressure-reducing
+ /// scheduling doesn't need actual latency information.
+ bool ForceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph();
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
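+ // Compute an initial topological ordering of the SUnits so that
+ // IsReachable and WillCreateCycle queries are valid during scheduling.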
+ Topo.InitDAGTopologicalSorting();
+
+ AvailableQueue->initNodes(SUnits);
+
+ // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+ if (isBottomUp)
+ ListScheduleBottomUp();
+ else
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ --PredSU->NumSuccsLeft;
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue->push(PredSU);
+ }
+}
+
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+/// and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
+ DEBUG(SU->dump(this));
+
+ AvailableQueue->UnscheduledNode(SU);
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ CapturePred(&*I);
+ if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()]) {
+ LiveRegDefs[I->getReg()] = SU;
+ ++NumLiveRegs;
+ }
+ if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
+ LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+ }
+ }
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ AvailableQueue->push(SU);
+}
+
+/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
+/// BtCycle in order to schedule a specific node.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
+ unsigned &CurCycle) {
+ SUnit *OldSU = NULL;
+ while (CurCycle > BtCycle) {
+ OldSU = Sequence.back();
+ Sequence.pop_back();
+ if (SU->isSucc(OldSU))
+ // Don't try to remove SU from AvailableQueue.
+ SU->isAvailable = false;
+ UnscheduleNodeBottomUp(OldSU);
+ --CurCycle;
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ ++NumBacktracks;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getFlaggedNode())
+ return NULL;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return NULL;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getValueType(i);
+ if (VT == MVT::Flag)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = N->getOperand(i);
+ MVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (VT == MVT::Flag)
+ return NULL;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return NULL;
+
+ DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location that produces the same type of value
+ // but has different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ ComputeLatency(LoadSU);
+ }
+
+ SUnit *NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ NewSU->isCommutable = true;
+ ComputeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPreds.push_back(*I);
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
+ const SDep &Pred = ChainPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from the old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVector<SUnit*, 2> &Copies) {
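+ // CopyFromSU copies the value out of the live physical register class
+ // (SrcRC) into the cross-copy class (DestRC); CopyToSU copies it back so
+ // that SU's scheduled successors can be rerouted through the copies.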
+ SUnit *CopyFromSU = CreateNewSUnit(NULL);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(NULL);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from the old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
+ AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = TID.getNumDefs();
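+ // The result values of a machine node are its explicit defs followed by its
+ // implicit defs, so walk the implicit def list to find the value index that
+ // corresponds to Reg.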
+ for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+ if (RegAdded.insert(Reg)) {
+ LRegs.push_back(Reg);
+ Added = true;
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias)) {
+ LRegs.push_back(*Alias);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specified node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVector<unsigned, 4> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep())
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
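+ // Operand 0 is the chain and operand 1 is the inline asm string; the
+ // remaining operands come in groups, each introduced by a flag word whose
+ // low 3 bits encode the operand kind (2 = register def, 6 = early-clobber
+ // register def) and whose higher bits give the number of values that follow.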
+ for (unsigned i = 2; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = (Flags & 0xffff) >> 3;
+
+ ++i; // Skip the ID value.
+ if ((Flags & 7) == 2 || (Flags & 7) == 6) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
+ if (!TID.ImplicitDefs)
+ continue;
+ for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ SUnit *TrySU = NotReady[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ unsigned LiveCycle = CurCycle;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ unsigned LCycle = LiveRegCycles[Reg];
+ LiveCycle = std::min(LiveCycle, LCycle);
+ }
+ SUnit *OldSU = Sequence[LiveCycle];
+ if (!WillCreateCycle(TrySU, OldSU)) {
+ BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (OldSU->isAvailable) {
+ OldSU->isAvailable = false;
+ AvailableQueue->remove(OldSU);
+ }
+ AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+ // If one or more successors have been unscheduled, then the current
+ // node is no longer available. Schedule a successor that's now
+ // available instead.
+ if (!TrySU->isAvailable)
+ CurSU = AvailableQueue->pop();
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ NotReady.erase(NotReady.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+ // Can't backtrack. If it's too expensive to copy the value, then try to
+ // duplicate the nodes that produce these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross-class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If the cross-copy register class is null, then it must be possible to
+ // copy the value directly. Do not try to duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DOUT << "Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n";
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DOUT << "Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n";
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue->push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order if it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+ --SuccSU->NumPredsLeft;
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft < 0) {
+ cerr << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ SuccSU->isAvailable = true;
+ AvailableQueue->push(SuccSU);
+ }
+}
+
+void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
+ // Top down: release successors
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-tdrr scheduler doesn't yet support physreg dependencies!");
+
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSU = AvailableQueue->pop();
+
+ if (CurSU)
+ ScheduleNodeTopDown(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+#ifndef NDEBUG
+ VerifySchedule(isBottomUp);
+#endif
+}
+
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+ template<class SF>
+ class RegReductionPriorityQueue;
+
+ /// Sorting functions for the Available queue.
+ struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
+ bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+
+ struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
+ td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+} // end anonymous namespace
+
+/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
+/// Smaller number is the higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = I->getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
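+ // Each additional predecessor that ties for the maximum needs another
+ // register to hold its live value, so account for the ties here.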
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
+
+namespace {
+ template<class SF>
+ class VISIBILITY_HIDDEN RegReductionPriorityQueue
+ : public SchedulingPriorityQueue {
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
+ unsigned currentQueueId;
+
+ protected:
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ public:
+ RegReductionPriorityQueue(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri) :
+ Queue(SF(this)), currentQueueId(0),
+ TII(tii), TRI(tri), scheduleDAG(NULL) {}
+
+ void initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+ }
+
+ void addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+ }
+
+ void updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+ }
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors and doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+ return SethiUllmanNumbers[SU->NodeNum];
+ }
+
+ unsigned size() const { return Queue.size(); }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++currentQueueId;
+ Queue.push(U);
+ }
+
+ void push_all(const std::vector<SUnit *> &Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+ push(Nodes[i]);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.top();
+ Queue.pop();
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ Queue.erase_one(SU);
+ SU->NodeQueueId = 0;
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+ };
+
+ typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+ BURegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<td_ls_rr_sort>
+ TDRegReductionPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+ // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+ // Try to schedule def + use closer together when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+ // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+template<class SF>
+bool
+RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ unsigned NumRes = TID.getNumDefs();
+ unsigned NumOps = TID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// hasCopyToRegUse - Return true if SU has a value successor that is a
+/// CopyToReg node.
+static bool hasCopyToRegUse(const SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
+ return true;
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getFlaggedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const unsigned *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ if (!SUImpDefs)
+ return false;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getValueType(i);
+ if (VT == MVT::Flag || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DOUT << "Prescheduling SU # " << SU->NodeNum
+ << " next to PredSU # " << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n";
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(SU, Edge);
+ Edge.setSUnit(SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+template<class SF>
+void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->getNode();
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ unsigned NumRes = TID.getNumDefs();
+ unsigned NumOps = TID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU) continue;
+ for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+ E = DUSU->Succs.end(); I != E; ++I) {
+ if (I->isCtrl()) continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU->getHeight() &&
+ (SU->getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+ // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetInstrInfo::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
+ SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
+ SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
+ continue;
+ if ((!canClobber(SuccSU, DUSU) ||
+ (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+ (!SU->isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, SU)) {
+ DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
+ << " to SU #" << SuccSU->NodeNum << "\n";
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ }
+ }
+ }
+ }
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the number of unscheduled
+/// predecessors of the successors of the SUnit SU, stopping once the provided
+/// limit is exceeded.
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+ unsigned Limit) {
+ unsigned Sum = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ const SUnit *SuccSU = I->getSUnit();
+ for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+ EE = SuccSU->Preds.end(); II != EE; ++II) {
+ SUnit *PredSU = II->getSUnit();
+ if (!PredSU->isScheduled)
+ if (++Sum > Limit)
+ return Sum;
+ }
+ }
+ return Sum;
+}
+
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
+ bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
+ bool LIsFloater = LIsTarget && left->NumPreds == 0;
+ bool RIsFloater = RIsTarget && right->NumPreds == 0;
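+ // Give a small bonus to nodes that are the only remaining unscheduled
+ // predecessor of their successor, since scheduling them immediately makes
+ // that successor available.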
+ unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
+ unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
+
+ if (left->NumSuccs == 0 && right->NumSuccs != 0)
+ return false;
+ else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+ return true;
+
+ if (LIsFloater)
+ LBonus -= 2;
+ if (RIsFloater)
+ RBonus -= 2;
+ if (left->NumSuccs == 1)
+ LBonus += 2;
+ if (right->NumSuccs == 1)
+ RBonus += 2;
+
+ if (LPriority+LBonus != RPriority+RBonus)
+ return LPriority+LBonus < RPriority+RBonus;
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+
+ if (left->NumSuccsLeft != right->NumSuccsLeft)
+ return left->NumSuccsLeft > right->NumSuccsLeft;
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD =
+ new ScheduleDAGRRList(*IS->MF, true, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD =
+ new ScheduleDAGRRList(*IS->MF, false, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 0000000..7aa15bc
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,294 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGSDNodes class, which is the base class
+// used by the SDNode-based scheduler implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf) {
+}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos) {
+ DAG = dag;
+ ScheduleDAG::Run(bb, insertPos);
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = NewSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
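+ // CopyToReg operands are (chain, register, value[, flag]); only the value
+ // operand (index 2) can carry a physical register dependency from Def.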
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ PhysReg = Reg;
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ NI->setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+ // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = ForceUnitLatencies();
+
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = NewSUnit(NI);
+
+ // See if anything is flagged to this node; if so, add it to the flagged
+ // nodes. Nodes can have at most one flag input and one flag output. Flags
+ // are required to be the last operand and result of a node.
+
+ // Scan up to find flagged preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ }
+
+ // Scan down to find any flagged succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+ SDValue FlagVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Flag result.
+ bool HasFlagUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (FlagVal.isOperandOf(*UI)) {
+ HasFlagUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ break;
+ }
+ if (!HasFlagUse) break;
+ }
+
+ // If there are flag operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are flagged together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ if (UnitLatencies)
+ NodeSUnit->Latency = 1;
+ else
+ ComputeLatency(NodeSUnit);
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+ if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
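+ // If any used result lies beyond the explicit defs, it must be produced by
+ // an implicit physical register def.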
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ MVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+ // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the scheduler
+ // emits a copy from the physical register to a virtual register unless
+ // it requires a cross-class copy (cost < 0). That means we are only
+ // treating "expensive to copy" register dependencies as physical register
+ // dependencies. This may change in the future though.
+ if (Cost >= 0)
+ PhysReg = 0;
+ SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpSU->Latency, PhysReg));
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
+/// are given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// flagged together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph() {
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes flagged together into this SUnit.
+ SU->Latency = 0;
+ bool SawMachineOpcode = false;
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+ if (N->isMachineOpcode()) {
+ SawMachineOpcode = true;
+ SU->Latency +=
+ InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass());
+ }
+}
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional flag operands (which do
+/// not go into the resulting MachineInstr).
+unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Flag)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by special operands that describe memory references, then an
+/// optional chain operand, then an optional flag operand. Compute the number
+/// of actual operands that will go into the resulting MachineInstr.
+unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) {
+ unsigned N = ComputeMemOperandsEnd(Node);
+ while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode()))
+ --N; // Ignore MEMOPERAND nodes
+ return N;
+}
+
+/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode
+/// operand
+unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+ return N;
+}
+
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+ if (!SU->getNode()) {
+ cerr << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
+ cerr << "\n";
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ cerr << " ";
+ FlaggedNodes.back()->dump(DAG);
+ cerr << "\n";
+ FlaggedNodes.pop_back();
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 0000000..2a278b7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,179 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGSDNODES_H
+#define SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Flag operands are grouped along with the flagged
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ SelectionDAG *DAG; // DAG of the current basic block
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ virtual ~ScheduleDAGSDNodes() {}
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb,
+ MachineBasicBlock::iterator insertPos);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<MemOperandSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken) return true;
+ return false;
+ }
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *NewSUnit(SDNode *N) {
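+      // In debug builds, record where the SUnits vector currently lives so the
+      // assert below can verify that push_back did not reallocate it; SUnit
+      // pointers (e.g. OrigNode and SDep edges) would be invalidated if it did.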
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
+
+    /// Clone - Creates a clone of the specified SUnit. It does not copy the
+    /// predecessor/successor info or the temporary scheduling state.
+ ///
+ SUnit *Clone(SUnit *N);
+
+    /// BuildSchedGraph - Build the SUnit graph from the selection DAG we are
+    /// given as input. This SUnit graph is similar to the SelectionDAG, but
+    /// excludes nodes that aren't interesting to scheduling, and represents
+    /// flagged-together nodes with a single SUnit.
+ virtual void BuildSchedGraph();
+
+ /// ComputeLatency - Compute node latency.
+ ///
+ virtual void ComputeLatency(SUnit *SU);
+
+    /// CountResults - The result values of target nodes come as register or
+    /// immediate values first, then an optional chain, and optional flag
+    /// results (which do not go into the machine instrs).
+ static unsigned CountResults(SDNode *Node);
+
+ /// CountOperands - The inputs to target nodes have any actual inputs first,
+ /// followed by special operands that describe memory references, then an
+ /// optional chain operand, then flag operands. Compute the number of
+ /// actual operands that will go into the resulting MachineInstr.
+ static unsigned CountOperands(SDNode *Node);
+
+ /// ComputeMemOperandsEnd - Find the index one past the last
+ /// MemOperandSDNode operand
+ static unsigned ComputeMemOperandsEnd(SDNode *Node);
+
+    /// EmitNode - Generate machine code for a node and needed dependencies.
+ /// VRBaseMap contains, for each already emitted node, the first virtual
+ /// register number for the results of the node.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool HasClone,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ virtual MachineBasicBlock *EmitSchedule();
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ private:
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS
+ /// nodes.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// getDstOfOnlyCopyToRegUse - If the only use of the specified result
+    /// number of the node is a CopyToReg, return its destination register.
+    /// Return 0 otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const;
+
+ void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum, const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+ bool IsCloned, unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const TargetInstrDesc &II, bool IsClone,
+ bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
new file mode 100644
index 0000000..fb5e207
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -0,0 +1,668 @@
+//===-- ScheduleDAGSDNodesEmit.cpp - Emit routines for ScheduleDAGSDNodes ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAGSDNodes class, which
+// create MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = NULL;
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Flag)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC =
+ getInstrOperandRegClass(TRI, II, i+II.getNumDefs());
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ if (UseRC->hasSuperClass(RC))
+ UseRC = RC;
+ else
+ assert((UseRC == RC || RC->hasSuperClass(UseRC)) &&
+ "Multiple uses expecting different register classes!");
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ MVT VT = Node->getValueType(ResNo);
+ const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI.getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI.createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg,
+ DstRC, SrcRC);
+
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+/// of the node is a CopyToReg, return its destination register. Return 0
+/// otherwise.
+unsigned ScheduleDAGSDNodes::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void ScheduleDAGSDNodes::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ const TargetInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, II, i);
+
+ if (!IsClone && !IsCloned)
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI.createVirtualRegister(RC);
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ }
+
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned ScheduleDAGSDNodes::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+ VReg = MRI.createVirtualRegister(RC);
+ }
+ BuildMI(BB, Op.getDebugLoc(), TII->get(TargetInstrInfo::IMPLICIT_DEF),VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+ScheduleDAGSDNodes::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ bool isOptDef = IIOpNum < TID.getNumOperands() &&
+ TID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it.
+ if (II) {
+ const TargetRegisterClass *SrcRC =
+ MRI.getRegClass(VReg);
+ const TargetRegisterClass *DstRC =
+ getInstrOperandRegClass(TRI, *II, IIOpNum);
+ assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+ "Don't have operand info for this instruction!");
+ if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ VReg = NewVReg;
+ }
+ }
+
+ MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// assertions only.
+void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
+ unsigned IIOpNum,
+ const TargetInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateImm(C->getZExtValue()));
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ const ConstantFP *CFP = F->getConstantFPValue();
+ MI->addOperand(MachineOperand::CreateFPImm(CFP));
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA->getOffset()));
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateJTI(JT->getIndex()));
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ const Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM.getTargetData()->getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM.getTargetData()->getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ if (CP->isMachineConstantPoolEntry())
+ Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align);
+ MI->addOperand(MachineOperand::CreateCPI(Idx, Offset));
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateES(ES->getSymbol()));
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superregister
+/// whose "SubIdx"'th sub-register class is the specified register class and
+/// whose type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+ unsigned SubIdx, MVT VT) {
+  // Pick the register class of the superregister for this type.
+ for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+ E = TRC->superregclasses_end(); I != E; ++I)
+ if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+ return *I;
+ assert(false && "Couldn't find the register class");
+ return 0;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG) {
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(),
+ TII->get(TargetInstrInfo::EXTRACT_SUBREG));
+
+ // Figure out the register class to create for the destreg.
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI.createVirtualRegister(SRC);
+ }
+
+ // Add def, source, and subreg index
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ BB->insert(InsertPos, MI);
+ } else if (Opc == TargetInstrInfo::INSERT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubReg = getVR(N1, VRBaseMap);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ const TargetRegisterClass *TRC = MRI.getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ getSuperRegisterRegClass(TRC, SubIdx,
+ Node->getValueType(0));
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI.createVirtualRegister(SRC);
+ }
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), TII->get(Opc));
+ MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+    // If creating a subreg_to_reg, the first input operand is an immediate
+    // (the implicit value); otherwise it's a register.
+ if (Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+ } else
+ AddOperand(MI, N0, 0, 0, VRBaseMap);
+    // Add the subregister being inserted.
+ AddOperand(MI, N1, 0, 0, VRBaseMap);
+ MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ BB->insert(InsertPos, MI);
+ } else
+ assert(0 && "Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+ScheduleDAGSDNodes::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(VReg);
+
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted &&
+ "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
+ (void) Emitted;
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+  // If this node has a machine opcode, emit the corresponding machine
+  // instruction.
+ if (Node->isMachineOpcode()) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ if (Opc == TargetInstrInfo::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const TargetInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ unsigned MemOperandsEnd = ComputeMemOperandsEnd(Node);
+ bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+ II.getImplicitDefs() != 0;
+#ifndef NDEBUG
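+    // Implicit physical-register results and variadic operand lists are not
+    // reflected in the .td operand count, so they are exempted from this check.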
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ assert((II.getNumOperands() == NumMIOperands ||
+ HasPhysRegOuts || II.isVariadic()) &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ for (unsigned i = 0; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i+II.getNumDefs(), &II, VRBaseMap);
+
+ // Emit all of the memory operands of this instruction
+ for (unsigned i = NodeOperands; i != MemOperandsEnd; ++i)
+ AddMemOperand(MI, cast<MemOperandSDNode>(Node->getOperand(i))->MO);
+
+ if (II.usesCustomDAGSchedInsertionHook()) {
+      // Insert this instruction into the basic block using a target-specific
+      // inserter, which may return a new basic block.
+ BB = TLI->EmitInstrWithCustomInserter(MI, BB);
+ InsertPos = BB->end();
+ } else {
+ BB->insert(InsertPos, MI);
+ }
+
+    // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (Node->hasAnyUseOfValue(i))
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ }
+ }
+ return;
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump(DAG);
+#endif
+ assert(0 && "This target-independent node should have been selected!");
+ break;
+ case ISD::EntryToken:
+ assert(0 && "EntryToken should have been excluded from the schedule!");
+ break;
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0;
+ // Get the register classes of the src/dst.
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+ SrcTRC = MRI.getRegClass(SrcReg);
+ else
+ SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType());
+
+ if (TargetRegisterInfo::isVirtualRegister(DestReg))
+ DstTRC = MRI.getRegClass(DestReg);
+ else
+ DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
+ Node->getOperand(1).getValueType());
+
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, DestReg, SrcReg,
+ DstTRC, SrcTRC);
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ (void) Emitted;
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(),
+ TII->get(TargetInstrInfo::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ const char *AsmStr =
+ cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+ MI->addOperand(MachineOperand::CreateES(AsmStr));
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = 2; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ MI->addOperand(MachineOperand::CreateImm(Flags));
+ ++i; // Skip the ID value.
+
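+      // The low three bits of the flag word encode the operand kind (register
+      // use, register def, immediate, memory, or earlyclobber def); the number
+      // of operand values that follow was decoded above via
+      // InlineAsm::getNumOperandRegisters.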
+ switch (Flags & 7) {
+ default: assert(0 && "Bad flags!");
+ case 2: // Def of register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ }
+ break;
+ case 6: // Def of earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false,
+ false, 0, true));
+ }
+ break;
+ case 1: // Use of register.
+ case 3: // Immediate.
+ case 4: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (; NumVals; --NumVals, ++i)
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+ break;
+ }
+ }
+ BB->insert(InsertPos, MI);
+ break;
+ }
+ }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any flagged SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
+ N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,VRBaseMap);
+ FlaggedNodes.pop_back();
+ }
+ EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+ }
+
+ return BB;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..195896e
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,5743 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+static const fltSemantics *MVTToAPFloatSemantics(MVT VT) {
+ switch (VT.getSimpleVT()) {
+ default: assert(0 && "Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
+SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(MVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // PPC long double cannot be converted to any other type.
+ if (VT == MVT::ppcf128 ||
+ &Val.getSemantics() == &APFloat::PPCDoubleDouble)
+ return false;
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BIT_CONVERT)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements.
+ SDValue NotZero = N->getOperand(i);
+ if (isa<ConstantSDNode>(NotZero)) {
+ if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(NotZero)) {
+ if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
+ bitcastToAPInt().isAllOnesValue())
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BIT_CONVERT)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+  // Do not accept build_vectors that aren't all constants or which have
+  // non-zero elements.
+ SDValue Zero = N->getOperand(i);
+ if (isa<ConstantSDNode>(Zero)) {
+ if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(Zero)) {
+ if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+ return false;
+ } else
+ return false;
+
+  // Okay, we have at least one zero value; check to see if the rest match or
+  // are undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != Zero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+ return false;
+ unsigned NumElems = N->getNumOperands();
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = N->getOperand(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ return false;
+ }
+ return true;
+}
+
+
+/// isDebugLabel - Return true if the specified node represents a debug
+/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node).
+bool ISD::isDebugLabel(const SDNode *N) {
+ if (N->getOpcode() == ISD::DBG_LABEL)
+ return true;
+ if (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL)
+ return true;
+ return false;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
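+  // (CondCode is bit-encoded: bit 0 = E, bit 1 = G, bit 2 = L, bit 3 = U.)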
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if it is an unsigned comparison. Return zero if the
+/// operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: assert(0 && "Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns SETCC_INVALID if it is not possible to represent the
+/// resultant comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
+
+const TargetMachine &SelectionDAG::getTarget() const {
+ return MF->getTarget();
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDValue *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDUse *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->getNode());
+ ID.AddInteger(Ops->getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+ unsigned short OpC, SDVTList VTList,
+ const SDValue *OpList, unsigned N) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
+/// the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break; // Normal nodes don't need extra info.
+ case ISD::ARG_FLAGS:
+ ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits());
+ break;
+ case ISD::TargetConstant:
+ case ISD::Constant:
+ ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+ break;
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ }
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+ case ISD::DBG_STOPPOINT: {
+ const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(N);
+ ID.AddInteger(DSP->getLine());
+ ID.AddInteger(DSP->getColumn());
+ ID.AddPointer(DSP->getCompileUnit());
+ break;
+ }
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::MEMOPERAND: {
+ const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO;
+ MO.Profile(ID);
+ break;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ break;
+ }
+ case ISD::CALL: {
+ const CallSDNode *Call = cast<CallSDNode>(N);
+ ID.AddInteger(Call->getCallingConv());
+ ID.AddInteger(Call->isVarArg());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries alignment, volatility, indexing mode, and
+/// extension/truncation information.
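+/// The value packs the conversion type in bits 0-1, the addressing mode in
+/// bits 2-4, the volatile flag in bit 5, and the encoded alignment
+/// (log2(alignment) + 1) in the bits above that.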
+///
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM,
+ bool isVolatile, unsigned Alignment) {
+ assert((ConvType & 3) == ConvType &&
+ "ConvType may not require more than 2 bits!");
+ assert((AM & 7) == AM &&
+ "AM may not require more than 3 bits!");
+ return ConvType |
+ (AM << 2) |
+ (isVolatile << 5) |
+ ((Log2_32(Alignment) + 1) << 6);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Flag)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::DBG_LABEL:
+ case ISD::DBG_STOPPOINT:
+ case ISD::EH_LABEL:
+ case ISD::DECLARE:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Flag)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty())
+ DeadNodes.push_back(I);
+
+ RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+ DAGUpdateListener *UpdateListener) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, 0);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+ RemoveDeadNodes(DeadNodes, UpdateListener);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ N->NodeType = ISD::DELETED_NODE;
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::EntryToken:
+ assert(0 && "EntryToken should not be in CSEMaps!");
+ return false;
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol:
+ Erased =
+ TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::VALUETYPE: {
+ MVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT()] != 0;
+ ValueTypeNodes[VT.getSimpleVT()] = 0;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ cerr << "\n";
+ assert(0 && "Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
+ DAGUpdateListener *UpdateListener) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing, UpdateListener);
+
+ // N is now dead. Inform the listener if it exists and delete it.
+ if (UpdateListener)
+ UpdateListener->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If the node doesn't already exist, we updated it. Inform a listener if
+ // it exists.
+ if (UpdateListener)
+ UpdateListener->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ AddNodeIDCustom(ID, N);
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ AddNodeIDCustom(ID, N);
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ const SDValue *Ops,unsigned NumOps,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return 0;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+ AddNodeIDCustom(ID, N);
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+/// VerifyNode - Sanity check the given node. Aborts if it is invalid.
+void SelectionDAG::VerifyNode(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ MVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ MVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ break;
+ }
+ }
+}
+
+/// getMVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getMVTAlignment(MVT VT) const {
+ const Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::Int8Ty, 0) :
+ VT.getTypeForMVT();
+
+ return TLI.getTargetData()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
+ : TLI(tli), FLI(fli), DW(0),
+ EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(),
+ getVTList(MVT::Other)), Root(getEntryNode()) {
+ AllNodes.push_back(&EntryNode);
+}
+
+void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
+ DwarfWriter *dw) {
+ MF = &mf;
+ MMI = mmi;
+ DW = dw;
+}
+
+SelectionDAG::~SelectionDAG() {
+ allnodes_clear();
+}
+
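+/// allnodes_clear - Remove the EntryNode (a member of the DAG that is not
+/// allocated from NodeAllocator) from the list, then deallocate all the
+/// remaining nodes.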
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(0));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(0));
+
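+ // Re-insert the persistent EntryNode and make it the root again.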
+ EntryNode.UseList = 0;
+ AllNodes.push_back(&EntryNode);
+ Root = getEntryNode();
+}
+
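+/// getZeroExtendInReg - Zero extend the low VT bits of Op within Op's own
+/// type by ANDing with a mask that keeps only the low VT.getSizeInBits() bits.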
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) {
+ if (Op.getValueType() == VT) return Op;
+ APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(),
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, Op.getValueType()));
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) {
+ MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) {
+ MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
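+ // The assertion requires every bit of Val above EltVT's width to be all zero
+ // or all one, i.e. Val must fit in EltVT via zero- or sign-extension.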
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) {
+ return getConstant(*ConstantInt::get(Val), VT, isT);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) {
+ assert(VT.isInteger() && "Cannot create an integer constant with a non-integer type!");
+
+ MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&Val);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+ if (!N) {
+ N = NodeAllocator.Allocate<ConstantSDNode>();
+ new (N) ConstantSDNode(isT, &Val, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
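+ // For vector types, splat the scalar constant across all elements with a
+ // BUILD_VECTOR node.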
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
+ return getConstant(Val, TLI.getPointerTy(), isTarget);
+}
+
+
+SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) {
+ return getConstantFP(*ConstantFP::get(V), VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){
+ assert(VT.isFloatingPoint() && "Cannot create an FP constant with a non-FP type!");
+
+ MVT EltVT =
+ VT.isVector() ? VT.getVectorElementType() : VT;
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddPointer(&V);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+ if (!N) {
+ N = NodeAllocator.Allocate<ConstantFPSDNode>();
+ new (N) ConstantFPSDNode(isTarget, &V, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(VT.getVectorNumElements(), Result);
+ // FIXME DebugLoc info might be appropriate here
+ Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) {
+ MVT EltVT =
+ VT.isVector() ? VT.getVectorElementType() : VT;
+ if (EltVT==MVT::f32)
+ return getConstantFP(APFloat((float)Val), VT, isTarget);
+ else
+ return getConstantFP(APFloat(Val), VT, isTarget);
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+ MVT VT, int64_t Offset,
+ bool isTargetGA) {
+ unsigned Opc;
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
+ if (BitWidth < 64)
+ Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
+
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ if (GVar && GVar->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
+ new (N) GlobalAddressSDNode(isTargetGA, GV, VT, Offset);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(FI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
+ new (N) FrameIndexSDNode(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(JTI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
+ new (N) JumpTableSDNode(JTI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget) {
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+ new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget) {
+ if (Alignment == 0)
+ Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->AddSelectionDAGCSEId(ID);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+ new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MBB);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
+ new (N) BasicBlockSDNode(MBB);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0);
+ ID.AddInteger(Flags.getRawBits());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>();
+ new (N) ARG_FLAGSSDNode(Flags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(MVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT()+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()];
+
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<VTSDNode>();
+ new (N) VTSDNode(VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
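+// External symbol nodes are uniqued by their symbol string in the dedicated
+// ExternalSymbols / TargetExternalSymbols maps rather than through the CSE map.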
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+ new (N) ExternalSymbolSDNode(false, Sym, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT) {
+ SDNode *&N = TargetExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+ new (N) ExternalSymbolSDNode(true, Sym, VT);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (CondCodeNodes[Cond] == 0) {
+ CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>();
+ new (N) CondCodeSDNode(Cond);
+ CondCodeNodes[Cond] = N;
+ AllNodes.push_back(N);
+ }
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - Swap the values of N1 and N2, and swap all indices in the
+// shuffle mask M that point at N1 so they point at N2, and all indices that
+// point at N2 so they point at N1.
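+// For example, with 4-element inputs the mask <0, 5, 2, 7> becomes
+// <4, 1, 6, 3> after the commute.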
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+ std::swap(N1, N2);
+ int NElts = M.size();
+ for (int i = 0; i != NElts; ++i) {
+ if (M[i] >= NElts)
+ M[i] -= NElts;
+ else if (M[i] >= 0)
+ M[i] += NElts;
+ }
+}
+
+SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
+ SDValue N2, const int *Mask) {
+ assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Vector Shuffle VTs must be vectors");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+ && "Vector Shuffle VTs must have same element type");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i != NElts; ++i) {
+ assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+ MaskVec.push_back(Mask[i]);
+ }
+
+ // Canonicalize shuffle v, v -> v, undef
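+ // (e.g. shuffle V, V, <0, 5, 2, 7> becomes shuffle V, undef, <0, 1, 2, 3>).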
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (unsigned i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.getOpcode() == ISD::UNDEF)
+ commuteShuffle(N1, N2, MaskVec);
+
+ // Canonicalize: if every index selects from the lhs -> shuffle lhs, undef;
+ // if every index selects from the rhs -> shuffle rhs, undef.
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= (int)NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+
+ // If this is an identity shuffle, or if every element shuffles to undef,
+ // return the appropriate node.
+ bool AllUndef = true;
+ bool Identity = true;
+ for (unsigned i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] >= 0) AllUndef = false;
+ }
+ if (Identity)
+ return N1;
+ if (AllUndef)
+ return getUNDEF(VT);
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+ for (unsigned i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the OperandAllocator is reset.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+ ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
+ new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
+ SDValue Val, SDValue DTy,
+ SDValue STy, SDValue Rnd, SDValue Sat,
+ ISD::CvtCode Code) {
+ // If the src and dest types are the same and the conversion is between
+ // integer types of the same sign or two floats, no conversion is necessary.
+ if (DTy == STy &&
+ (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+ return Val;
+
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops, 5);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
+ new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ ID.AddInteger(RegNo);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
+ new (N) RegisterSDNode(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root,
+ unsigned Line, unsigned Col,
+ Value *CU) {
+ SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>();
+ new (N) DbgStopPointSDNode(Root, Line, Col, CU);
+ N->setDebugLoc(DL);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
+ SDValue Root,
+ unsigned LabelID) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
+ ID.AddInteger(LabelID);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
+ new (N) LabelSDNode(Opcode, dl, Root, LabelID);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || isa<PointerType>(V->getType())) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(V);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
+ new (N) SrcValueSDNode(V);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) {
+#ifndef NDEBUG
+ const Value *v = MO.getValue();
+ assert((!v || isa<PointerType>(v->getType())) &&
+ "SrcValue is not a pointer?");
+#endif
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0);
+ MO.Profile(ID);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>();
+ new (N) MemOperandSDNode(MO);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+/// getShiftAmountOperand - Return the specified value cast to the target's
+/// desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+ MVT OpTy = Op.getValueType();
+ MVT ShTy = TLI.getShiftAmountTy();
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) {
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSizeInBits()/8;
+ const Type *Ty = VT.getTypeForMVT();
+ unsigned StackAlign =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+/// CreateStackTemporary - Create a stack temporary suitable for holding
+/// either of the specified value types.
+SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSizeInBits(),
+ VT2.getStoreSizeInBits())/8;
+ const Type *Ty1 = VT1.getTypeForMVT();
+ const Type *Ty2 = VT2.getTypeForMVT();
+ const TargetData *TD = TLI.getTargetData();
+ unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
+ TD->getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align);
+ return getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1,
+ SDValue N2, ISD::CondCode Cond, DebugLoc dl) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getConstant(1, VT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+ // No compile time operations on this type yet.
+ if (N1C->getValueType(0) == MVT::ppcf128)
+ return SDValue();
+
+ APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT);
+ case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+ case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, VT);
+ case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth) const {
+ APInt KnownZero, KnownOne;
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) const {
+ unsigned BitWidth = Mask.getBitWidth();
+ assert(BitWidth == Op.getValueType().getSizeInBits() &&
+ "Mask size mismatches value type size!");
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ if (Depth == 6 || Mask == 0)
+ return; // Limit search depth.
+
+ APInt KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 bits are known to be clear if zero in either the LHS or RHS.
+ KnownZero |= KnownZero2;
+ return;
+ case ISD::OR:
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+ KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 bits are known to be set if set in either the LHS or RHS.
+ KnownOne |= KnownOne2;
+ return;
+ case ISD::XOR: {
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 bits are known to be set if set in exactly one of the LHS and RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case ISD::MUL: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
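+ // e.g. if the low 2 bits of one operand and the low 3 bits of the other are
+ // known zero, then the low 5 bits of the product are known zero.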
+ KnownOne.clear();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clear();
+ KnownZero2.clear();
+ ComputeMaskedBits(Op.getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
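+ // If some leading bit of the divisor is known to be one, the divisor is at
+ // least 2^(BitWidth - RHSUnknownLeadingOnes - 1), so the quotient gains at
+ // least that many additional leading zeros.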
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
+ case ISD::SELECT:
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SELECT_CC:
+ ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ return;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
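+ // e.g. for (shl X, 4) on i32 the low four result bits are always known zero,
+ // and the known bits of X simply shift left by four.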
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= ShAmt;
+ KnownOne <<= ShAmt;
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ }
+ return;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ return;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ return;
+
+ APInt InDemandedMask = (Mask << ShAmt);
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ if (HighBits.getBoolValue())
+ InDemandedMask |= APInt::getSignBit(BitWidth);
+
+ ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bits.
+ APInt SignBit = APInt::getSignBit(BitWidth);
+ SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
+
+ if (KnownZero.intersects(SignBit)) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne.intersects(SignBit)) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ return;
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+
+ APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignBit.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownOne.clear();
+ return;
+ }
+ case ISD::LOAD: {
+ if (ISD::isZEXTLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ MVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ }
+ return;
+ }
+ case ISD::ZERO_EXTEND: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt InSignBit = APInt::getSignBit(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded. Temporarily set this bit in the mask for our callee.
+ if (NewBits.getBoolValue())
+ InMask |= InSignBit;
+
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+ // Note if the sign bit is known to be zero or one.
+ bool SignBitKnownZero = KnownZero.isNegative();
+ bool SignBitKnownOne = KnownOne.isNegative();
+ assert(!(SignBitKnownZero && SignBitKnownOne) &&
+ "Sign bit can't be known to be both zero and one!");
+
+ // If the sign bit wasn't actually demanded by our caller, we don't
+ // want it set in the KnownZero and KnownOne result values. Reset the
+ // mask and reapply it to the result values.
+ InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero &= InMask;
+ KnownOne &= InMask;
+
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero or one, the top bits match.
+ if (SignBitKnownZero)
+ KnownZero |= NewBits;
+ else if (SignBitKnownOne)
+ KnownOne |= NewBits;
+ return;
+ }
+ case ISD::ANY_EXTEND: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt InMask = Mask;
+ InMask.trunc(InBits);
+ KnownZero.trunc(InBits);
+ KnownOne.trunc(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ return;
+ }
+ case ISD::TRUNCATE: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt InMask = Mask;
+ InMask.zext(InBits);
+ KnownZero.zext(InBits);
+ KnownOne.zext(InBits);
+ ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.trunc(BitWidth);
+ KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= (~InMask) & Mask;
+ return;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+ Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
+ case ISD::ADD: {
+ // The low known-0 bits of the output are the low bits known to be clear in
+ // both the LHS and the RHS. For example, 8+(X<<3) is known to have the
+ // low 3 bits clear.
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ KnownZeroOut = std::min(KnownZeroOut,
+ KnownZero2.countTrailingOnes());
+
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+ APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+
+ // If the sign bit of the first operand is zero, the sign bit of
+ // the result is zero. If the first operand has no one bits below
+ // the second operand's single 1 bit, its sign will be zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero2 |= ~LowBits;
+
+ KnownZero |= KnownZero2 & Mask;
+
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ }
+ }
+ return;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+ Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clear();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ return;
+ }
+ default:
+ // Allow the target to implement this method for its nodes.
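+ // Note that the intrinsic case labels below sit inside the 'if' body on
+ // purpose: intrinsic opcodes jump straight to the target hook without going
+ // through the BUILTIN_OP_END check.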
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this);
+ }
+ return;
+ }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
+ MVT VT = Op.getValueType();
+ assert(VT.isInteger() && "Invalid VT!");
+ unsigned VTBits = VT.getSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+ // If negative, return # leading ones.
+ if (Val.isNegative())
+ return Val.countLeadingOnes();
+
+ // Return # leading zeros.
+ return Val.countLeadingZeros();
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getZExtValue() >= VTBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI.getBooleanContents() ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+ // Handle rotate right by N like a rotate left by 32-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+ break;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ break;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+ // Handle LOADX separately here. The EXTLOAD case will fall through.
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // e.g. an i16 sextload to i32 has '17' known sign bits
+ Tmp = LD->getMemoryVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // e.g. an i16 zextload to i32 has '16' known sign bits
+ Tmp = LD->getMemoryVT().getSizeInBits();
+ return VTBits-Tmp;
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VTBits);
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+
+bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+ if (!GA) return false;
+ if (GA->getOffset() != 0) return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+ if (!GV) return false;
+ MachineModuleInfo *MMI = getMachineModuleInfo();
+ return MMI && MMI->hasDebugInfo();
+}
+
+
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
+ unsigned i) {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getMaskElt(i) < 0)
+ return getUNDEF(VT.getVectorElementType());
+ unsigned Index = N->getMaskElt(i);
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
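+ // Reduce the index into the selected operand's own element range.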
+ Index %= NumElems;
+
+ if (V.getOpcode() == ISD::BIT_CONVERT) {
+ V = V.getOperand(0);
+ MVT VVT = V.getValueType();
+ if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
+ return SDValue();
+ }
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : getUNDEF(VT.getVectorElementType());
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+ if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V))
+ return getShuffleScalarElt(SVN, Index);
+ return SDValue();
+}
+
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ MVT VT, SDValue Operand) {
+ // Constant fold unary operations with an integer constant operand.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+ const APInt &Val = C->getAPIntValue();
+ unsigned BitWidth = VT.getSizeInBits();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ const uint64_t zero[] = {0, 0};
+ // No compile time operations on this type.
+ if (VT==MVT::ppcf128)
+ break;
+ APFloat apf = APFloat(APInt(BitWidth, 2, zero));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, VT);
+ }
+ case ISD::BIT_CONVERT:
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(Val.bitsToFloat(), VT);
+ else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(Val.bitsToDouble(), VT);
+ break;
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), VT);
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), VT);
+ case ISD::CTLZ:
+ return getConstant(Val.countLeadingZeros(), VT);
+ case ISD::CTTZ:
+ return getConstant(Val.countTrailingZeros(), VT);
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+ APFloat V = C->getValueAPF(); // make copy
+ if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*MVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ APInt api(VT.getSizeInBits(), 2, x);
+ return getConstant(api, VT);
+ }
+ case ISD::BIT_CONVERT:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ if (Operand.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().bitsLT(VT)
+ && "Invalid sext node, dst < src!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().bitsLT(VT)
+ && "Invalid zext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT,
+ Operand.getNode()->getOperand(0));
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().bitsLT(VT)
+ && "Invalid anyext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType().bitsGT(VT)
+ && "Invalid truncate node, src < dst!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ else
+ return Operand.getNode()->getOperand(0);
+ }
+ break;
+ case ISD::BIT_CONVERT:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BIT_CONVERT between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+ Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getNode()->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ SDValue Ops[1] = { Operand };
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTs, Operand);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
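+
+// Illustrative example (hypothetical operands): because of the constant
+// folding above, a unary node built on a constant operand never reaches the
+// CSE map; e.g.
+//   SDValue C = DAG.getConstant(0x00FF, MVT::i32);
+//   SDValue P = DAG.getNode(ISD::CTPOP, dl, MVT::i32, C);
+// yields the i32 constant 8 directly instead of a CTPOP node.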
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
+ MVT VT,
+ ConstantSDNode *Cst1,
+ ConstantSDNode *Cst2) {
+ const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD: return getConstant(C1 + C2, VT);
+ case ISD::SUB: return getConstant(C1 - C2, VT);
+ case ISD::MUL: return getConstant(C1 * C2, VT);
+ case ISD::UDIV:
+ if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
+ break;
+ case ISD::UREM:
+ if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
+ break;
+ case ISD::SDIV:
+ if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
+ break;
+ case ISD::SREM:
+ if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
+ break;
+ case ISD::AND: return getConstant(C1 & C2, VT);
+ case ISD::OR: return getConstant(C1 | C2, VT);
+ case ISD::XOR: return getConstant(C1 ^ C2, VT);
+ case ISD::SHL: return getConstant(C1 << C2, VT);
+ case ISD::SRL: return getConstant(C1.lshr(C2), VT);
+ case ISD::SRA: return getConstant(C1.ashr(C2), VT);
+ case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
+ case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
+ default: break;
+ }
+
+ return SDValue();
+}
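+
+// Sketch of the folding contract (hypothetical constant nodes): the division
+// and remainder cases above refuse to fold when the divisor is zero, so
+//   FoldConstantArithmetic(ISD::SDIV, MVT::i32, CstSeven, CstZero)
+// returns a null SDValue, while
+//   FoldConstantArithmetic(ISD::ADD, MVT::i32, CstSeven, CstZero)
+// returns the i32 constant 7. Callers check getNode() on the result, as the
+// binary getNode below does.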
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue N1, SDValue N2) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTORS whose operands are all BUILD_VECTORs can be simplified
+ // into one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::AND:
+ assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ // fall through
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ }
+ }
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG: {
+ MVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT.bitsLE(VT) && "Not rounding down!");
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ }
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ MVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (VT == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
+ if (N1C) {
+ APInt Val = N1C->getAPIntValue();
+ unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits();
+ Val <<= Val.getBitWidth()-FromBits;
+ Val = Val.ashr(Val.getBitWidth()-FromBits);
+ return getConstant(Val, VT);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+ if (Elt.getValueType() != VT) {
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ assert(VT.isInteger() && Elt.getValueType().isInteger() &&
+ VT.bitsLE(Elt.getValueType()) &&
+ "Bad type for BUILD_VECTOR operand");
+ Elt = getNode(ISD::TRUNCATE, DL, VT, Elt);
+ }
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ // If the indices are the same, return the inserted element.
+ if (N1.getOperand(2) == N2)
+ return N1.getOperand(1);
+ // If the indices are known different, extract the element from
+ // the original vector.
+ else if (isa<ConstantSDNode>(N1.getOperand(2)) &&
+ isa<ConstantSDNode>(N2))
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ if (N1.getValueType() == VT) // Trivial extraction.
+ return N1;
+ break;
+ }
+
+ if (N1C) {
+ if (N2C) {
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+ if (SV.getNode()) return SV;
+ } else { // Canonicalize constant to RHS if commutative
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+ }
+ }
+
+ // Constant fold FP operations.
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP) {
+ if (!N2CFP && isCommutativeBinOp(Opcode)) {
+ // Canonicalize constant to RHS if commutative
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ } else if (N2CFP && VT != MVT::ppcf128) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (s != APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FSUB:
+ s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FMUL:
+ s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FDIV:
+ s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FREM :
+ s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ return getConstantFP(V1, VT);
+ break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, VT);
+ default: break;
+ }
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(undef, arg2) -> 0
+ // For vectors, we can't easily build an all zero vector; just return
+ // the RHS.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.getOpcode() == ISD::UNDEF)
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ SDValue Ops[] = { N1, N2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
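+
+// Illustrative examples (hypothetical operands) of the binary folds above:
+//   DAG.getNode(ISD::AND, dl, MVT::i32, X, DAG.getConstant(0, MVT::i32))
+// returns the zero constant directly, and
+//   DAG.getNode(ISD::ADD, dl, MVT::i32, DAG.getConstant(1, MVT::i32), X)
+// canonicalizes the constant to the RHS before the node is memoized.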
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ // Perform various simplifications.
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ switch (Opcode) {
+ case ISD::CONCAT_VECTORS:
+ // A CONCAT_VECTORS whose operands are all BUILD_VECTORs can be simplified
+ // into one big BUILD_VECTOR.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+ N2.getOpcode() == ISD::BUILD_VECTOR &&
+ N3.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ }
+ break;
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+ if (Simp.getNode()) return Simp;
+ break;
+ }
+ case ISD::SELECT:
+ if (N1C) {
+ if (N1C->getZExtValue())
+ return N2; // select true, X, Y -> X
+ else
+ return N3; // select false, X, Y -> Y
+ }
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::BRCOND:
+ if (N2C) {
+ if (N2C->getZExtValue()) // Unconditional branch
+ return getNode(ISD::BR, DL, MVT::Other, N1, N3);
+ else
+ return N1; // Never-taken branch
+ }
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ assert(0 && "should use getVectorShuffle constructor!");
+ break;
+ case ISD::BIT_CONVERT:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ SDValue Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned NumBits = VT.isVector() ?
+ VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = APInt(NumBits, C->getZExtValue() & 255);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ if (VT.isInteger())
+ return DAG.getConstant(Val, VT);
+ return DAG.getConstantFP(APFloat(Val), VT);
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Value = DAG.getNode(ISD::OR, dl, VT,
+ DAG.getNode(ISD::SHL, dl, VT, Value,
+ DAG.getConstant(Shift,
+ TLI.getShiftAmountTy())),
+ Value);
+ Shift <<= 1;
+ }
+
+ return Value;
+}
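+
+// Worked example (hypothetical value): for a constant memset value of 0xAB
+// and VT = MVT::i32, the loop above splats the low byte across the type, so
+// getMemsetValue returns the i32 constant 0xABABABAB.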
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only
+/// used when a memcpy is turned into a memset because the source is a
+/// constant string pointer.
+static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
+ // Handle vector with all elements zero.
+ if (Str.empty()) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts)));
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumBits = VT.getSizeInBits();
+ unsigned MSB = NumBits / 8;
+ uint64_t Val = 0;
+ if (TLI.isLittleEndian())
+ Offset = Offset + MSB - 1;
+ for (unsigned i = 0; i != MSB; ++i) {
+ Val = (Val << 8) | (unsigned char)Str[Offset];
+ Offset += TLI.isLittleEndian() ? -1 : 1;
+ }
+ return DAG.getConstant(Val, VT);
+}
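+
+// Worked example (hypothetical inputs): with Str = "abcd", Offset = 0 and
+// VT = MVT::i32 on a little-endian target, the loop starts at Offset 3 and
+// walks backwards, producing the i32 constant 0x64636261, i.e. the string
+// bytes in memory order once the value is stored.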
+
+/// getMemBasePlusOffset - Returns a node computing the given base address
+/// plus a constant byte offset.
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ MVT VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+ VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+ return true;
+
+ return false;
+}
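+
+// Sketch (hypothetical IR): a source address of the form
+//   (add (GlobalAddress @str) (Constant 3))
+// matches with SrcDelta = 3, and Str receives the global's constant string
+// initializer at that offset if GetConstantStringInfo succeeds.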
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static
+bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
+ SDValue Dst, SDValue Src,
+ unsigned Limit, uint64_t Size, unsigned &Align,
+ std::string &Str, bool &isSrcStr,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ isSrcStr = isMemSrcFromString(Src, Str);
+ bool isSrcConst = isa<ConstantSDNode>(Src);
+ bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses();
+ MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr);
+ if (VT != MVT::iAny) {
+ unsigned NewAlign = (unsigned)
+ TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT());
+ // If source is a string constant, this will require an unaligned load.
+ if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
+ if (Dst.getOpcode() != ISD::FrameIndex) {
+ // Can't change destination alignment. It requires an unaligned store.
+ if (AllowUnalign)
+ VT = MVT::iAny;
+ } else {
+ int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change destination alignment. It requires an unaligned store.
+ if (AllowUnalign)
+ VT = MVT::iAny;
+ } else {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI) < NewAlign)
+ MFI->setObjectAlignment(FI, NewAlign);
+ Align = NewAlign;
+ }
+ }
+ }
+ }
+
+ if (VT == MVT::iAny) {
+ if (AllowUnalign) {
+ VT = MVT::i64;
+ } else {
+ switch (Align & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
+
+ MVT LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1);
+ assert(LVT.isInteger());
+
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ if (VT.isVector()) {
+ VT = MVT::i64;
+ while (!TLI.isTypeLegal(VT))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+ VTSize = VT.getSizeInBits() / 8;
+ } else {
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+ VTSize >>= 1;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
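+
+// Worked example (hypothetical target): for Size = 15 with a legal i64 and
+// no preferred vector type, the loop above emits MemOps = { i64, i32, i16,
+// i8 }, covering 8 + 4 + 2 + 1 = 15 bytes, provided 4 ops is within Limit.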
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ std::vector<MVT> MemOps;
+ uint64_t Limit = -1ULL;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemcpy();
+ unsigned DstAlign = Align; // Destination alignment can change.
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ bool isZeroStr = CopyFromStr && Str.empty();
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constant pool first.
+ // We also handle storing a vector with all zeros.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ } else {
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcSVOff + SrcOff, false, Align);
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ std::vector<MVT> MemOps;
+ uint64_t Limit = -1ULL;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemmove();
+ unsigned DstAlign = Align; // Destination alignment can change.
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Value = DAG.getLoad(VT, dl, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcSVOff + SrcOff, false, Align);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memset to a series of store ops if the size operand
+ // falls below a certain threshold.
+ std::vector<MVT> MemOps;
+ std::string Str;
+ bool CopyFromStr;
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+ Size, Align, Str, CopyFromStr, DAG, TLI))
+ return SDValue();
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+ SDValue Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ AlwaysInline,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, true,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ }
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, false, CallingConv::C, false,
+ getExternalSymbol("memcpy", TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
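+
+// Usage sketch (hypothetical operands): for a call such as
+//   DAG.getMemcpy(Chain, dl, Dst, Src, DAG.getConstant(32, PtrVT),
+//                 /*Align*/4, /*AlwaysInline*/false, DstSV, 0, SrcSV, 0);
+// the 32-byte constant size is first tried as inline loads/stores, then as
+// target-specific code, and only then lowered to a library call to "memcpy".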
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align,
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, false, CallingConv::C, false,
+ getExternalSymbol("memmove", TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff) {
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, DstSV, DstSVOff);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align,
+ DstSV, DstSVOff);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+ Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, false, CallingConv::C, false,
+ getExternalSymbol("memset", TLI.getPointerTy()),
+ Args, *this, dl);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Cmp,
+ SDValue Swp, const Value* PtrVal,
+ unsigned Alignment) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ MVT VT = Cmp.getValueType();
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(MemVT);
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+ new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
+ Chain, Ptr, Cmp, Swp, PtrVal, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+ SDValue Chain,
+ SDValue Ptr, SDValue Val,
+ const Value* PtrVal,
+ unsigned Alignment) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_SWAP) &&
+ "Invalid Atomic Op");
+
+ MVT VT = Val.getValueType();
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(MemVT);
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ SDValue Ops[] = {Chain, Ptr, Val};
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+ new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
+ Chain, Ptr, Val, PtrVal, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
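+
+// Note on CSE (illustrative): both getAtomic overloads above profile the
+// node into CSEMap, so building the same atomic op twice with identical
+// chain, pointer and value operands returns the existing AtomicSDNode
+// rather than allocating a new one.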
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+/// If there is only one operand, it is returned directly instead of being
+/// wrapped in a MERGE_VALUES node.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
+ DebugLoc dl) {
+ if (NumOps == 1)
+ return Ops[0];
+
+ SmallVector<MVT, 4> VTs;
+ VTs.reserve(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
+ Ops, NumOps);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+ const MVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps,
+ MVT MemVT, const Value *srcValue, int SVOff,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
+ MemVT, srcValue, SVOff, Align, Vol,
+ ReadMem, WriteMem);
+}
+
+SDValue
+SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps,
+ MVT MemVT, const Value *srcValue, int SVOff,
+ unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem) {
+ // Memoize the node unless it returns a flag.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
+ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT,
+ srcValue, SVOff, Align, Vol, ReadMem, WriteMem);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<MemIntrinsicSDNode>();
+ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT,
+ srcValue, SVOff, Align, Vol, ReadMem, WriteMem);
+ }
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs,
+ bool IsTailCall, bool IsInreg, SDVTList VTs,
+ const SDValue *Operands, unsigned NumOperands) {
+ // Do not include isTailCall in the folding set profile.
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands);
+ ID.AddInteger(CallingConv);
+ ID.AddInteger(IsVarArgs);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ // Instead of including isTailCall in the folding set, we just
+ // set the flag of the existing node.
+ if (!IsTailCall)
+ cast<CallSDNode>(E)->setNotTailCall();
+ return SDValue(E, 0);
+ }
+ SDNode *N = NodeAllocator.Allocate<CallSDNode>();
+ new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg,
+ VTs, Operands, NumOperands);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
+ ISD::LoadExtType ExtType, MVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ const Value *SV, int SVOffset, MVT EVT,
+ bool isVolatile, unsigned Alignment) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(VT);
+
+ if (VT == EVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == EVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ if (VT.isVector())
+ assert(EVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Invalid vector extload!");
+ else
+ assert(EVT.bitsLT(VT) &&
+ "Should only be an extending load, not truncating!");
+ assert((ExtType == ISD::EXTLOAD || VT.isInteger()) &&
+ "Cannot sign/zero extend a FP/Vector load!");
+ assert(VT.isInteger() == EVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+ "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(EVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
+ new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset,
+ Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ const Value *SV, int SVOffset,
+ bool isVolatile, unsigned Alignment) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
+ SV, SVOffset, VT, isVolatile, Alignment);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT,
+ SDValue Chain, SDValue Ptr,
+ const Value *SV,
+ int SVOffset, MVT EVT,
+ bool isVolatile, unsigned Alignment) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
+ SV, SVOffset, EVT, isVolatile, Alignment);
+}
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+ "Load is already a indexed load!");
+ return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
+ LD->getChain(), Base, Offset, LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->isVolatile(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, const Value *SV, int SVOffset,
+ bool isVolatile, unsigned Alignment) {
+ MVT VT = Val.getValueType();
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(VT);
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED,
+ isVolatile, Alignment));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false,
+ VT, SV, SVOffset, Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+ SDValue Ptr, const Value *SV,
+ int SVOffset, MVT SVT,
+ bool isVolatile, unsigned Alignment) {
+ MVT VT = Val.getValueType();
+
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment);
+
+ assert(VT.bitsGT(SVT) && "Not a truncation?");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getMVTAlignment(VT);
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED,
+ isVolatile, Alignment));
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true,
+ SVT, SV, SVOffset, Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
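+
+// Example (hypothetical operands): storing an i32 value into an i16 slot,
+//   DAG.getTruncStore(Chain, dl, Val32, Ptr, SV, 0, MVT::i16);
+// builds a truncating StoreSDNode, whereas passing SVT equal to the value
+// type degrades to a plain getStore call as handled above.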
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+ "Store is already a indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+ new (N) StoreSDNode(Ops, dl, VTs, AM,
+ ST->isTruncatingStore(), ST->getMemoryVT(),
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getAlignment(), ST->isVolatile());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl,
+ SDValue Chain, SDValue Ptr,
+ SDValue SV) {
+ SDValue Ops[] = { Chain, Ptr, SV };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ const SDUse *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+ return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+ void *IP = 0;
+
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+ }
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const std::vector<MVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+ Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ const MVT *VTs, unsigned NumVTs,
+ const SDValue *Ops, unsigned NumOps) {
+ if (NumVTs == 1)
+ return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+ return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+#if 0
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+ // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+#endif
+ }
+
+ // Memoize the node unless it returns a flag.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+ if (NumOps == 1) {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ CSEMap.InsertNode(N, IP);
+ } else {
+ if (NumOps == 1) {
+ N = NodeAllocator.Allocate<UnarySDNode>();
+ new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
+ } else if (NumOps == 2) {
+ N = NodeAllocator.Allocate<BinarySDNode>();
+ new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]);
+ } else if (NumOps == 3) {
+ N = NodeAllocator.Allocate<TernarySDNode>();
+ new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]);
+ } else {
+ N = NodeAllocator.Allocate<SDNode>();
+ new (N) SDNode(Opcode, DL, VTList, Ops, NumOps);
+ }
+ }
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifyNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, 0, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops, 1);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops, 2);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3,
+ SDValue N4, SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops, 5);
+}
+
+SDVTList SelectionDAG::getVTList(MVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
+ return *I;
+
+ MVT *Array = Allocator.Allocate<MVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ SDVTList Result = makeVTList(Array, 2);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3)
+ return *I;
+
+ MVT *Array = Allocator.Allocate<MVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ SDVTList Result = makeVTList(Array, 3);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) {
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+ MVT *Array = Allocator.Allocate<MVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: assert(0 && "Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ bool NoMatch = false;
+ for (unsigned i = 2; i != NumVTs; ++i)
+ if (VTs[i] != I->VTs[i]) {
+ NoMatch = true;
+ break;
+ }
+ if (!NoMatch)
+ return *I;
+ }
+
+ MVT *Array = Allocator.Allocate<MVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
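+
+// Note (illustrative): the two-, three- and four-VT overloads above cache
+// their results in VTList, so repeated calls with the same types, e.g. two
+// calls to getVTList(MVT::i32, MVT::Other), return the same pooled MVT array
+// instead of allocating a new one.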
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return InN; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
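+/// Illustrative sketch of intended use from a target's Select() routine
+/// (TargetOpc::ADDrr is a placeholder machine opcode, not a real one):
+///
+///   return CurDAG->SelectNodeTo(N, TargetOpc::ADDrr, MVT::i32,
+///                               N->getOperand(0), N->getOperand(1));
+///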
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2, MVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2, MVT VT3, MVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ MVT VT1, MVT VT2, MVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return MorphNodeTo(N, Opc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2, const SDValue *Ops,
+ unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2, MVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ MVT VT1, MVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return MorphNodeTo(N, Opc, VTs, Ops, 3);
+}
+
+/// MorphNodeTo - These *mutate* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
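+/// Illustrative sketch (NewOpc, VTs, Ops and NumOps are placeholders): the
+/// returned node must be used in place of N, since it may be a pre-existing
+/// equivalent node rather than N itself:
+///
+///   SDNode *Res = DAG.MorphNodeTo(N, NewOpc, VTs, Ops, NumOps);
+///   if (Res != N) {
+///     // N was left untouched; an equivalent node already existed.
+///   }
+///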
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, const SDValue *Ops,
+ unsigned NumOps) {
+ // If an identical node already exists, use it.
+ void *IP = 0;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = 0;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > N->NumOperands) {
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+
+ if (N->isMachineOpcode()) {
+ // We're creating a final node that will live unmorphed for the
+ // remainder of the current SelectionDAG iteration, so we can allocate
+ // the operands directly out of a pool with no recycling metadata.
+ N->OperandList = OperandAllocator.Allocate<SDUse>(NumOps);
+ N->OperandsNeedDelete = false;
+ } else {
+ N->OperandList = new SDUse[NumOps];
+ N->OperandsNeedDelete = true;
+ }
+ }
+
+ // Assign the new operands.
+ N->NumOperands = NumOps;
+ for (unsigned i = 0, e = NumOps; i != e; ++i) {
+ N->OperandList[i].setUser(N);
+ N->OperandList[i].setInitial(Ops[i]);
+ }
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+
+/// getTargetNode - These are used for target selectors to create a new node
+/// with specified return type(s), target opcode, and operands.
+///
+/// Note that getTargetNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
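+///
+/// Illustrative sketch (X86::MOV32ri is used purely as an example of a
+/// machine opcode; dl is a DebugLoc taken from the node being selected):
+///
+///   SDNode *Res = CurDAG->getTargetNode(X86::MOV32ri, dl, MVT::i32,
+///                                       CurDAG->getTargetConstant(0, MVT::i32));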
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) {
+ return getNode(~Opcode, dl, VT).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
+ SDValue Op1) {
+ return getNode(~Opcode, dl, VT, Op1).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
+ SDValue Op1, SDValue Op2) {
+ return getNode(~Opcode, dl, VT, Op1, Op2).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(~Opcode, dl, VT, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Op;
+ return getNode(~Opcode, dl, VTs, &Op, 0).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+ MVT VT2, SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getNode(~Opcode, dl, VTs, &Op1, 1).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+ MVT VT2, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getNode(~Opcode, dl, VTs, Ops, 2).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+ MVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getNode(~Opcode, dl, VTs, Ops, 3).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2, MVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getNode(~Opcode, dl, VTs, Ops, 2).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2, MVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getNode(~Opcode, dl, VTs, Ops, 3).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ MVT VT1, MVT VT2, MVT VT3,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+ MVT VT2, MVT VT3, MVT VT4,
+ const SDValue *Ops, unsigned NumOps) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+ const std::vector<MVT> &ResultTys,
+ const SDValue *Ops, unsigned NumOps) {
+ return getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode();
+}
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return E;
+ }
+ return NULL;
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
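+/// Illustrative sketch: after building a replacement value New for the
+/// single-result node Old, redirect every user and let the DAG recursively
+/// merge any nodes that become identical (passing no update listener):
+///
+///   DAG.ReplaceAllUsesWith(SDValue(Old, 0), New, 0);
+///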
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
+ DAGUpdateListener *UpdateListener) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+ assert(From != To.getNode() && "Cannot replace uses of with self");
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+ // is replaced by To, we don't want to replace all of its users with To
+ // as well. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+ DAGUpdateListener *UpdateListener) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+ const SDValue *To,
+ DAGUpdateListener *UpdateListener) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. Nodes that become
+/// dead as a result are reported through the optional UpdateListener, just
+/// as they are for ReplaceAllUsesWith.
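+///
+/// Illustrative sketch: rewire only the chain result (result #1) of a load
+/// LD to NewChain, leaving uses of the loaded value (result #0) untouched:
+///
+///   DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain, UpdateListener);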
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
+ DAGUpdateListener *UpdateListener){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To, UpdateListener);
+ return;
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+namespace {
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. Nodes that become dead as a
+/// result are reported through the optional UpdateListener, just as they
+/// are for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num,
+ DAGUpdateListener *UpdateListener){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+
+ // Record all the existing uses up front, so that new uses introduced
+ // during the replacement process are not visited here.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ std::sort(Uses.begin(), Uses.end());
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User, UpdateListener);
+ }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. The node list is re-sorted into that
+/// order in place, and the number of nodes in the DAG is returned.
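+///
+/// Illustrative sketch of the invariant clients may rely on afterwards:
+/// every operand's NodeId is strictly smaller than its user's NodeId.
+///
+///   DAG.AssignTopologicalOrder();
+///   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+///        E = DAG.allnodes_end(); I != E; ++I)
+///     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+///       assert(I->getOperand(i).getNode()->getNodeId() < I->getNodeId());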
+unsigned SelectionDAG::AssignTopologicalOrder() {
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+ // Node Id fields for nodes at SortedPos and after will contain the
+ // count of outstanding operands.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+ SDNode *N = I++;
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+ // A node with no operands is trivially sorted; move it into position now.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q = N;
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ --Degree;
+ if (Degree == 0) {
+ // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+ assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+ "Last node in topological sort has unexpected id!");
+ assert(AllNodes.back().use_empty() &&
+ "Last node in topological sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
+ MVT VT, int64_t o)
+ : SDNode(isa<GlobalVariable>(GA) &&
+ cast<GlobalVariable>(GA)->isThreadLocal() ?
+ // Thread Local
+ (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
+ // Non Thread Local
+ (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
+ DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o) {
+ TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt,
+ const Value *srcValue, int SVO,
+ unsigned alignment, bool vol)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
+ assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
+ assert(getAlignment() == alignment && "Alignment representation error!");
+ assert(isVolatile() == vol && "Volatile representation error!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+ const SDValue *Ops,
+ unsigned NumOps, MVT memvt, const Value *srcValue,
+ int SVO, unsigned alignment, bool vol)
+ : SDNode(Opc, dl, VTs, Ops, NumOps),
+ MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
+ assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
+ assert(getAlignment() == alignment && "Alignment representation error!");
+ assert(isVolatile() == vol && "Volatile representation error!");
+}
+
+/// getMemOperand - Return a MachineMemOperand object describing the memory
+/// reference performed by this memory access.
+MachineMemOperand MemSDNode::getMemOperand() const {
+ int Flags = 0;
+ if (isa<LoadSDNode>(this))
+ Flags = MachineMemOperand::MOLoad;
+ else if (isa<StoreSDNode>(this))
+ Flags = MachineMemOperand::MOStore;
+ else if (isa<AtomicSDNode>(this)) {
+ Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ }
+ else {
+ const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this);
+ assert(MemIntrinNode && "Unknown MemSDNode opcode!");
+ if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad;
+ if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore;
+ }
+
+ int Size = (getMemoryVT().getSizeInBits() + 7) >> 3;
+ if (isVolatile()) Flags |= MachineMemOperand::MOVolatile;
+
+ // Check whether the memory reference refers to a frame index.
+ const FrameIndexSDNode *FI =
+ dyn_cast<const FrameIndexSDNode>(getBasePtr().getNode());
+ if (!getSrcValue() && FI)
+ return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()),
+ Flags, 0, Size, getAlignment());
+ else
+ return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(),
+ Size, getAlignment());
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const MVT *SDNode::getValueTypeList(MVT VT) {
+ if (VT.isExtended()) {
+ static std::set<MVT, MVT::compareRawBits> EVTs;
+ return &(*EVTs.insert(VT).first);
+ } else {
+ static MVT VTs[MVT::LAST_VALUETYPE];
+ VTs[VT.getSimpleVT()] = VT;
+ return &VTs[VT.getSimpleVT()];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
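+///
+/// For example, N->hasNUsesOfValue(1, 0) returns true exactly when result #0
+/// of N has a single use, regardless of how N's other results are used.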
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there are any uses of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperandOf - Return true if this value is an operand of N.
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (*this == N->getOperand(i))
+ return true;
+ return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+ for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+ if (this == N->OperandList[i].getNode())
+ return true;
+ return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions. In practice, this looks through token
+/// factors and non-volatile loads. In order to remain efficient, this only
+/// looks a couple of nodes in, it does not do an exhaustive search.
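+///
+/// Illustrative sketch (LD and Chain are placeholders): a combine that wants
+/// to fold a load may check that a chain reaches the load's chain result
+/// without intervening side effects:
+///
+///   if (Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+///     // Safe, as far as chains are concerned, to fold the load.
+///   }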
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply, we just want to be able to see through
+ // TokenFactor's etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel. If any
+ // of the operands of the TF reach dest, then we can do the xform.
+ if (getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return true;
+ return false;
+ }
+
+ // Loads don't have side effects, look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (!Ld->isVolatile())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
+
+
+static void findPredecessor(SDNode *N, const SDNode *P, bool &found,
+ SmallPtrSet<SDNode *, 32> &Visited) {
+ if (found || !Visited.insert(N))
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) {
+ SDNode *Op = N->getOperand(i).getNode();
+ if (Op == P) {
+ found = true;
+ return;
+ }
+ findPredecessor(Op, P, found, Visited);
+ }
+}
+
+/// isPredecessorOf - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by recursively traversing
+/// up the operands.
+/// NOTE: this is an expensive method. Use it carefully.
+bool SDNode::isPredecessorOf(SDNode *N) const {
+ SmallPtrSet<SDNode *, 32> Visited;
+ bool found = false;
+ findPredecessor(N, this, found, Visited);
+ return found;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->get(getMachineOpcode()).getName();
+ return "<<Unknown Machine Node>>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node>>";
+ }
+ return "<<Unknown Node>>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE:
+ return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MEMOPERAND: return "MemOperand";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::ARG_FLAGS: return "ArgFlags";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(getOperand(0))->getZExtValue();
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ }
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(getOperand(1))->getZExtValue();
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP:return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::DBG_LABEL: return "dbg_label";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::DECLARE: return "declare";
+ case ISD::HANDLENODE: return "handlenode";
+ case ISD::FORMAL_ARGUMENTS: return "formal_arguments";
+ case ISD::CALL: return "call";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FPOWI: return "fpowi";
+ case ISD::FPOW: return "fpow";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+
+ case ISD::SETCC: return "setcc";
+ case ISD::VSETCC: return "vsetcc";
+ case ISD::SELECT: return "select";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BIT_CONVERT: return "bit_convert";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: assert(0 && "Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::RET: return "ret";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTLZ: return "ctlz";
+
+ // Debug info
+ case ISD::DBG_STOPPOINT: return "dbg_stoppoint";
+ case ISD::DEBUG_LOC: return "debug_loc";
+
+ // Trampolines
+ case ISD::TRAMPOLINE: return "trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: assert(0 && "Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setueq";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default:
+ return "";
+ case ISD::PRE_INC:
+ return "<pre-inc>";
+ case ISD::PRE_DEC:
+ return "<pre-dec>";
+ case ISD::POST_INC:
+ return "<post-inc>";
+ case ISD::POST_DEC:
+ return "<post-dec>";
+ }
+}
+
+std::string ISD::ArgFlagsTy::getArgFlagsString() {
+ std::string S = "< ";
+
+ if (isZExt())
+ S += "zext ";
+ if (isSExt())
+ S += "sext ";
+ if (isInReg())
+ S += "inreg ";
+ if (isSRet())
+ S += "sret ";
+ if (isByVal())
+ S += "byval ";
+ if (isNest())
+ S += "nest ";
+ if (getByValAlign())
+ S += "byval-align:" + utostr(getByValAlign()) + " ";
+ if (getOrigAlign())
+ S += "orig-align:" + utostr(getOrigAlign()) + " ";
+ if (getByValSize())
+ S += "byval-size:" + utostr(getByValSize()) + " ";
+ return S + ">";
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(errs(), G);
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getMVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this);
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ }
+
+ if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ if (G && R->getReg() &&
+ TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ OS << " " << G->getTarget().getRegisterInfo()->getName(R->getReg());
+ } else {
+ OS << " #" << R->getReg();
+ }
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(this)) {
+ if (M->MO.getValue())
+ OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">";
+ else
+ OS << "<null:" << M->MO.getOffset() << ">";
+ } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(this)) {
+ OS << N->getArgFlags().getArgFlagsString();
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getMVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ const Value *SrcValue = LD->getSrcValue();
+ int SrcOffset = LD->getSrcValueOffset();
+ OS << " <";
+ if (SrcValue)
+ OS << SrcValue;
+ else
+ OS << "null";
+ OS << ":" << SrcOffset << ">";
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << " <anyext "; break;
+ case ISD::SEXTLOAD: OS << " <sext "; break;
+ case ISD::ZEXTLOAD: OS << " <zext "; break;
+ }
+ if (doExt)
+ OS << LD->getMemoryVT().getMVTString() << ">";
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << " " << AM;
+ if (LD->isVolatile())
+ OS << " <volatile>";
+ OS << " alignment=" << LD->getAlignment();
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ const Value *SrcValue = ST->getSrcValue();
+ int SrcOffset = ST->getSrcValueOffset();
+ OS << " <";
+ if (SrcValue)
+ OS << SrcValue;
+ else
+ OS << "null";
+ OS << ":" << SrcOffset << ">";
+
+ if (ST->isTruncatingStore())
+ OS << " <trunc " << ST->getMemoryVT().getMVTString() << ">";
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << " " << AM;
+ if (ST->isVolatile())
+ OS << " <volatile>";
+ OS << " alignment=" << ST->getAlignment();
+ } else if (const AtomicSDNode* AT = dyn_cast<AtomicSDNode>(this)) {
+ const Value *SrcValue = AT->getSrcValue();
+ int SrcOffset = AT->getSrcValueOffset();
+ OS << " <";
+ if (SrcValue)
+ OS << SrcValue;
+ else
+ OS << "null";
+ OS << ":" << SrcOffset << ">";
+ if (AT->isVolatile())
+ OS << " <volatile>";
+ OS << " alignment=" << AT->getAlignment();
+ }
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ OS << " ";
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ cerr << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ cerr << "\n" << std::string(indent, ' ');
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+
+ cerr << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+ // Dump the current SDNode, but don't end the line yet.
+ OS << std::string(indent, ' ');
+ N->printr(OS, G);
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ if (i) OS << ",";
+ OS << " ";
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+ OS << "\n";
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(errs(), this, 0, 0, once);
+}
+
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+
+const Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+ APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits) {
+ MVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned sz = VT.getSizeInBits();
+ if (MinSplatBits > sz)
+ return false;
+
+ SplatValue = APInt(sz, 0);
+ SplatUndef = APInt(sz, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int nOps = getNumOperands();
+ assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+ for (unsigned i = 0; i < nOps; ++i) {
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = i * EltBitSize;
+
+ if (OpVal.getOpcode() == ISD::UNDEF)
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos);
+ else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+
+ HasAnyUndefs = (SplatUndef != 0);
+ while (sz > 8) {
+
+ unsigned HalfSize = sz / 2;
+ APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = APInt(SplatValue).trunc(HalfSize);
+ APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ sz = HalfSize;
+ }
+
+ SplatBitSize = sz;
+ return true;
+}
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
new file mode 100644
index 0000000..889d7f5
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -0,0 +1,6052 @@
+//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SelectionDAGBuild.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision),
+ cl::init(0));
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
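+/// For example (illustrative), the scalar leaves of {i32, {i32, i32}, i32}
+/// are numbered 0, 1, 2, 3 in order, so the index sequence {1, 1} (the second
+/// member, then its second field) linearizes to 2.
+///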
+static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex = 0) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ if (Indices && *Indices == i)
+ return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+ }
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// MVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
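+/// For example (illustrative), {i32, [2 x float]} yields ValueVTs of
+/// {i32, f32, f32} and, with a typical layout, Offsets of {0, 4, 8}.
+///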
+static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+ SmallVectorImpl<MVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets = 0,
+ uint64_t StartingOffset = 0) {
+ // Given a struct type, recursively traverse the elements.
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ const Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty == Type::VoidTy)
+ return;
+ // Base case: we can get an MVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
+
+namespace llvm {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information about
+ /// the value. The most common situation is to represent one value at a time,
+ /// but struct or array values are handled element-wise as multiple values.
+ /// The splitting of aggregates is performed recursively, so that we never
+ /// have aggregate-typed registers. The values at this point do not necessarily
+ /// have legal types, so each value may require one or more registers of some
+ /// legal type.
+ ///
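+ /// For example (illustrative), an i128 value on a typical 64-bit target is
+ /// described by ValueVTs = {i128}, RegVTs = {i64} and two entries in Regs,
+ /// since the one value is expanded into two i64 registers.
+ ///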
+ struct VISIBILITY_HIDDEN RegsForValue {
+ /// TLI - The TargetLowering object.
+ ///
+ const TargetLowering *TLI;
+
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<MVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs are redundant with TLI's
+ /// getRegisterType member function; however, with physical registers it is
+ /// necessary to have a separate record of the types.
+ ///
+ SmallVector<MVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() : TLI(0) {}
+
+ RegsForValue(const TargetLowering &tli,
+ const SmallVector<unsigned, 4> &regs,
+ MVT regvt, MVT valuevt)
+ : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+ RegsForValue(const TargetLowering &tli,
+ const SmallVector<unsigned, 4> &regs,
+ const SmallVector<MVT, 4> &regvts,
+ const SmallVector<MVT, 4> &valuevts)
+ : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
+ RegsForValue(const TargetLowering &tli,
+ unsigned Reg, const Type *Ty) : TLI(&tli) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(ValueVT);
+ MVT RegisterVT = TLI->getRegisterType(ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ TLI = RHS.TLI;
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+ /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Code,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+ };
+}
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+ if (isa<PHINode>(I)) return true;
+ BasicBlock *BB = I->getParent();
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
+ return true;
+ return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ // Don't force virtual registers for byval arguments though, because
+ // fast-isel can't handle those in all cases.
+ if (EnableFastISel && !A->hasByValAttr())
+ return A->use_empty();
+
+ BasicBlock *Entry = A->getParent()->begin();
+ for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+ return false; // Use not in entry block.
+ return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
+ : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
+ SelectionDAG &DAG,
+ bool EnableFastISel) {
+ Fn = &fn;
+ MF = &mf;
+ RegInfo = &MF->getRegInfo();
+
+ // Create a vreg for each argument register that is not dead and is used
+ // outside of the entry block for the function.
+ for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+ AI != E; ++AI)
+ if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
+ InitializeRegForValue(AI);
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::iterator BB = Fn->begin(), EB = Fn->end();
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ const Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF->push_back(MBB);
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ PHINode *PN;
+ DebugLoc DL;
+ for (BasicBlock::iterator
+ I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (Function *F = CI->getCalledFunction()) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::dbg_stoppoint: {
+ DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
+
+ if (DIDescriptor::ValidDebugInfo(SPI->getContext(),
+ CodeGenOpt::Default)) {
+ DICompileUnit CU(cast<GlobalVariable>(SPI->getContext()));
+ unsigned idx = MF->getOrCreateDebugLocID(CU.getGV(),
+ SPI->getLine(),
+ SPI->getColumn());
+ DL = DebugLoc::get(idx);
+ }
+
+ break;
+ }
+ case Intrinsic::dbg_func_start: {
+ DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
+ Value *SP = FSI->getSubprogram();
+
+ if (DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::Default)) {
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DICompileUnit CU(Subprogram.getCompileUnit());
+ unsigned Line = Subprogram.getLineNumber();
+ DL = DebugLoc::get(MF->getOrCreateDebugLocID(CU.getGV(),
+ Line, 0));
+ }
+
+ break;
+ }
+ }
+ }
+ }
+
+ PN = dyn_cast<PHINode>(I);
+ if (!PN || PN->use_empty()) continue;
+
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ MVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(VT);
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+}
+
+unsigned FunctionLoweringInfo::MakeReg(MVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, V->getType(), ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT ValueVT = ValueVTs[Value];
+ MVT RegisterVT = TLI.getRegisterType(ValueVT);
+
+ unsigned NumRegs = TLI.getNumRegisters(ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = MakeReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT, then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
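+///
+/// For example (illustrative), an i64 assembled from two i32 parts becomes an
+/// ISD::BUILD_PAIR of the parts (swapped on big-endian targets); a
+/// non-power-of-2 count such as three i32 parts first assembles the
+/// power-of-2 prefix and then ORs in the shifted odd tail.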
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
+ const SDValue *Parts,
+ unsigned NumParts, MVT PartVT, MVT ValueVT,
+ ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (!ValueVT.isVector() && ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
+ unsigned RoundParts = NumParts & (NumParts - 1) ?
+ 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundBits = PartBits * RoundParts;
+ MVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : MVT::getIntegerVT(RoundBits);
+ SDValue Lo, Hi;
+
+ MVT HalfVT = MVT::getIntegerVT(RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT);
+ Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2,
+ PartVT, HalfVT);
+ } else {
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+ }
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ MVT OddVT = MVT::getIntegerVT(OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, dl,
+ Parts+RoundParts, OddParts, PartVT, OddVT);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(),
+ TLI.getPointerTy()));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
+ }
+ } else if (ValueVT.isVector()) {
+ // Handle a multi-element vector.
+ MVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate operands
+ // from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
+ // operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
+ ValueVT, &Ops[0], NumIntermediates);
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isVector()) {
+ assert(ValueVT.isVector() && "Unknown vector conversion!");
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+ }
+
+ if (ValueVT.isVector()) {
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial scalar-to-vector conversions should get here!");
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
+ }
+
+ if (PartVT.isInteger() &&
+ ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp != ISD::DELETED_NODE)
+ Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ } else {
+ return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+ }
+ }
+
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ if (ValueVT.bitsLT(Val.getValueType()))
+ // FP_ROUND's are always exact here.
+ return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+ DAG.getIntPtrConstant(1));
+ return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+ }
+
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+
+ assert(0 && "Unknown mismatch!");
+ return SDValue();
+}
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
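+///
+/// For example (illustrative), copying an i64 into two i32 parts bitcasts the
+/// value to a full-width integer and repeatedly bisects it with
+/// ISD::EXTRACT_ELEMENT; a three-part copy first peels off the odd high part
+/// with a shift before splitting the power-of-2 remainder.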
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy();
+ MVT ValueVT = Val.getValueType();
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+ if (!NumParts)
+ return;
+
+ if (!ValueVT.isVector()) {
+ if (PartVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
+ } else if (PartVT.isInteger() && ValueVT.isInteger()) {
+ ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
+ } else {
+ assert(0 && "Unknown mismatch!");
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartVT != ValueVT);
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+ // If the parts cover fewer bits than the value has, truncate the value.
+ if (PartVT.isInteger() && ValueVT.isInteger()) {
+ ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ } else {
+ assert(0 && "Unknown mismatch!");
+ }
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ assert(PartVT == ValueVT && "Type conversion failed!");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
+ DAG.getConstant(RoundBits,
+ TLI.getPointerTy()));
+ getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT);
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+ NumParts = RoundParts;
+ ValueVT = MVT::getIntegerVT(NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::getIntegerVT(ValueVT.getSizeInBits()),
+ Val);
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ MVT ThisVT = MVT::getIntegerVT(ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ ThisVT, Part0,
+ DAG.getConstant(1, PtrVT));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ ThisVT, Part0,
+ DAG.getConstant(0, PtrVT));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ PartVT, Part1);
+ }
+ }
+ }
+
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+
+ return;
+ }
+
+ // Vector ValueVT.
+ if (NumParts == 1) {
+ if (PartVT != ValueVT) {
+ if (PartVT.isVector()) {
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+ } else {
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ PartVT, Val,
+ DAG.getConstant(0, PtrVT));
+ }
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ MVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs = TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ unsigned NumElements = ValueVT.getVectorNumElements();
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ if (IntermediateVT.isVector())
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ IntermediateVT, Val,
+ DAG.getConstant(i * (NumElements / NumIntermediates),
+ PtrVT));
+ else
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ IntermediateVT, Val,
+ DAG.getConstant(i, PtrVT));
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, split each value into
+ // legal parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT);
+ }
+}
+
+
+void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+ AA = &aa;
+ GFI = gfi;
+ TD = DAG.getTarget().getTargetData();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGLowering object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGLowering::clear() {
+ NodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ DAG.clear();
+ CurDebugLoc = DebugLoc::getUnknownLoc();
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGLowering::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingLoads[0], PendingLoads.size());
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGLowering::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &PendingExports[0],
+ PendingExports.size());
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+void SelectionDAGLowering::visit(Instruction &I) {
+ visit(I.getOpcode(), I);
+}
+
+void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: assert(0 && "Unknown instruction type encountered!");
+ abort();
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
+#include "llvm/Instruction.def"
+ }
+}
+
+void SelectionDAGLowering::visitAdd(User &I) {
+ if (I.getType()->isFPOrFPVector())
+ visitBinary(I, ISD::FADD);
+ else
+ visitBinary(I, ISD::ADD);
+}
+
+void SelectionDAGLowering::visitMul(User &I) {
+ if (I.getType()->isFPOrFPVector())
+ visitBinary(I, ISD::FMUL);
+ else
+ visitBinary(I, ISD::MUL);
+}
+
+SDValue SelectionDAGLowering::getValue(const Value *V) {
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+ MVT VT = TLI.getValueType(V->getType(), true);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return N = DAG.getConstant(*CI, VT);
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return N = DAG.getGlobalAddress(GV, VT);
+
+ if (isa<ConstantPointerNull>(C))
+ return N = DAG.getConstant(0, TLI.getPointerTy());
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return N = DAG.getConstantFP(*CFP, VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return N = DAG.getUNDEF(VT);
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the ValueMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+ return DAG.getMergeValues(&Constants[0], Constants.size(),
+ getCurDebugLoc());
+ }
+
+ if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ MVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, EltVT);
+ }
+ return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc());
+ }
+
+ const VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CP->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ MVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, EltVT);
+ else
+ Op = DAG.getConstant(0, EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ }
+
+ unsigned InReg = FuncInfo.ValueMap[V];
+ assert(InReg && "Value not in map!");
+
+ RegsForValue RFV(TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
+}
+
+
+void SelectionDAGLowering::visitRet(ReturnInst &I) {
+ if (I.getNumOperands() == 0) {
+ DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(),
+ MVT::Other, getControlRoot()));
+ return;
+ }
+
+ SmallVector<SDValue, 8> NewValues;
+ NewValues.push_back(getControlRoot());
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) continue;
+
+ SDValue RetOp = getValue(I.getOperand(i));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ MVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->paramHasAttr(0, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(VT);
+ MVT PartVT = TLI.getRegisterType(VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->paramHasAttr(0, Attribute::InReg))
+ Flags.setInReg();
+ for (unsigned i = 0; i < NumParts; ++i) {
+ NewValues.push_back(Parts[i]);
+ NewValues.push_back(DAG.getArgFlags(Flags));
+ }
+ }
+ }
+ DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other,
+ &NewValues[0], NewValues.size()));
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) {
+ if (!V->use_empty()) {
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
+ ISD::CondCode FPC, FOC;
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+ case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+ case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+ case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+ case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+ case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+ case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+ case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
+ case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
+ case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+ case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+ case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+ case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+ case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+ case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+ case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
+ default:
+ assert(0 && "Invalid FCmp predicate opcode!");
+ FOC = FPC = ISD::SETFALSE;
+ break;
+ }
+ if (FiniteOnlyFPMath())
+ return FOC;
+ else
+ return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ assert(0 && "Invalid ICmp predicate opcode!");
+ return ISD::SETNE;
+ }
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == CurMBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = getICmpCondCode(IC->getPredicate());
+ } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ Condition = getFCmpCondCode(FC->getPredicate());
+ } else {
+ Condition = ISD::SETEQ; // silence warning.
+ assert(0 && "Unknown compare instruction");
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0),
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(),
+ NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// recursively emit its subexpressions as a sequence of conditional branches,
+/// handling the leaves of the tree via EmitBranchForMergedCondition.
+void SelectionDAGLowering::FindMergedConditions(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ unsigned Opc) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ Instruction *BOp = dyn_cast<Instruction>(Cond);
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI = CurBB;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGLowering::visitBr(BranchInst &I) {
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ CurMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch, emit the branch.
+ if (Succ0MBB != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (BOp->hasOneUse() &&
+ (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0]);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ CurMBB->getParent()->erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(),
+ NULL, Succ0MBB, Succ1MBB, CurMBB);
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ DebugLoc dl = getCurDebugLoc();
+
+ // Build the setcc now.
+ if (CB.CmpMHS == NULL) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ MVT VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+ ISD::SETLE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ CurMBB->addSuccessor(CB.TrueBB);
+ CurMBB->addSuccessor(CB.FalseBB);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+ // If the branch was constant folded, fix up the CFG.
+ if (BrCond.getOpcode() == ISD::BR) {
+ CurMBB->removeSuccessor(CB.FalseBB);
+ DAG.setRoot(BrCond);
+ } else {
+ // Otherwise, go ahead and insert the false branch.
+ if (BrCond == getControlRoot())
+ CurMBB->removeSuccessor(CB.TrueBB);
+
+ if (CB.FalseBB == NextBlock)
+ DAG.setRoot(BrCond);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB)));
+ }
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGLowering::visitJumpTable(JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ MVT PTy = TLI.getPointerTy();
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index));
+}
+
+/// visitJumpTableHeader - This function emits the code necessary to produce an
+/// index into the jump table from the value being switched on.
+void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH) {
+ // Subtract the lowest switch case value from the value being switched on and
+ // conditional branch to default mbb if the result is greater than the
+ // difference between smallest and largest cases.
+ SDValue SwitchOp = getValue(JTH.SValue);
+ MVT VT = SwitchOp.getValueType();
+ SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(JTH.First, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+ // This value may be smaller or larger than the target's pointer type, and
+ // therefore requires extension or truncation.
+ if (VT.bitsGT(TLI.getPointerTy()))
+ SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(), SUB);
+ else
+ SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(), SUB);
+
+ unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the largest
+ // case in the switch.
+ SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(SUB.getValueType()), SUB,
+ DAG.getConstant(JTH.Last-JTH.First,VT),
+ ISD::SETUGT);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ if (JT.MBB == NextBlock)
+ DAG.setRoot(BrCond);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB)));
+}
+
+/// visitBitTestHeader - This function emits the code necessary to produce a
+/// value suitable for "bit tests".
+void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
+ // Subtract the minimum value
+ SDValue SwitchOp = getValue(B.SValue);
+ MVT VT = SwitchOp.getValueType();
+ SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+ DAG.getConstant(B.First, VT));
+
+ // Check range
+ SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(SUB.getValueType()),
+ SUB, DAG.getConstant(B.Range, VT),
+ ISD::SETUGT);
+
+ SDValue ShiftOp;
+ if (VT.bitsGT(TLI.getPointerTy()))
+ ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(), SUB);
+ else
+ ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(), SUB);
+
+ B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+ B.Reg, ShiftOp);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ CurMBB->addSuccessor(B.Default);
+ CurMBB->addSuccessor(MBB);
+
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ if (MBB == NextBlock)
+ DAG.setRoot(BrRange);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
+ DAG.getBasicBlock(MBB)));
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B) {
+ // Make desired shift
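+ // (Illustrative note: this materializes 1 << (SV - First), where the
+ // subtraction was performed in visitBitTestHeader; ANDing it with B.Mask and
+ // comparing against zero below tests, with a single branch, whether the
+ // switched-on value hits any case handled by this bit-test block.)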
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
+ TLI.getPointerTy());
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(1, TLI.getPointerTy()),
+ ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ TLI.getPointerTy(), SwitchVal,
+ DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(AndOp.getValueType()),
+ AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ ISD::SETNE);
+
+ CurMBB->addSuccessor(B.TargetBB);
+ CurMBB->addSuccessor(NextMBB);
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ if (NextMBB == NextBlock)
+ DAG.setRoot(BrAnd);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB)));
+}
+
+void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ const Value *Callee(I.getCalledValue());
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else
+ LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ CopyToExportRegsIfNeeded(&I);
+
+ // Update successor info
+ CurMBB->addSuccessor(Return);
+ CurMBB->addSuccessor(LandingPad);
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Size is the number of Cases represented by this range.
+ size_t Size = CR.Range.second - CR.Range.first;
+ if (Size > 3)
+ return false;
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ // TODO: If any two of the cases have the same destination, and if one value
+ // is the same as the other, but has one bit unset that the other has set,
+ // use bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+ // Rearrange the case blocks so that the last one falls through if possible.
+ if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ // The last case block won't fall through into 'NextBlock' if we emit the
+ // branches in this order. See if rearranging a case value would help.
+ for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+ if (I->BB == NextBlock) {
+ std::swap(*I, BackCase);
+ break;
+ }
+ }
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the Case's target mbb if the value being switched on SV is equal
+ // to C.
+ MachineBasicBlock *CurBlock = CR.CaseBB;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ MachineBasicBlock *FallThrough;
+ if (I != E-1) {
+ FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+ CurMF->insert(BBI, FallThrough);
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ } else {
+ // If the last case doesn't match, go to the default block.
+ FallThrough = Default;
+ }
+
+ Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->High == I->Low) {
+ // This is just a small case range containing exactly one case.
+ CC = ISD::SETEQ;
+ LHS = SV; RHS = I->High; MHS = NULL;
+ } else {
+ CC = ISD::SETLE;
+ LHS = I->Low; MHS = SV; RHS = I->High;
+ }
+ CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+ // If emitting the first comparison, just call visitSwitchCase to emit the
+ // code into the current block. Otherwise, push the CaseBlock onto the
+ // vector to be later processed by SDISel, and insert the node's MBB
+ // before the next MBB.
+ if (CurBlock == CurMBB)
+ visitSwitchCase(CB);
+ else
+ SwitchCases.push_back(CB);
+
+ CurBlock = FallThrough;
+ }
+
+ return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return !DisableJumpTables &&
+ (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+ APInt LastExt(Last), FirstExt(First);
+ uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+ LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
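+ // For example (illustrative): with First = -2 and Last = 5, both values are
+ // sign-extended by one bit so the subtraction cannot overflow, and the
+ // computed range is 5 - (-2) + 1 = 8.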
+ return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+ size_t TSize = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ if (!areJTsAllowed(TLI) || TSize <= 3)
+ return false;
+
+ APInt Range = ComputeRange(First, Last);
+ double Density = (double)TSize / Range.roundToDouble();
+ if (Density < 0.4)
+ return false;
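+
+ // Illustrative example: ten cases with the values {1,3,5,...,19} give
+ // TSize = 10 over a range of 19, a density of roughly 0.53, so a 19-slot
+ // jump table is emitted and the unused slots branch to the default block.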
+
+ DEBUG(errs() << "Lowering jump table\n"
+ << "First entry: " << First << ". Last entry: " << Last << '\n'
+ << "Range: " << Range
+ << "Size: " << TSize << ". Density: " << Density << "\n\n");
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Create a new basic block to hold the code for loading the address
+ // of the jump table, and jumping to it. Update successor information;
+ // we will either branch to the default case for the switch, or the jump
+ // table.
+ MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, JumpTableBB);
+ CR.CaseBB->addSuccessor(Default);
+ CR.CaseBB->addSuccessor(JumpTableBB);
+
+ // Build a vector of destination BBs, corresponding to each target
+ // of the jump table. If the value of the jump table slot corresponds to
+ // a case statement, push the case's BB onto the vector, otherwise, push
+ // the default BB.
+ std::vector<MachineBasicBlock*> DestBBs;
+ APInt TEI = First;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+ const APInt& Low = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& High = cast<ConstantInt>(I->High)->getValue();
+
+ if (Low.sle(TEI) && TEI.sle(High)) {
+ DestBBs.push_back(I->BB);
+ if (TEI==High)
+ ++I;
+ } else {
+ DestBBs.push_back(Default);
+ }
+ }
+
+ // Update successor info. Add one edge to each unique successor.
+ BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+ for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+ E = DestBBs.end(); I != E; ++I) {
+ if (!SuccsHandled[(*I)->getNumber()]) {
+ SuccsHandled[(*I)->getNumber()] = true;
+ JumpTableBB->addSuccessor(*I);
+ }
+ }
+
+ // Create a jump table index for this jump table, or return an existing
+ // one.
+ unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
+
+ // Set the jump table information so that we can codegen it as a second
+ // MachineBasicBlock
+ JumpTable JT(-1U, JTI, JumpTableBB, Default);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
+ if (CR.CaseBB == CurMBB)
+ visitJumpTableHeader(JT, JTH);
+
+ JTCases.push_back(JumpTableBlock(JTH, JT));
+
+ return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+
+ const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+ double FMetric = 0;
+ CaseItr Pivot = CR.Range.first + Size/2;
+
+ // Select the optimal pivot, maximizing the sum of the densities of the LHS
+ // and RHS. This will (heuristically) allow us to emit jump tables later.
+ size_t TSize = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ size_t LSize = FrontCase.size();
+ size_t RSize = TSize-LSize;
+ DEBUG(errs() << "Selecting best pivot: \n"
+ << "First: " << First << ", Last: " << Last <<'\n'
+ << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+ for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+ J!=E; ++I, ++J) {
+ const APInt& LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue();
+ APInt Range = ComputeRange(LEnd, RBegin);
+ assert((Range - 2ULL).isNonNegative() &&
+ "Invalid case distance");
+ double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble();
+ double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble();
+ double Metric = Range.logBase2()*(LDensity+RDensity);
+ // Should always split in some non-trivial place
+ DEBUG(errs() <<"=>Step\n"
+ << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+ << "LDensity: " << LDensity
+ << ", RDensity: " << RDensity << '\n'
+ << "Metric: " << Metric << '\n');
+ if (FMetric < Metric) {
+ Pivot = J;
+ FMetric = Metric;
+ DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
+ }
+
+ LSize += J->size();
+ RSize -= J->size();
+ }
+ if (areJTsAllowed(TLI)) {
+ // If our case is dense we *really* should handle it earlier!
+ assert((FMetric > 0) && "Should handle dense range earlier!");
+ } else {
+ Pivot = CR.Range.first + Size/2;
+ }
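+
+ // Illustrative example: for the cases {0,1,2,3,100,101,102,103} the metric
+ // is maximized at the gap between 3 and 100, since both resulting halves
+ // are fully dense; each half can then be emitted as a jump table.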
+
+ CaseRange LHSR(CR.Range.first, Pivot);
+ CaseRange RHSR(Pivot, CR.Range.second);
+ Constant *C = Pivot->Low;
+ MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+ // We know that we branch to the LHS if the Value being switched on is
+ // less than the Pivot value, C. We use this to optimize our binary
+ // tree a bit, by recognizing that if SV is greater than or equal to the
+ // LHS's Case Value, and that Case Value is exactly one less than the
+ // Pivot's Value, then we can branch directly to the LHS's Target,
+ // rather than creating a leaf node for it.
+ if ((LHSR.second - LHSR.first) == 1 &&
+ LHSR.first->High == CR.GE &&
+ cast<ConstantInt>(C)->getValue() ==
+ (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ TrueBB = LHSR.first->BB;
+ } else {
+ TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, TrueBB);
+ WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Similar to the optimization above, if the Value being switched on is
+ // known to be less than the Constant CR.LT, and the current Case Value
+ // is CR.LT - 1, then we can branch directly to the target block for
+ // the current Case Value, rather than emitting a RHS leaf node for it.
+ if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+ cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+ (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ FalseBB = RHSR.first->BB;
+ } else {
+ FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, FalseBB);
+ WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the LHS node if the value being switched on (SV) is less than C.
+ // Otherwise, branch to the RHS node (FalseBB).
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+ if (CR.CaseBB == CurMBB)
+ visitSwitchCase(CB);
+ else
+ SwitchCases.push_back(CB);
+
+ return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and spans less than the machine word bitwidth, encode the case range into
+/// a series of masks and emit bit tests against those masks.
+bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default){
+ unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits();
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+ return false;
+
+ size_t numCmps = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I) {
+ // A single case counts as one comparison, a case range as two.
+ numCmps += (I->Low == I->High ? 1 : 2);
+ }
+
+ // Count unique destinations
+ SmallSet<MachineBasicBlock*, 4> Dests;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ Dests.insert(I->BB);
+ if (Dests.size() > 3)
+ // Don't bother with the code below if there are too many unique destinations.
+ return false;
+ }
+ DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
+ << "Total number of comparisons: " << numCmps << '\n');
+
+ // Compute span of values.
+ const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ APInt cmpRange = maxValue - minValue;
+
+ DEBUG(errs() << "Compare range: " << cmpRange << '\n'
+ << "Low bound: " << minValue << '\n'
+ << "High bound: " << maxValue << '\n');
+
+ if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+ (!(Dests.size() == 1 && numCmps >= 3) &&
+ !(Dests.size() == 2 && numCmps >= 5) &&
+ !(Dests.size() >= 3 && numCmps >= 6)))
+ return false;
+
+ DEBUG(errs() << "Emitting bit tests\n");
+ APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+ // If all the case values already fit in a word without having to subtract
+ // minValue, we can avoid emitting the subtraction altogether.
+ if (minValue.isNonNegative() &&
+ maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+ cmpRange = maxValue;
+ } else {
+ lowBound = minValue;
+ }
+
+ CaseBitsVector CasesBits;
+ unsigned i, count = 0;
+
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ MachineBasicBlock* Dest = I->BB;
+ for (i = 0; i < count; ++i)
+ if (Dest == CasesBits[i].BB)
+ break;
+
+ if (i == count) {
+ assert((count < 3) && "Too many destinations to test!");
+ CasesBits.push_back(CaseBits(0, Dest, 0));
+ count++;
+ }
+
+ const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+ const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+ uint64_t lo = (lowValue - lowBound).getZExtValue();
+ uint64_t hi = (highValue - lowBound).getZExtValue();
+
+ for (uint64_t j = lo; j <= hi; j++) {
+ CasesBits[i].Mask |= 1ULL << j;
+ CasesBits[i].Bits++;
+ }
+
+ }
+ std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
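+
+ // Illustrative example: with lowBound == 0, the cases {0,3,5} -> BB1 and
+ // {1,4} -> BB2 produce the masks 0x29 (0b101001) for BB1 and 0x12
+ // (0b010010) for BB2; each mask is later tested with a single AND.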
+
+ BitTestInfo BTC;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ DEBUG(errs() << "Cases:\n");
+ for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+ DEBUG(errs() << "Mask: " << CasesBits[i].Mask
+ << ", Bits: " << CasesBits[i].Bits
+ << ", BB: " << CasesBits[i].BB << '\n');
+
+ MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+ CurMF->insert(BBI, CaseBB);
+ BTC.push_back(BitTestCase(CasesBits[i].Mask,
+ CaseBB,
+ CasesBits[i].BB));
+
+ // Put SV in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(SV);
+ }
+
+ BitTestBlock BTB(lowBound, cmpRange, SV,
+ -1U, (CR.CaseBB == CurMBB),
+ CR.CaseBB, Default, BTC);
+
+ if (CR.CaseBB == CurMBB)
+ visitBitTestHeader(BTB);
+
+ BitTestCases.push_back(BTB);
+
+ return true;
+}
+
+
+/// Clusterify - Transform a simple list of Cases into a list of clustered
+/// case ranges.
+size_t SelectionDAGLowering::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
+ size_t numCmps = 0;
+
+ // Start with "simple" cases
+ for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+ Cases.push_back(Case(SI.getSuccessorValue(i),
+ SI.getSuccessorValue(i),
+ SMBB));
+ }
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge cases into clusters.
+ if (Cases.size() >= 2)
+ // Must recompute end() each iteration because it may be
+ // invalidated by erase if we hold on to it
+ for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+ const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+ const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+ MachineBasicBlock* nextBB = J->BB;
+ MachineBasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
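+
+ // Illustrative example: the cases 1->A, 2->A, 3->A, 5->B are merged into
+ // the clusters [1,3]->A and [5,5]->B; the loop below then counts two
+ // compares for the range and one for the single value, so numCmps == 3.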
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+
+ MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // If there is only the default destination, branch to it if it is not the
+ // next basic block. Otherwise, just fall through.
+ if (SI.getNumOperands() == 2) {
+ // Update machine-CFG edges.
+
+ // If this is not a fall-through branch, emit the branch.
+ CurMBB->addSuccessor(Default);
+ if (Default != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Default)));
+ return;
+ }
+
+ // If there are any non-default case statements, create a vector of Cases
+ // representing each one, and sort the vector so that we can efficiently
+ // create a binary search tree from them.
+ CaseVector Cases;
+ size_t numCmps = Clusterify(Cases, SI);
+ DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n');
+ numCmps = 0;
+
+ // Get the Value to be switched on and default basic blocks, which will be
+ // inserted into CaseBlock records, representing basic blocks in the binary
+ // search tree.
+ Value *SV = SI.getOperand(0);
+
+ // Push the initial CaseRec onto the worklist
+ CaseRecVector WorkList;
+ WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+ while (!WorkList.empty()) {
+ // Grab a record representing a case range to process off the worklist
+ CaseRec CR = WorkList.back();
+ WorkList.pop_back();
+
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+ continue;
+
+ // If the range has few cases (three or fewer), emit a series of specific
+ // tests.
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+ continue;
+
+ // If the switch has more than three cases, is at least 40% dense, and the
+ // target supports indirect branches, then emit a jump table rather than
+ // lowering the switch to a binary tree of conditional branches.
+ if (handleJTSwitchCase(CR, WorkList, SV, Default))
+ continue;
+
+ // Emit a binary tree. We need to pick a pivot, and push left and right
+ // ranges onto the worklist. Leaves are handled via handleSmallSwitchRange().
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+ }
+}
+
+
+void SelectionDAGLowering::visitSub(User &I) {
+ // -0.0 - X --> fneg
+ const Type *Ty = I.getType();
+ if (isa<VectorType>(Ty)) {
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+ const VectorType *DestTy = cast<VectorType>(I.getType());
+ const Type *ElTy = DestTy->getElementType();
+ if (ElTy->isFloatingPoint()) {
+ unsigned VL = DestTy->getNumElements();
+ std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
+ Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+ if (CV == CNZ) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+ }
+ }
+ }
+ if (Ty->isFloatingPoint()) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+ }
+
+ visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB);
+}
+
+void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ if (!isa<VectorType>(I.getType()) &&
+ Op2.getValueType() != TLI.getShiftAmountTy()) {
+ // If the operand is smaller than the shift count type, promote it.
+ if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+ TLI.getShiftAmountTy(), Op2);
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (TLI.getShiftAmountTy().getSizeInBits() >=
+ Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getShiftAmountTy(), Op2);
+ // Otherwise we'll need to temporarily settle for some other
+ // convenient type; type legalization will make adjustments as
+ // needed.
+ else if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(), Op2);
+ else if (TLI.getPointerTy().bitsGT(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(), Op2);
+ }
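+
+ // Illustrative example: on a target whose shift-amount type is i8, an i64
+ // shift count is truncated to i8 above, since 8 bits are enough to
+ // represent any legal shift amount; the truncate is exposed to later
+ // optimization.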
+
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+ Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitICmp(User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitFCmp(User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitVICmp(User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (VICmpInst *IC = dyn_cast<VICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+ setValue(&I, DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(),
+ Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitVFCmp(User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ MVT DestVT = TLI.getValueType(I.getType());
+
+ setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitSelect(User &I) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues != 0) {
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue TrueVal = getValue(I.getOperand(1));
+ SDValue FalseVal = getValue(I.getOperand(2));
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
+ TrueVal.getValueType(), Cond,
+ SDValue(TrueVal.getNode(), TrueVal.getResNo() + i),
+ SDValue(FalseVal.getNode(), FalseVal.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+ }
+}
+
+
+void SelectionDAGLowering::visitTrunc(User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitZExt(User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // ZExt also can't be a cast to bool for the same reason, so there is
+ // nothing much to do here.
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitSExt(User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+ // SExt also can't be a cast to bool for the same reason, so there is
+ // nothing much to do here.
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPTrunc(User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+ DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGLowering::visitFPExt(User &I){
+ // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToUI(User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToSI(User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitUIToFP(User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitSIToFP(User &I){
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitPtrToInt(User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ MVT SrcVT = N.getValueType();
+ MVT DestVT = TLI.getValueType(I.getType());
+ SDValue Result;
+ if (DestVT.bitsLT(SrcVT))
+ Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N);
+ else
+ // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+ Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N);
+ setValue(&I, Result);
+}
+
+void SelectionDAGLowering::visitIntToPtr(User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ MVT SrcVT = N.getValueType();
+ MVT DestVT = TLI.getValueType(I.getType());
+ if (DestVT.bitsLT(SrcVT))
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+ else
+ // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ DestVT, N));
+}
+
+void SelectionDAGLowering::visitBitCast(User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ MVT DestVT = TLI.getValueType(I.getType());
+
+ // BitCast assures us that source and destination are the same size so this
+ // is either a BIT_CONVERT or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ DestVT, N)); // convert types
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGLowering::visitInsertElement(User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(2)));
+
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGLowering::visitExtractElement(User &I) {
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+
+// Utility for visitShuffleVector - Returns true if the mask is a sequential
+// mask starting from SIndx and increasing by one up to the element length
+// (undefs are allowed).
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+ unsigned MaskNumElts = Mask.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i)
+ if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+ return false;
+ return true;
+}
+
+void SelectionDAGLowering::visitShuffleVector(User &I) {
+ SmallVector<int, 8> Mask;
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+
+ // Convert the ConstantVector mask operand into an array of ints, with -1
+ // representing undef values.
+ SmallVector<Constant*, 8> MaskElts;
+ cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+ unsigned MaskNumElts = MaskElts.size();
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (isa<UndefValue>(MaskElts[i]))
+ Mask.push_back(-1);
+ else
+ Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+ }
+
+ MVT VT = TLI.getValueType(I.getType());
+ MVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &Mask[0]));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+ // The mask is longer than the source vectors, and its length is a multiple
+ // of the source vector length. We can concatenate vectors to make the mask
+ // and vector lengths match.
+ if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+ bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps1[0], NumConcat);
+ Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurDebugLoc(), VT,
+ &MOps2[0], NumConcat);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx);
+ else
+ MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ }
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+ // Analyze the access pattern of the vector to see if we can extract
+ // two subvectors and do the shuffle. The analysis is done by calculating
+ // the range of elements the mask accesses in each source vector.
+ int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+ int MaxRange[2] = {-1, -1};
+
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ int Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
+ }
+
+ // Check whether the accessed range is smaller than the vector size and
+ // whether we can find a reasonable extract index.
+ int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Cannot extract.
+ int StartIdx[2]; // StartIdx to extract from
+ for (int Input=0; Input < 2; ++Input) {
+ if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+ // Fits within range but we should see if we can find a good
+ // start index that is a multiple of the mask length.
+ if (MaxRange[Input] < (int)MaskNumElts) {
+ RangeUse[Input] = 1; // Extract from beginning of the vector
+ StartIdx[Input] = 0;
+ } else {
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts < SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ }
+ }
+ }
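+
+ // Illustrative example: a 4-element mask that reads only elements 8..11 of
+ // a 16-element source gets RangeUse == 1 with StartIdx == 8, so a
+ // subvector is extracted below and the mask is rebased to start at 0.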
+
+ if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (int Input=0; Input < 2; ++Input) {
+ SDValue& Src = Input == 0 ? Src1 : Src2;
+ if (RangeUse[Input] == 0) {
+ Src = DAG.getUNDEF(VT);
+ } else {
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+ Src, DAG.getIntPtrConstant(StartIdx[Input]));
+ }
+ }
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ MappedOps.push_back(Idx);
+ else if (Idx < (int)SrcNumElts)
+ MappedOps.push_back(Idx - StartIdx[0]);
+ else
+ MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ }
+ setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+ &MappedOps[0]));
+ return;
+ }
+ }
+
+ // We can't use either concat vectors or extract subvectors, so fall back
+ // to replacing the shuffle with individual element extracts and a
+ // build vector.
+ MVT EltVT = VT.getVectorElementType();
+ MVT PtrVT = TLI.getPointerTy();
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ } else {
+ int Idx = Mask[i];
+ if (Idx < (int)SrcNumElts)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src2,
+ DAG.getConstant(Idx - SrcNumElts, PtrVT)));
+ }
+ }
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ const Type *AggTy = I.getType();
+ const Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
+ unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+ I.idx_begin(), I.idx_end());
+
+ SmallVector<MVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+ SmallVector<MVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ SDValue Agg = getValue(Op0);
+ SDValue Val = getValue(Op1);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&AggValueVTs[0], NumAggValues),
+ &Values[0], NumAggValues));
+}
+
+void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Type *AggTy = Op0->getType();
+ const Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+ I.idx_begin(), I.idx_end());
+
+ SmallVector<MVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValValueVTs[0], NumValValues),
+ &Values[0], NumValValues));
+}
+
+
+void SelectionDAGLowering::visitGetElementPtr(User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ const Type *Ty = I.getOperand(0)->getType();
+
+ for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+ OI != E; ++OI) {
+ Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ DAG.getIntPtrConstant(Offset));
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->getZExtValue() == 0) continue;
+ uint64_t Offs =
+ TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ SDValue OffsVal;
+ unsigned PtrBits = TLI.getPointerTy().getSizeInBits();
+ if (PtrBits < 64) {
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(Offs, MVT::i64));
+ } else
+ OffsVal = DAG.getIntPtrConstant(Offs);
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+ OffsVal);
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD->getTypeAllocSize(Ty);
+ SDValue IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ if (IdxN.getValueType().bitsLT(N.getValueType()))
+ IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(),
+ N.getValueType(), IdxN);
+ else if (IdxN.getValueType().bitsGT(N.getValueType()))
+ IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ N.getValueType(), IdxN);
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementSize != 1) {
+ if (isPowerOf2_64(ElementSize)) {
+ unsigned Amt = Log2_64(ElementSize);
+ IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, TLI.getPointerTy()));
+ } else {
+ SDValue Scale = DAG.getIntPtrConstant(ElementSize);
+ IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ N.getValueType(), N, IdxN);
+ }
+ }
+ setValue(&I, N);
+}
+
+void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ const Type *Ty = I.getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
+ AllocSize,
+ DAG.getConstant(TySize, AllocSize.getValueType()));
+
+ MVT IntPtr = TLI.getPointerTy();
+ if (IntPtr.bitsLT(AllocSize.getValueType()))
+ AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+ IntPtr, AllocSize);
+ else if (IntPtr.bitsGT(AllocSize.getValueType()))
+ AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+ IntPtr, AllocSize);
+
+ // Handle alignment. If the requested alignment is less than or equal to
+ // the stack alignment, ignore it. If it is greater than the stack
+ // alignment, we note it in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign =
+ TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+ // Round the size of the allocation up to the stack alignment size
+ // by adding StackAlign-1 to the size.
+ AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(StackAlign-1));
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
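+
+ // Illustrative example: with StackAlign == 16 and a dynamic size of 20
+ // bytes, the two nodes above compute (20 + 15) & ~15 == 32, rounding the
+ // allocation up to the next multiple of the stack alignment.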
+
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+ VTs, Ops, 3);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ // Inform the Frame Information that we have just allocated a variable-sized
+ // object.
+ CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGLowering::visitLoad(LoadInst &I) {
+ const Value *SV = I.getOperand(0);
+ SDValue Ptr = getValue(SV);
+
+ const Type *Ty = I.getType();
+ bool isVolatile = I.isVolatile();
+ unsigned Alignment = I.getAlignment();
+
+ SmallVector<MVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (I.isVolatile())
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA->pointsToConstantMemory(SV)) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+ MVT PtrVT = Ptr.getValueType();
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+ DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ SV, Offsets[i],
+ isVolatile, Alignment);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other,
+ &Chains[0], NumValues);
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+ DAG.getVTList(&ValueVTs[0], NumValues),
+ &Values[0], NumValues));
+}
+
+
+void SelectionDAGLowering::visitStore(StoreInst &I) {
+ Value *SrcV = I.getOperand(0);
+ Value *PtrV = I.getOperand(1);
+
+ SmallVector<MVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ // Get the lowered operands. Note that we do this after
+ // checking if NumValues is zero, because with zero values
+ // the operands won't have values in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(NumValues);
+ MVT PtrVT = Ptr.getValueType();
+ bool isVolatile = I.isVolatile();
+ unsigned Alignment = I.getAlignment();
+ for (unsigned i = 0; i != NumValues; ++i)
+ Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ PtrV, Offsets[i],
+ isVolatile, Alignment);
+
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues));
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !I.doesNotAccessMemory();
+ bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+ // Info is set by getTgtMemIntrinsic.
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+ // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+ if (!IsTgtIntrinsic)
+ Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getOperand(i));
+ assert(TLI.isTypeLegal(Op.getValueType()) &&
+ "Intrinsic uses a non-legal type?");
+ Ops.push_back(Op);
+ }
+
+ std::vector<MVT> VTArray;
+ if (I.getType() != Type::VoidTy) {
+ MVT VT = TLI.getValueType(I.getType());
+ if (VT.isVector()) {
+ const VectorType *DestTy = cast<VectorType>(I.getType());
+ MVT EltVT = TLI.getValueType(DestTy->getElementType());
+
+ VT = MVT::getVectorVT(EltVT, DestTy->getNumElements());
+ assert(VT != MVT::Other && "Intrinsic uses a non-legal type?");
+ }
+
+ assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?");
+ VTArray.push_back(VT);
+ }
+ if (HasChain)
+ VTArray.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size());
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+ // This is a target intrinsic that touches memory.
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size(),
+ Info.memVT, Info.ptrVal, Info.offset,
+ Info.align, Info.vol,
+ Info.readMem, Info.writeMem);
+ }
+ else if (!HasChain)
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ else if (I.getType() != Type::VoidTy)
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+ else
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+ VTs, &Ops[0], Ops.size());
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+ if (I.getType() != Type::VoidTy) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ MVT VT = TLI.getValueType(PTy);
+ Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+ }
+ setValue(&I, Result);
+ }
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+static GlobalVariable *ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+ assert ((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+namespace llvm {
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<GlobalVariable *> TyInfo;
+ unsigned N = I.getNumOperands();
+
+ for (unsigned i = N - 1; i > 2; --i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + !FilterLength;
+ assert (FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ if (!FilterLength) {
+ // Cleanup.
+ MMI->addCleanup(MBB);
+ } else {
+ // Filter.
+ TyInfo.reserve(FilterLength - 1);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ N = i;
+ }
+ }
+
+ if (N > 3) {
+ TyInfo.reserve(N - 3);
+ for (unsigned j = 3; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
+
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with exponent of 1:
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit representation of the floating-point value.
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, MVT::i32));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit representation of the floating-point value.
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+ DebugLoc dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, MVT::i32));
+ SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
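+
+// Illustrative example: for 8.0f (bit pattern 0x41000000), GetExponent
+// returns 3.0f and GetSignificand returns 1.0f, reflecting 8.0 = 1.0 * 2^3.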
+
+/// getF32Constant - Get a 32-bit floating-point constant with the given
+/// bit pattern.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+ return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// implVisitBinaryAtomic - Utility function to implement binary-input atomic
+/// intrinsics for visitIntrinsicCall. I is the call instruction and Op is the
+/// associated ISD::NodeType.
+const char *
+SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(Op, getCurDebugLoc(),
+ getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ I.getOperand(1));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2);
+
+ setValue(&I, Result);
+ return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitExp(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OFe 1.4426950f
+ // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x3fb8aa3b));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
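+
+ // Illustrative example: exp(x) = 2^(x * log2(e)); for x = 1.0f the integer
+ // part is 1 and the fractional part is about 0.4427. The polynomial below
+ // approximates 2^0.4427 ~= 1.359, and adding 1 to its exponent field
+ // doubles it, giving ~2.718 as expected.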
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // 0.000107046256 error, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::i32, t13);
+
+ // Add the exponent into the result in integer domain.
+ SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ TwoToFracPartOfX, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitLog(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2) [0.69314718f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3f317218));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
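+ // The input decomposes as X * 2^Exp with X in [1,2), so
+ // log(input) = Exp*log(2) + log(X). For example, 10.0f gives Exp = 3 and
+ // X = 1.25: log(10) ~= 3*0.693147 + 0.223144 ~= 2.302585.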
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogOfMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitLog2(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
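+ // The input decomposes as X * 2^exponent with X in [1,2), so
+ // log2(input) = exponent + log2(X). For example, 10.0f gives exponent = 3
+ // and X = 1.25: log2(10) ~= 3 + 0.321928.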
+
+ // Different possible minimax approximations of significand in
+ // floating-point for various degrees of accuracy over [1,2].
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG2, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitLog10(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+ SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
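+ // The input decomposes as X * 2^Exp with X in [1,2), so
+ // log10(input) = Exp*log10(2) + log10(X). For example, 10.0f gives Exp = 3
+ // and X = 1.25: log10(10) ~= 3*0.301030 + 0.096910 ~= 1.0.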
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
+
+ result = DAG.getNode(ISD::FADD, dl,
+ MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FLOG10, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGLowering::visitExp2(CallInst &I) {
+ SDValue result;
+ DebugLoc dl = getCurDebugLoc();
+
+ if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(1));
+
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
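+
+ // Bits [30:23] of an IEEE-754 single hold the biased exponent, so adding
+ // the shifted IntegerPartOfX to the bit pattern of 2^FractionalPartOfX
+ // (computed below) multiplies it by 2^IntegerPartOfX, assuming the
+ // exponent field does not overflow. For example, 2^5.5 = 2^5 * 2^0.5.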
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FEXP2, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode when the base is the constant 10.0f.
+void
+SelectionDAGLowering::visitPow(CallInst &I) {
+ SDValue result;
+ Value *Val = I.getOperand(1);
+ DebugLoc dl = getCurDebugLoc();
+ bool IsExp10 = false;
+
+ if (getValue(Val).getValueType() == MVT::f32 &&
+ getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ APFloat Ten(10.0f);
+ IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
+ }
+ }
+ }
+
+ if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op = getValue(I.getOperand(2));
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // IntegerPartOfX = (int32_t)(x * LOG2OF10);
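+ //
+ // (0x40549a78 is the IEEE-754 bit pattern of 3.3219281f; this uses the
+ // identity 10^x = 2^(x * log2(10)).)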
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x40549a78));
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = x - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, TLI.getPointerTy()));
+
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ //
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue TwoToFractionalPartOfX =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+ result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f32, TwoToFractionalPartOfX);
+ }
+ } else {
+ // No special expansion.
+ result = DAG.getNode(ISD::FPOW, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)));
+ }
+
+ setValue(&I, result);
+}
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise lower it and return null.
+const char *
+SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+ DebugLoc dl = getCurDebugLoc();
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::setjmp:
+ return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ break;
+ case Intrinsic::longjmp:
+ return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ break;
+ case Intrinsic::memcpy: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+ case Intrinsic::memset: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
+ I.getOperand(1), 0));
+ return 0;
+ }
+ case Intrinsic::memmove: {
+ SDValue Op1 = getValue(I.getOperand(1));
+ SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+
+ // If the source and destination are known to not be aliases, we can
+ // lower memmove as memcpy.
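+ // For example, a memmove between two distinct local allocas that alias
+ // analysis reports as NoAlias is emitted as a memcpy node here.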
+ uint64_t Size = -1ULL;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
+ Size = C->getZExtValue();
+ if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+ AliasAnalysis::NoAlias) {
+ DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+
+ DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+ case Intrinsic::dbg_stoppoint: {
+ DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+ if (DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLevel)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ DICompileUnit CU(cast<GlobalVariable>(SPI.getContext()));
+ DebugLoc Loc = DebugLoc::get(MF.getOrCreateDebugLocID(CU.getGV(),
+ SPI.getLine(), SPI.getColumn()));
+ setCurDebugLoc(Loc);
+
+ if (OptLevel == CodeGenOpt::None)
+ DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(),
+ SPI.getLine(),
+ SPI.getColumn(),
+ SPI.getContext()));
+ }
+ return 0;
+ }
+ case Intrinsic::dbg_region_start: {
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
+
+ if (DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLevel) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ unsigned LabelID =
+ DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext()));
+ DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+ getRoot(), LabelID));
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_region_end: {
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
+
+ if (DIDescriptor::ValidDebugInfo(REI.getContext(), OptLevel) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext()));
+
+ if (Subprogram.isNull() || Subprogram.describes(MF.getFunction())) {
+ unsigned LabelID =
+ DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext()));
+ DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+ getRoot(), LabelID));
+ } else {
+ // This is the end of an inlined function. Debugging information for
+ // inlined functions is not handled yet (only supported by FastISel).
+ if (OptLevel == CodeGenOpt::None) {
+ unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
+ if (ID != 0)
+ // The returned ID is 0 if this is an unbalanced "end of inlined
+ // scope". This could happen if the optimizer eats dbg intrinsics or
+ // "beginning of inlined scope" is not recognized due to missing
+ // location info. In such cases, ignore this region.end.
+ DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+ getRoot(), ID));
+ }
+ }
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_func_start: {
+ DwarfWriter *DW = DAG.getDwarfWriter();
+ DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
+ Value *SP = FSI.getSubprogram();
+ if (!DIDescriptor::ValidDebugInfo(SP, OptLevel))
+ return 0;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (OptLevel == CodeGenOpt::None) {
+ // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what
+ // (most?) gdb expects.
+ DebugLoc PrevLoc = CurDebugLoc;
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+ if (!Subprogram.describes(MF.getFunction())) {
+ // This is the beginning of an inlined function.
+
+ // If llvm.dbg.func.start is seen in a new block before any
+ // llvm.dbg.stoppoint intrinsic then the location info is unknown.
+ // FIXME: Why is DebugLoc reset at the beginning of each block?
+ if (PrevLoc.isUnknown())
+ return 0;
+
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ setCurDebugLoc(DebugLoc::get(
+ MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
+ unsigned LabelID = DW->RecordInlinedFnStart(Subprogram,
+ DICompileUnit(PrevLocTpl.CompileUnit),
+ PrevLocTpl.Line,
+ PrevLocTpl.Col);
+ DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+ getRoot(), LabelID));
+ }
+ } else {
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ MF.setDefaultDebugLoc(DebugLoc::get(
+ MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ // llvm.dbg.func_start also defines the beginning of the function scope.
+ DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram()));
+ }
+ }
+ } else {
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+
+ std::string SPName;
+ Subprogram.getLinkageName(SPName);
+ if (!SPName.empty()
+ && strcmp(SPName.c_str(), MF.getFunction()->getNameStart())) {
+ // This is the beginning of an inlined function. Debugging information
+ // for inlined functions is not handled yet (only supported by FastISel).
+ return 0;
+ }
+
+ // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is
+ // what (most?) gdb expects.
+ DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+ // Record the source line but do not create a label for the normal
+ // function start; it will be emitted at asm emission time. However,
+ // create a label if this is the beginning of an inlined function.
+ unsigned Line = Subprogram.getLineNumber();
+ setCurDebugLoc(DebugLoc::get(
+ MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+ // FIXME - Start a new region because llvm.dbg.func_start also defines
+ // the beginning of the function scope.
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ if (OptLevel == CodeGenOpt::None) {
+ DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ Value *Variable = DI.getVariable();
+ if (DIDescriptor::ValidDebugInfo(Variable, OptLevel))
+ DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(),
+ getValue(DI.getAddress()), getValue(Variable)));
+ } else {
+ // FIXME: Do something sensible here when we support debug declare.
+ }
+ return 0;
+ }
+ case Intrinsic::eh_exception: {
+ // Insert the EXCEPTIONADDR instruction.
+ assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDValue Ops[1];
+ Ops[0] = DAG.getRoot();
+ SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ return 0;
+ }
+
+ case Intrinsic::eh_selector_i32:
+ case Intrinsic::eh_selector_i64: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ?
+ MVT::i32 : MVT::i64);
+
+ if (MMI) {
+ if (CurMBB->isLandingPad())
+ AddCatchInfo(I, MMI, CurMBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(&I);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) CurMBB->addLiveIn(Reg);
+ }
+
+ // Insert the EHSELECTION instruction.
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[2];
+ Ops[0] = getValue(I.getOperand(1));
+ Ops[1] = getRoot();
+ SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ } else {
+ setValue(&I, DAG.getConstant(0, VT));
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for_i32:
+ case Intrinsic::eh_typeid_for_i64: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ?
+ MVT::i32 : MVT::i64);
+
+ if (MMI) {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+
+ unsigned TypeID = MMI->getTypeIDFor(GV);
+ setValue(&I, DAG.getConstant(TypeID, VT));
+ } else {
+ // Return something different to eh_selector.
+ setValue(&I, DAG.getConstant(1, VT));
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+ MMI->setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2))));
+ } else {
+ setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+ }
+
+ return 0;
+ case Intrinsic::eh_unwind_init:
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+ MMI->setCallsUnwindInit(true);
+ }
+
+ return 0;
+
+ case Intrinsic::eh_dwarf_cfa: {
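+ // The CFA is computed below as FRAMEADDR(0) plus the target's
+ // FRAME_TO_ARGS_OFFSET plus the operand, with the operand first truncated
+ // or sign-extended to the pointer width.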
+ MVT VT = getValue(I.getOperand(1)).getValueType();
+ SDValue CfaArg;
+ if (VT.bitsGT(TLI.getPointerTy()))
+ CfaArg = DAG.getNode(ISD::TRUNCATE, dl,
+ TLI.getPointerTy(), getValue(I.getOperand(1)));
+ else
+ CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl,
+ TLI.getPointerTy(), getValue(I.getOperand(1)));
+
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ TLI.getPointerTy()),
+ CfaArg);
+ setValue(&I, DAG.getNode(ISD::ADD, dl,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAMEADDR, dl,
+ TLI.getPointerTy(),
+ DAG.getConstant(0,
+ TLI.getPointerTy())),
+ Offset));
+ return 0;
+ }
+
+ case Intrinsic::convertff:
+ case Intrinsic::convertfsi:
+ case Intrinsic::convertfui:
+ case Intrinsic::convertsif:
+ case Intrinsic::convertuif:
+ case Intrinsic::convertss:
+ case Intrinsic::convertsu:
+ case Intrinsic::convertus:
+ case Intrinsic::convertuu: {
+ ISD::CvtCode Code = ISD::CVT_INVALID;
+ switch (Intrinsic) {
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break;
+ case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+ case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+ case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+ case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+ case Intrinsic::convertss: Code = ISD::CVT_SS; break;
+ case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
+ case Intrinsic::convertus: Code = ISD::CVT_US; break;
+ case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
+ }
+ MVT DestVT = TLI.getValueType(I.getType());
+ Value* Op1 = I.getOperand(1);
+ setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+ DAG.getValueType(DestVT),
+ DAG.getValueType(getValue(Op1).getValueType()),
+ getValue(I.getOperand(2)),
+ getValue(I.getOperand(3)),
+ Code));
+ return 0;
+ }
+
+ case Intrinsic::sqrt:
+ setValue(&I, DAG.getNode(ISD::FSQRT, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::powi:
+ setValue(&I, DAG.getNode(ISD::FPOWI, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2))));
+ return 0;
+ case Intrinsic::sin:
+ setValue(&I, DAG.getNode(ISD::FSIN, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::cos:
+ setValue(&I, DAG.getNode(ISD::FCOS, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::log:
+ visitLog(I);
+ return 0;
+ case Intrinsic::log2:
+ visitLog2(I);
+ return 0;
+ case Intrinsic::log10:
+ visitLog10(I);
+ return 0;
+ case Intrinsic::exp:
+ visitExp(I);
+ return 0;
+ case Intrinsic::exp2:
+ visitExp2(I);
+ return 0;
+ case Intrinsic::pow:
+ visitPow(I);
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+ DAG.getVTList(MVT::i64, MVT::Other),
+ &Op, 1);
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+ case Intrinsic::part_select: {
+ // Currently not implemented: just abort
+ assert(0 && "part_select intrinsic not implemented");
+ abort();
+ }
+ case Intrinsic::part_set: {
+ // Currently not implemented: just abort
+ assert(0 && "part_set intrinsic not implemented");
+ abort();
+ }
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getOperand(1));
+ MVT Ty = Arg.getValueType();
+ SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg);
+ setValue(&I, result);
+ return 0;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getOperand(1));
+ MVT Ty = Arg.getValueType();
+ SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg);
+ setValue(&I, result);
+ return 0;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getOperand(1));
+ MVT Ty = Arg.getValueType();
+ SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg);
+ setValue(&I, result);
+ return 0;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+ case Intrinsic::stackrestore: {
+ SDValue Tmp = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MVT PtrTy = TLI.getPointerTy();
+
+ SDValue Src = getValue(I.getOperand(1)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI->setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+ PseudoSourceValue::getFixedStack(FI),
+ 0, true);
+ setValue(&I, Result);
+ DAG.setRoot(Result);
+ return 0;
+ }
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes
+ return 0;
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getOperand(1));
+ Ops[2] = getValue(I.getOperand(2));
+ Ops[3] = getValue(I.getOperand(3));
+ Ops[4] = DAG.getSrcValue(I.getOperand(1));
+ Ops[5] = DAG.getSrcValue(F);
+
+ SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl,
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+ Ops, 6);
+
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+
+ case Intrinsic::gcroot:
+ if (GFI) {
+ Value *Alloca = I.getOperand(1);
+ Constant *TypeMap = cast<Constant>(I.getOperand(2));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ }
+ return 0;
+
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ assert(0 && "GC failed to lower gcread/gcwrite intrinsics!");
+ return 0;
+
+ case Intrinsic::flt_rounds: {
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+ return 0;
+ }
+
+ case Intrinsic::trap: {
+ DAG.setRoot(DAG.getNode(ISD::TRAP, dl, MVT::Other, getRoot()));
+ return 0;
+ }
+
+ case Intrinsic::uadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::UADDO);
+ case Intrinsic::sadd_with_overflow:
+ return implVisitAluOverflow(I, ISD::SADDO);
+ case Intrinsic::usub_with_overflow:
+ return implVisitAluOverflow(I, ISD::USUBO);
+ case Intrinsic::ssub_with_overflow:
+ return implVisitAluOverflow(I, ISD::SSUBO);
+ case Intrinsic::umul_with_overflow:
+ return implVisitAluOverflow(I, ISD::UMULO);
+ case Intrinsic::smul_with_overflow:
+ return implVisitAluOverflow(I, ISD::SMULO);
+
+ case Intrinsic::prefetch: {
+ SDValue Ops[4];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getOperand(1));
+ Ops[2] = getValue(I.getOperand(2));
+ Ops[3] = getValue(I.getOperand(3));
+ DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+ return 0;
+ }
+
+ case Intrinsic::memory_barrier: {
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ for (int x = 1; x < 6; ++x)
+ Ops[x] = getValue(I.getOperand(x));
+
+ DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
+ return 0;
+ }
+ case Intrinsic::atomic_cmp_swap: {
+ SDValue Root = getRoot();
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+ getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ Root,
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ getValue(I.getOperand(3)),
+ I.getOperand(1));
+ setValue(&I, L);
+ DAG.setRoot(L.getValue(1));
+ return 0;
+ }
+ case Intrinsic::atomic_load_add:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
+ case Intrinsic::atomic_load_sub:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
+ case Intrinsic::atomic_load_or:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+ case Intrinsic::atomic_load_xor:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+ case Intrinsic::atomic_load_and:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+ case Intrinsic::atomic_load_nand:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
+ case Intrinsic::atomic_load_max:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+ case Intrinsic::atomic_load_min:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+ case Intrinsic::atomic_load_umin:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+ case Intrinsic::atomic_load_umax:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+ case Intrinsic::atomic_swap:
+ return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+ }
+}
+
+
+void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
+ bool IsTailCall,
+ MachineBasicBlock *LandingPad) {
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ unsigned BeginLabel = 0, EndLabel = 0;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+ for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ SDValue ArgNode = getValue(*i);
+ Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+
+ unsigned attrInd = i - CS.arg_begin() + 1;
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
+ Args.push_back(Entry);
+ }
+
+ if (LandingPad && MMI) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI->NextLabelID();
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+ getControlRoot(), BeginLabel));
+ }
+
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), CS.getType(),
+ CS.paramHasAttr(0, Attribute::SExt),
+ CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
+ CS.paramHasAttr(0, Attribute::InReg),
+ CS.getCallingConv(),
+ IsTailCall && PerformTailCallOpt,
+ Callee, Args, DAG, getCurDebugLoc());
+ if (CS.getType() != Type::VoidTy)
+ setValue(CS.getInstruction(), Result.first);
+ DAG.setRoot(Result.second);
+
+ if (LandingPad && MMI) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ EndLabel = MMI->NextLabelID();
+ DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+ getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
+}
+
+
+void SelectionDAGLowering::visitCall(CallInst &I) {
+ const char *RenameFn = 0;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
+ if (II) {
+ if (unsigned IID = II->getIntrinsicID(F)) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call.
+ unsigned NameLen = F->getNameLen();
+ if (!F->hasLocalLinkage() && NameLen) {
+ const char *NameStr = F->getNameStart();
+ if (NameStr[0] == 'c' &&
+ ((NameLen == 8 && !strcmp(NameStr, "copysign")) ||
+ (NameLen == 9 && !strcmp(NameStr, "copysignf")))) {
+ if (I.getNumOperands() == 3 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType() &&
+ I.getType() == I.getOperand(2)->getType()) {
+ SDValue LHS = getValue(I.getOperand(1));
+ SDValue RHS = getValue(I.getOperand(2));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ } else if (NameStr[0] == 'f' &&
+ ((NameLen == 4 && !strcmp(NameStr, "fabs")) ||
+ (NameLen == 5 && !strcmp(NameStr, "fabsf")) ||
+ (NameLen == 5 && !strcmp(NameStr, "fabsl")))) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (NameStr[0] == 's' &&
+ ((NameLen == 3 && !strcmp(NameStr, "sin")) ||
+ (NameLen == 4 && !strcmp(NameStr, "sinf")) ||
+ (NameLen == 4 && !strcmp(NameStr, "sinl")))) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (NameStr[0] == 'c' &&
+ ((NameLen == 3 && !strcmp(NameStr, "cos")) ||
+ (NameLen == 4 && !strcmp(NameStr, "cosf")) ||
+ (NameLen == 4 && !strcmp(NameStr, "cosl")))) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDValue Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ }
+ }
+ } else if (isa<InlineAsm>(I.getOperand(0))) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ SDValue Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getOperand(0));
+ else
+ Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from
+/// this value and return the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain,
+ SDValue *Flag) const {
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ MVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI->getNumRegisters(ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0)
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+ Chain = P.getValue(1);
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
+ RegisterVT.isInteger() && !RegisterVT.isVector()) {
+ unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+ if (FLI.LiveOutRegInfo.size() > SlotNo) {
+ FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI.NumSignBits;
+ unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
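+ // For example, a 32-bit virtual register known to have 25 sign bits and
+ // no known zero bits falls through to the AssertSext-from-i8 case below.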
+ bool isSExt = true;
+ MVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+
+ if (FromVT != MVT::Other) {
+ P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+
+ }
+ }
+ }
+
+ Parts[i] = P;
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copy the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ // Get the list of the value's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ MVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI->getNumRegisters(ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0)
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 and Flag is used, then the use of the last CopyToReg is
+ // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
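+/// The flag word emitted below packs the operand code into the low three
+/// bits and the register count into bits [15:3]; when HasMatching is set,
+/// bit 31 is also set and the matching operand index is placed at bit 16.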
+void RegsForValue::AddInlineAsmOperands(unsigned Code,
+ bool HasMatching,unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+ assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
+ unsigned Flag = Code | (Regs.size() << 3);
+ if (HasMatching)
+ Flag |= 0x80000000 | (MatchingIdx << 16);
+ Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]);
+ MVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register. Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo *TRI) {
+ MVT FoundVT = MVT::Other;
+ const TargetRegisterClass *FoundRC = 0;
+ for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+ E = TRI->regclass_end(); RCI != E; ++RCI) {
+ MVT ThisVT = MVT::Other;
+
+ const TargetRegisterClass *RC = *RCI;
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (TLI.isTypeLegal(*I)) {
+ // If we have already found this register in a different register class,
+ // choose the one with the largest VT specified. For example, on
+ // PowerPC, we favor f64 register classes over f32.
+ if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+ ThisVT = *I;
+ break;
+ }
+ }
+ }
+
+ if (ThisVT == MVT::Other) continue;
+
+ // NOTE: This isn't ideal. In particular, this might allocate the
+ // frame pointer in functions that need it (because it hasn't yet been
+ // taken out of the allocation order, since no variable-sized allocation
+ // has been seen). This is a slight code pessimization, but should still work.
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ if (*I == Reg) {
+ // We found a matching register class. Keep looking at others in case
+ // we find one with larger registers that this physreg is also in.
+ FoundRC = RC;
+ FoundVT = ThisVT;
+ break;
+ }
+ }
+ return FoundRC;
+}
+
+
+namespace llvm {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+ public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber,
+ /// this is null; otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+ /// AssignedRegs - If this is a register or register class operand, this
+ /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+ }
+
+ /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+ /// busy in OutputRegs/InputRegs.
+ void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs,
+ const TargetRegisterInfo &TRI) const {
+ if (isOutReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+ }
+ if (isInReg) {
+ for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+ MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+ }
+ }
+
+ /// getCallOperandValMVT - Return the MVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ MVT getCallOperandValMVT(const TargetLowering &TLI,
+ const TargetData *TD) const {
+ if (CallOperandVal == 0) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy();
+
+ const llvm::Type *OpTy = CallOperandVal->getType();
+
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect)
+ OpTy = cast<PointerType>(OpTy)->getElementType();
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
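+ // For example, an indirect operand whose pointee is a 16-byte struct is
+ // treated as i128 by the switch below.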
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(OpTy, true);
+ }
+
+private:
+ /// MarkRegAndAliases - Mark the specified register and all aliases in the
+ /// specified set.
+ static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+ Regs.insert(Reg);
+ if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+ for (; *Aliases; ++Aliases)
+ Regs.insert(*Aliases);
+ }
+};
+} // end llvm namespace.
+
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+/// InputRegs and OutputRegs are the sets of already-allocated physical registers.
+///
+void SelectionDAGLowering::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) {
+ // Compute whether this value requires an input register, an output register,
+ // or both.
+ bool isOutReg = false;
+ bool isInReg = false;
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ isOutReg = true;
+
+ // If there is an input constraint that matches this, we need to reserve
+ // the input register so no other inputs allocate to it.
+ isInReg = OpInfo.hasMatchingInput();
+ break;
+ case InlineAsm::isInput:
+ isInReg = true;
+ isOutReg = false;
+ break;
+ case InlineAsm::isClobber:
+ isOutReg = true;
+ isInReg = true;
+ break;
+ }
+
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other) {
+ // If this is an FP input in an integer register (or vice versa), insert a bit
+ // cast of the input value. More generally, handle any case where the input
+ // value disagrees with the register class we plan to stick this in.
+ if (OpInfo.Type == InlineAsm::isInput &&
+ PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ // Try to convert to the first MVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types).
+ MVT RegVT = *PhysReg.second->vt_begin();
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ // If the input is an FP value and we want it in integer registers, do a
+ // bitcast to the corresponding integer type. This turns an f64 value
+ // into i64, which can be passed with two i32 values on a 32-bit
+ // machine.
+ RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ }
+ }
+
+ NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+ }
+
+ MVT RegVT;
+ MVT ValueVT = OpInfo.ConstraintVT;
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (unsigned AssignedReg = PhysReg.first) {
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *RC->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *RC->vt_begin();
+
+ // This is an explicit reference to a physical register.
+ Regs.push_back(AssignedReg);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = RC->begin();
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ if (const TargetRegisterClass *RC = PhysReg.second) {
+ RegVT = *RC->vt_begin();
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // This is a reference to a register class that doesn't directly correspond
+ // to an LLVM register class. Allocate NumRegs consecutive, available,
+ // registers from the class.
+ std::vector<unsigned> RegClassRegs
+ = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ unsigned NumAllocated = 0;
+ for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+ unsigned Reg = RegClassRegs[i];
+ // See if this register is available.
+ if ((isOutReg && OutputRegs.count(Reg)) || // Already used.
+ (isInReg && InputRegs.count(Reg))) { // Already used.
+ // Make sure we find consecutive registers.
+ NumAllocated = 0;
+ continue;
+ }
+
+ // Check to see if this register is allocatable (i.e. don't give out the
+ // stack pointer).
+ const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
+ if (!RC) { // Couldn't allocate this register.
+ // Reset NumAllocated to make sure we return consecutive registers.
+ NumAllocated = 0;
+ continue;
+ }
+
+ // Okay, this register is good, we can use it.
+ ++NumAllocated;
+
+ // If we allocated enough consecutive registers, succeed.
+ if (NumAllocated == NumRegs) {
+ unsigned RegStart = (i-NumAllocated)+1;
+ unsigned RegEnd = i+1;
+ // Collect all of the allocated registers; they are marked used below.
+ for (unsigned i = RegStart; i != RegEnd; ++i)
+ Regs.push_back(RegClassRegs[i]);
+
+ OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
+ OpInfo.ConstraintVT);
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+ return;
+ }
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+}
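+
+// As a rough illustration of the three paths above (the register names are
+// only examples and are target-dependent):
+//
+//   %r = call i32 asm "foo $1, $0", "={ax},r"(i32 %x)
+//
+// The "={ax}" output names a specific physical register, so it is pulled
+// straight out of the register class that contains AX; the "r" input only
+// names a register class, so fresh virtual registers are created and the
+// register allocator picks physical registers later. A constraint that maps
+// to neither falls through to the consecutive-physreg scan at the end.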
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+static bool
+hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+ // Indirect operand accesses access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
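+
+// For example, an asm with an explicit memory constraint or an indirect
+// operand, such as (sketch)
+//
+//   call void asm sideeffect "movl $$0, $0", "=*m"(i32* %p)
+//
+// makes this return true, which in turn makes visitInlineAsm flush pending
+// loads before the asm node.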
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
+ InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+
+ std::set<unsigned> OutputRegs, InputRegs;
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ std::vector<InlineAsm::ConstraintInfo>
+ ConstraintInfos = IA->ParseConstraints();
+
+ bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
+
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ MVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+ // BasicBlocks are labels, currently appearing only in asm's.
+ if (OpInfo.CallOperandVal) {
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValMVT(TLI, TD);
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+ }
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ cerr << "llvm: error: Unsupported asm: input constraint with a "
+ << "matching output constraint of incompatible type!\n";
+ exit(1);
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+
+ // If this is a memory input, and if the operand is not indirect, do what we
+ // need to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert(OpInfo.Type == InlineAsm::isInput &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+ // an indirect input, put it in the constpool if we can, otherwise spill
+ // it to a stack slot.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ const Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, getCurDebugLoc(),
+ OpInfo.CallOperand, StackSlot, NULL, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+ }
+ ConstraintInfos.clear();
+
+
+ // Third pass - Loop over all of the operands, assigning virtual or physregs
+ // to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned ResOpType = 4/*MEM*/ | (1<<3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ cerr << "llvm: error: Couldn't allocate output reg for constraint '"
+ << OpInfo.ConstraintCode << "'!\n";
+ exit(1);
+ }
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+ 6 /* EARLYCLOBBER REGDEF */ :
+ 2 /* REGDEF */ ,
+ false,
+ 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+ // Scan until we find the definition of this operand that we already emitted.
+ // When we find it, create a RegsForValue operand.
+ unsigned CurOp = 2; // The first operand.
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
+ (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
+ (OpFlag & 7) == 4 /*MEM*/) &&
+ "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if ((OpFlag & 7) == 2 /*REGDEF*/
+ || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle tied indirect register inputs yet!");
+ RegsForValue MatchedRegs;
+ MatchedRegs.TLI = &TLI;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ MVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.
+ push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+ // Use the produced MatchedRegs object to copy the input value into the
+ // allocated virtual registers and add the corresponding asm node operands.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+ MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+ true, OpInfo.getMatchedOperand(),
+ DAG, AsmNodeOperands);
+ break;
+ } else {
+ assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
+ assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect other inputs yet!");
+
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+ hasMemory, Ops, DAG);
+ if (Ops.empty()) {
+ cerr << "llvm: error: Invalid operand for inline asm constraint '"
+ << OpInfo.ConstraintCode << "'!\n";
+ exit(1);
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = 4/*MEM*/ | (1<<3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect register inputs yet!");
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ cerr << "llvm: error: Couldn't allocate input reg for constraint '"
+ << OpInfo.ConstraintCode << "'!\n";
+ exit(1);
+ }
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag);
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+ DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+ false, 0, DAG,AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands.
+ AsmNodeOperands[0] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+ DAG.getVTList(MVT::Other, MVT::Flag),
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ Chain, &Flag);
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ MVT ResultType = TLI.getValueType(CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/num elts. This can happen for register classes that can
+ // contain multiple different value types. The preg or vreg allocated may
+ // not have the same VT as was expected. Convert it to the right type
+ // with bit_convert.
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ ResultType, Val);
+
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
+ std::vector<std::pair<SDValue, Value*> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ Chain, &Flag);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
+ OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(),
+ StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ StoresToEmit[i].second, 0));
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+ &OutChains[0], OutChains.size());
+ DAG.setRoot(Chain);
+}
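+
+// A rough summary of the INLINEASM node built above: operand 0 is the chain,
+// operand 1 is the asm string (as a target external symbol), and every
+// constraint then contributes a flag word of the form (kind | (count << 3))
+// followed by its operands, where kind is 1 (REGUSE), 2 (REGDEF), 3 (IMM),
+// 4 (MEM) or 6 (EARLYCLOBBER REGDEF), matching the constants used in this
+// function.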
+
+
+void SelectionDAGLowering::visitMalloc(MallocInst &I) {
+ SDValue Src = getValue(I.getOperand(0));
+
+ // Scale up by the type size, in the original i32 type width. Various
+ // mid-level optimizers may make assumptions about demanded bits etc. based
+ // on the i32-ness of the size operand: we do not want to promote to i64 and
+ // then multiply on 64-bit targets.
+ // FIXME: Malloc inst should go away: PR715.
+ uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType());
+ if (ElementSize != 1)
+ Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(),
+ Src, DAG.getConstant(ElementSize, Src.getValueType()));
+
+ MVT IntPtr = TLI.getPointerTy();
+
+ if (IntPtr.bitsLT(Src.getValueType()))
+ Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src);
+ else if (IntPtr.bitsGT(Src.getValueType()))
+ Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Src;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Args.push_back(Entry);
+
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false,
+ CallingConv::C, PerformTailCallOpt,
+ DAG.getExternalSymbol("malloc", IntPtr),
+ Args, DAG, getCurDebugLoc());
+ setValue(&I, Result.first); // Pointers always fit in registers
+ DAG.setRoot(Result.second);
+}
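+
+// For example (sketch), with 32-bit pointers
+//
+//   %p = malloc i32, i32 %n
+//
+// becomes a call to malloc(%n * 4): the element-size multiply is done in the
+// original i32 width, and only then is the product truncated or extended to
+// the pointer width for the libcall.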
+
+void SelectionDAGLowering::visitFree(FreeInst &I) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(I.getOperand(0));
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Args.push_back(Entry);
+ MVT IntPtr = TLI.getPointerTy();
+ std::pair<SDValue,SDValue> Result =
+ TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false,
+ CallingConv::C, PerformTailCallOpt,
+ DAG.getExternalSymbol("free", IntPtr), Args, DAG,
+ getCurDebugLoc());
+ DAG.setRoot(Result.second);
+}
+
+void SelectionDAGLowering::visitVAStart(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+ getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGLowering::visitVAEnd(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVACopy(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ DAG.getSrcValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(2))));
+}
+
+/// TargetLowering::LowerArguments - This is the default LowerArguments
+/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all
+/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be
+/// integrated into SDISel.
+void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl) {
+ // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node.
+ SmallVector<SDValue, 3+16> Ops;
+ Ops.push_back(DAG.getRoot());
+ Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy()));
+ Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy()));
+
+ // Add one result value for each formal argument.
+ SmallVector<MVT, 16> RetVals;
+ unsigned j = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++j) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(*this, I->getType(), ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ MVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForMVT();
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(ArgTy);
+
+ if (F.paramHasAttr(j, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.paramHasAttr(j, Attribute::SExt))
+ Flags.setSExt();
+ if (F.paramHasAttr(j, Attribute::InReg))
+ Flags.setInReg();
+ if (F.paramHasAttr(j, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.paramHasAttr(j, Attribute::ByVal)) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(I->getType());
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+ unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy);
+ // For ByVal, the alignment should be passed from the front end. The back
+ // end will guess if this info is not there, but there are cases it cannot
+ // get right.
+ if (F.getParamAlignment(j))
+ FrameAlign = F.getParamAlignment(j);
+ Flags.setByValAlign(FrameAlign);
+ Flags.setByValSize(FrameSize);
+ }
+ if (F.paramHasAttr(j, Attribute::Nest))
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT RegisterVT = getRegisterType(VT);
+ unsigned NumRegs = getNumRegisters(VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ RetVals.push_back(RegisterVT);
+ ISD::ArgFlagsTy MyFlags = Flags;
+ if (NumRegs > 1 && i == 0)
+ MyFlags.setSplit();
+ // If it isn't the first piece, the alignment must be 1.
+ else if (i > 0)
+ MyFlags.setOrigAlign(1);
+ Ops.push_back(DAG.getArgFlags(MyFlags));
+ }
+ }
+ }
+
+ RetVals.push_back(MVT::Other);
+
+ // Create the node.
+ SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl,
+ DAG.getVTList(&RetVals[0], RetVals.size()),
+ &Ops[0], Ops.size()).getNode();
+
+ // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but
+ // allows exposing the loads that may be part of the argument access to the
+ // first DAGCombiner pass.
+ SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG);
+
+ // The number of results should match up, except that the lowered one may have
+ // an extra flag result.
+ assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() ||
+ (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() &&
+ TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag))
+ && "Lowering produced unexpected number of results!");
+
+ // The FORMAL_ARGUMENTS node itself is likely no longer needed.
+ if (Result != TmpRes.getNode() && Result->use_empty()) {
+ HandleSDNode Dummy(DAG.getRoot());
+ DAG.RemoveDeadNode(Result);
+ }
+
+ Result = TmpRes.getNode();
+
+ unsigned NumArgRegs = Result->getNumValues() - 1;
+ DAG.setRoot(SDValue(Result, NumArgRegs));
+
+ // Set up the return result vector.
+ unsigned i = 0;
+ unsigned Idx = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(*this, I->getType(), ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ MVT VT = ValueVTs[Value];
+ MVT PartVT = getRegisterType(VT);
+
+ unsigned NumParts = getNumRegisters(VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ for (unsigned j = 0; j != NumParts; ++j)
+ Parts[j] = SDValue(Result, i++);
+
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (F.paramHasAttr(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.paramHasAttr(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts,
+ PartVT, VT, AssertOp));
+ }
+ }
+ assert(i == NumArgRegs && "Argument register count mismatch!");
+}
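+
+// A sketch of the splitting above: for
+//
+//   define i64 @f(i64 %x)
+//
+// on a target where i64 is not legal and getNumRegisters(i64) is 2, the
+// FORMAL_ARGUMENTS node gets two i32-sized results for %x. The first part
+// carries the split flag and the original ABI alignment, later parts get
+// alignment 1, and getCopyFromParts reassembles them into the single i64
+// value pushed onto ArgValues.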
+
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just inserts an ISD::CALL node, which is later custom
+/// lowered by the target to something concrete. FIXME: When all targets are
+/// migrated to using ISD::CALL, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg,
+ unsigned CallingConv, bool isTailCall,
+ SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
+ assert((!isTailCall || PerformTailCallOpt) &&
+ "isTailCall set when tail-call optimizations are disabled!");
+
+ SmallVector<SDValue, 32> Ops;
+ Ops.push_back(Chain); // Op#0 - Chain
+ Ops.push_back(Callee);
+
+ // Handle all of the outgoing arguments.
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ MVT VT = ValueVTs[Value];
+ const Type *ArgTy = VT.getTypeForMVT();
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(ArgTy);
+
+ if (Args[i].isZExt)
+ Flags.setZExt();
+ if (Args[i].isSExt)
+ Flags.setSExt();
+ if (Args[i].isInReg)
+ Flags.setInReg();
+ if (Args[i].isSRet)
+ Flags.setSRet();
+ if (Args[i].isByVal) {
+ Flags.setByVal();
+ const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+ unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy);
+ // For ByVal, the alignment should come from the front end. The back end
+ // will guess if this info is not there, but there are cases it cannot get
+ // right.
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ Flags.setByValAlign(FrameAlign);
+ Flags.setByValSize(FrameSize);
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT PartVT = getRegisterType(VT);
+ unsigned NumParts = getNumRegisters(VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind);
+
+ for (unsigned i = 0; i != NumParts; ++i) {
+ // If it isn't the first piece, the alignment must be 1.
+ ISD::ArgFlagsTy MyFlags = Flags;
+ if (NumParts > 1 && i == 0)
+ MyFlags.setSplit();
+ else if (i != 0)
+ MyFlags.setOrigAlign(1);
+
+ Ops.push_back(Parts[i]);
+ Ops.push_back(DAG.getArgFlags(MyFlags));
+ }
+ }
+ }
+
+ // Figure out the result value types. We start by making a list of
+ // the potentially illegal return value types.
+ SmallVector<MVT, 4> LoweredRetTys;
+ SmallVector<MVT, 4> RetTys;
+ ComputeValueVTs(*this, RetTy, RetTys);
+
+ // Then we translate that to a list of legal types.
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ MVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(VT);
+ unsigned NumRegs = getNumRegisters(VT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ LoweredRetTys.push_back(RegisterVT);
+ }
+
+ LoweredRetTys.push_back(MVT::Other); // Always has a chain.
+
+ // Create the CALL node.
+ SDValue Res = DAG.getCall(CallingConv, dl,
+ isVarArg, isTailCall, isInreg,
+ DAG.getVTList(&LoweredRetTys[0],
+ LoweredRetTys.size()),
+ &Ops[0], Ops.size()
+ );
+ Chain = Res.getValue(LoweredRetTys.size() - 1);
+
+ // Gather up the call result into a single value.
+ if (RetTy != Type::VoidTy && !RetTys.empty()) {
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+
+ if (RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (RetZExt)
+ AssertOp = ISD::AssertZext;
+
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned RegNo = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ MVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(VT);
+ unsigned NumRegs = getNumRegisters(VT);
+ unsigned RegNoEnd = NumRegs + RegNo;
+ SmallVector<SDValue, 4> Results;
+ for (; RegNo != RegNoEnd; ++RegNo)
+ Results.push_back(Res.getValue(RegNo));
+ SDValue ReturnValue =
+ getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT,
+ AssertOp);
+ ReturnValues.push_back(ReturnValue);
+ }
+ Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+ }
+
+ return std::make_pair(Res, Chain);
+}
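+
+// A sketch of the node built above: the CALL operands are
+// (Chain, Callee, arg-part0, flags0, arg-part1, flags1, ...), and for a call
+// returning i64 on a target where i64 is lowered to two i32 parts the result
+// list is (i32, i32, Other), with the chain as the last result; the two i32
+// values are then merged back into a single i64 by getCopyFromParts and
+// MERGE_VALUES.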
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ assert(0 && "LowerOperation not implemented for this target!");
+ abort();
+ return SDValue();
+}
+
+
+void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+ SDValue Op = getValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ RegsForValue RFV(TLI, Reg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::
+LowerArguments(BasicBlock *LLVMBB) {
+ // If this is the entry block, emit arguments.
+ Function &F = *LLVMBB->getParent();
+ SDValue OldRoot = SDL->DAG.getRoot();
+ SmallVector<SDValue, 16> Args;
+ TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc());
+
+ unsigned a = 0;
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI) {
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, AI->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (!AI->use_empty()) {
+ SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues,
+ SDL->getCurDebugLoc()));
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BBs will reference it as.
+ SDL->CopyToExportRegsIfNeeded(AI);
+ }
+ a += NumValues;
+ }
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ // FIXME: this should insert code into the DAG!
+ EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction());
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
+ TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+ PHINode *PN;
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::iterator I = SuccBB->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+ // Ignore dead phi's.
+ if (PN->use_empty()) continue;
+
+ unsigned Reg;
+ Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = SDL->ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo->CreateRegForValue(C);
+ SDL->CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ Reg = FuncInfo->ValueMap[PHIOp];
+ if (Reg == 0) {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo->CreateRegForValue(PHIOp);
+ SDL->CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+ // Remember that this register needs to be added to the machine PHI node as
+ // the input for this MBB.
+ SmallVector<MVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ MVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+ SDL->ConstantsOut.clear();
+}
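+
+// For example (sketch): if a successor block begins with
+//
+//   %v = phi i32 [ 7, %thisbb ], [ %x, %otherbb ]
+//
+// the constant 7 has to be materialized in this block, so a virtual register
+// is created for it (only once, via ConstantsOut) and the (PHI, vreg) pair is
+// recorded in PHINodesToUpdate for later patching of the machine PHI node.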
+
+/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
+/// supports legal types, and it emits MachineInstrs directly instead of
+/// creating SelectionDAG nodes.
+///
+bool
+SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
+ FastISel *F) {
+ TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+ PHINode *PN;
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::iterator I = SuccBB->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+ // Ignore dead phi's.
+ if (PN->use_empty()) continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegForValue to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(VT);
+ else {
+ SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ unsigned Reg = F->getRegForValue(PHIOp);
+ if (Reg == 0) {
+ SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ return false;
+ }
+ SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ }
+ }
+
+ return true;
+}
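+
+// Sketch of the bail-out above: a PHI of a type that is neither legal nor
+// promotable here (say i128 on a 32-bit target) makes this shrink
+// PHINodesToUpdate back to its original size and return false, so
+// SelectionDAG's HandlePHINodesInSuccessorBlocks handles the block instead.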
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
new file mode 100644
index 0000000..578aa591
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -0,0 +1,558 @@
+//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILD_H
+#define SELECTIONDAGBUILD_H
+
+#include "llvm/Constants.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetMachine.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class FreeInst;
+class Function;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class MallocInst;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VICmpInst;
+class VFCmpInst;
+class VAArgInst;
+class ZExtInst;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function that is used when lowering a region of the function.
+///
+class FunctionLoweringInfo {
+public:
+ TargetLowering &TLI;
+ Function *Fn;
+ MachineFunction *MF;
+ MachineRegisterInfo *RegInfo;
+
+ explicit FunctionLoweringInfo(TargetLowering &TLI);
+
+ /// set - Initialize this FunctionLoweringInfo with the given Function
+ /// and its associated MachineFunction.
+ ///
+ void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG,
+ bool EnableFastISel);
+
+ /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+ DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+ /// ValueMap - Since we emit code for the function a basic block at a time,
+ /// we must remember which virtual registers hold the values for
+ /// cross-basic-block values.
+ DenseMap<const Value*, unsigned> ValueMap;
+
+ /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+ /// the entry block. This allows the allocas to be efficiently referenced
+ /// anywhere in the function.
+ DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+ SmallSet<Instruction*, 8> CatchInfoLost;
+ SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+ unsigned MakeReg(MVT VT);
+
+ /// isExportedInst - Return true if the specified value is an instruction
+ /// exported from its block.
+ bool isExportedInst(const Value *V) {
+ return ValueMap.count(V);
+ }
+
+ unsigned CreateRegForValue(const Value *V);
+
+ unsigned InitializeRegForValue(const Value *V) {
+ unsigned &R = ValueMap[V];
+ assert(R == 0 && "Already initialized this value register!");
+ return R = CreateRegForValue(V);
+ }
+
+ struct LiveOutInfo {
+ unsigned NumSignBits;
+ APInt KnownOne, KnownZero;
+ LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+ };
+
+ /// LiveOutRegInfo - Information about live out vregs, indexed by their
+ /// register number offset by 'FirstVirtualRegister'.
+ std::vector<LiveOutInfo> LiveOutRegInfo;
+
+ /// clear - Clear out all the function-specific state. This returns this
+ /// FunctionLoweringInfo to an empty state, ready to be used for a
+ /// different function.
+ void clear() {
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+#ifndef NDEBUG
+ CatchInfoLost.clear();
+ CatchInfoFound.clear();
+#endif
+ LiveOutRegInfo.clear();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLowering - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can overload any lowering method.
+///
+class SelectionDAGLowering {
+ MachineBasicBlock *CurMBB;
+
+ /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+ DebugLoc CurDebugLoc;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+ /// instruction, but they have no other ordering requirements. We bunch them
+ /// up and then emit a single tokenfactor for them just before terminator
+ /// instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// Case - A struct to record the Value for a switch case, and the
+ /// case's target basic block.
+ struct Case {
+ Constant* Low;
+ Constant* High;
+ MachineBasicBlock* BB;
+
+ Case() : Low(0), High(0), BB(0) { }
+ Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+ Low(low), High(high), BB(bb) { }
+ uint64_t size() const {
+ uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
+ uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue();
+ return (rHigh - rLow + 1ULL);
+ }
+ };
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+ Mask(mask), BB(bb), Bits(bits) { }
+ };
+
+ typedef std::vector<Case> CaseVector;
+ typedef std::vector<CaseBits> CaseBitsVector;
+ typedef CaseVector::iterator CaseItr;
+ typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+ /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+ /// of conditional branches.
+ struct CaseRec {
+ CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+ CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+ /// CaseBB - The MBB in which to emit the compare and branch
+ MachineBasicBlock *CaseBB;
+ /// LT, GE - If nonzero, we know the current case value must be less-than or
+ /// greater-than-or-equal-to these Constants.
+ Constant *LT;
+ Constant *GE;
+ /// Range - A pair of iterators representing the range of case values to be
+ /// processed at this point in the binary search tree.
+ CaseRange Range;
+ };
+
+ typedef std::vector<CaseRec> CaseRecVector;
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const Case& C1, const Case& C2) {
+ assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+
+ struct CaseBitsCmp {
+ bool operator () (const CaseBits& C1, const CaseBits& C2) {
+ return C1.Bits > C2.Bits;
+ }
+ };
+
+ size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
+
+ /// CaseBlock - This structure is used to communicate between SDLowering and
+ /// SDISel for the code generation of additional basic blocks needed by multi-
+ /// case switch statements.
+ struct CaseBlock {
+ CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
+ MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+ MachineBasicBlock *me)
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+ // CC - the condition code to use for the case block's setcc node
+ ISD::CondCode CC;
+ // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+ // Emit by default LHS op RHS. MHS is used for range comparisons:
+ // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
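+ // For example (sketch), a clustered switch range covering case values
+ // 3..7 can be emitted as one CaseBlock with CmpLHS = 3, CmpMHS = the
+ // switch value and CmpRHS = 7, so a single range check stands in for
+ // five equality compares.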
+ Value *CmpLHS, *CmpMHS, *CmpRHS;
+ // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+ // ThisBB - the block into which to emit the code for the setcc and branches
+ MachineBasicBlock *ThisBB;
+ };
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+ /// Reg - the virtual register containing the index of the jump table entry
+ /// to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+ /// Default - the MBB of the default bb, which is a successor of the range
+ /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H,
+ bool E = false):
+ First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ APInt First;
+ APInt Last;
+ Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
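+
+ // Sketch: a reasonably dense switch is lowered as a JumpTableBlock pair:
+ // the JumpTableHeader block range-checks the switch value against
+ // [First, Last] and branches to Default when it is out of range, and the
+ // JumpTable block then branches indirectly through the table entry
+ // selected by Reg.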
+
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+ Mask(M), ThisBB(T), TargetBB(Tr) { }
+ uint64_t Mask;
+ MachineBasicBlock* ThisBB;
+ MachineBasicBlock* TargetBB;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
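+ // Sketch of the bit-test lowering these structures describe: the
+ // BitTestBlock header first range-checks the switch value using First and
+ // Range, and each BitTestCase then tests a mask with one bit set for every
+ // case value that branches to its TargetBB, replacing a run of individual
+ // compares.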
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, Value* SV,
+ unsigned Rg, bool E,
+ MachineBasicBlock* P, MachineBasicBlock* D,
+ const BitTestInfo& C):
+ First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+ Parent(P), Default(D), Cases(C) { }
+ APInt First;
+ APInt Range;
+ Value *SValue;
+ unsigned Reg;
+ bool Emitted;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ };
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const TargetData *TD;
+ AliasAnalysis *AA;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<Constant*, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// OptLevel - What optimization level we're generating code for.
+ ///
+ CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
+ FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
+ : CurDebugLoc(DebugLoc::getUnknownLoc()),
+ TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) {
+ }
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+ /// clear - Clear out the current SelectionDAG and the associated
+ /// state and prepare this SelectionDAGLowering object to be used
+ /// for a new block. This doesn't clear out information about
+ /// additional blocks that are needed to complete switch lowering
+ /// or PHI node updating; that information is cleared out as it is
+ /// consumed.
+ void clear();
+
+ /// getRoot - Return the current virtual root of the Selection DAG,
+ /// flushing any PendingLoad items. This must be done before emitting
+ /// a store or any other node that may need to be ordered after any
+ /// prior load instructions.
+ ///
+ SDValue getRoot();
+
+ /// getControlRoot - Similar to getRoot, but instead of flushing all the
+ /// PendingLoad items, flush all the PendingExports items. It is necessary
+ /// to do this before emitting a terminator instruction.
+ ///
+ SDValue getControlRoot();
+
+ DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+ void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
+
+ void CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+ void visit(Instruction &I);
+
+ void visit(unsigned Opcode, User &I);
+
+ void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+ SDValue getValue(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs);
+
+ void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ unsigned Opc);
+ void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(Value *V);
+ void ExportFromCurrentBlock(Value *V);
+ void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+private:
+ // Terminator instructions.
+ void visitRet(ReturnInst &I);
+ void visitBr(BranchInst &I);
+ void visitSwitch(SwitchInst &I);
+ void visitUnreachable(UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+public:
+ void visitSwitchCase(CaseBlock &CB);
+ void visitBitTestHeader(BitTestBlock &B);
+ void visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ BitTestCase &B);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(InvokeInst &I);
+ void visitUnwind(UnwindInst &I);
+
+ void visitBinary(User &I, unsigned OpCode);
+ void visitShift(User &I, unsigned Opcode);
+ void visitAdd(User &I);
+ void visitSub(User &I);
+ void visitMul(User &I);
+ void visitURem(User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
+ void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(User &I);
+ void visitFCmp(User &I);
+ void visitVICmp(User &I);
+ void visitVFCmp(User &I);
+ // Visit the conversion instructions
+ void visitTrunc(User &I);
+ void visitZExt(User &I);
+ void visitSExt(User &I);
+ void visitFPTrunc(User &I);
+ void visitFPExt(User &I);
+ void visitFPToUI(User &I);
+ void visitFPToSI(User &I);
+ void visitUIToFP(User &I);
+ void visitSIToFP(User &I);
+ void visitPtrToInt(User &I);
+ void visitIntToPtr(User &I);
+ void visitBitCast(User &I);
+
+ void visitExtractElement(User &I);
+ void visitInsertElement(User &I);
+ void visitShuffleVector(User &I);
+
+ void visitExtractValue(ExtractValueInst &I);
+ void visitInsertValue(InsertValueInst &I);
+
+ void visitGetElementPtr(User &I);
+ void visitSelect(User &I);
+
+ void visitMalloc(MallocInst &I);
+ void visitFree(FreeInst &I);
+ void visitAlloca(AllocaInst &I);
+ void visitLoad(LoadInst &I);
+ void visitStore(StoreInst &I);
+ void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
+ void visitCall(CallInst &I);
+ void visitInlineAsm(CallSite CS);
+ const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+
+ void visitPow(CallInst &I);
+ void visitExp2(CallInst &I);
+ void visitExp(CallInst &I);
+ void visitLog(CallInst &I);
+ void visitLog2(CallInst &I);
+ void visitLog10(CallInst &I);
+
+ void visitVAStart(CallInst &I);
+ void visitVAArg(VAArgInst &I);
+ void visitVAEnd(CallInst &I);
+ void visitVACopy(CallInst &I);
+
+ void visitUserOp1(Instruction &I) {
+ assert(0 && "UserOp1 should not exist at instruction selection time!");
+ abort();
+ }
+ void visitUserOp2(Instruction &I) {
+ assert(0 && "UserOp2 should not exist at instruction selection time!");
+ abort();
+ }
+
+ const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
+ const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
+};
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..9d72a12
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,1347 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuild.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableLegalizeTypes("disable-legalize-types", cl::Hidden);
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+ cl::desc("Enable verbose messages in the \"fast\" "
+ "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction fails"));
+static cl::opt<bool>
+SchedLiveInCopies("schedule-livein-copies",
+ cl::desc("Schedule copies of livein registers"),
+ cl::init(false));
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler),
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
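+ /// At -O0 this returns the fast scheduler; otherwise the choice follows the
+ /// target's scheduling preference: top-down list scheduling when scheduling
+ /// for latency, and bottom-up register-reduction list scheduling when
+ /// scheduling for register pressure.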
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering &TLI = IS->getTargetLowering();
+
+ if (OptLevel == CodeGenOpt::None)
+ return createFastDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+ return createTDListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() ==
+ TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
+ return createBURRListDAGScheduler(IS, OptLevel);
+ }
+}
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomDAGSchedInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and the scheduler passes ownership of it to this method.
+MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ cerr << "If a target marks an instruction with "
+ << "'usesCustomDAGSchedInserter', it must implement "
+ << "TargetLowering::EmitInstrWithCustomInserter!\n";
+ abort();
+ return 0;
+}
+
+/// EmitLiveInCopy - Emit a copy for a live-in physical register. If the
+/// physical register has only a single copy use, then coalesce the copy
+/// if possible.
+static void EmitLiveInCopy(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &InsertPos,
+ unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterClass *RC,
+ DenseMap<MachineInstr*, unsigned> &CopyRegMap,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ unsigned NumUses = 0;
+ MachineInstr *UseMI = NULL;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
+ UE = MRI.use_end(); UI != UE; ++UI) {
+ UseMI = &*UI;
+ if (++NumUses > 1)
+ break;
+ }
+
+ // If there is more than one use, or the use is not a move instruction,
+ // don't coalesce. Also, only coalesce the copy away when the move's
+ // destination is itself a virtual register.
+ bool Coalesced = false;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (NumUses == 1 &&
+ TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ VirtReg = DstReg;
+ Coalesced = true;
+ }
+
+ // Now find an ideal location to insert the copy.
+ MachineBasicBlock::iterator Pos = InsertPos;
+ while (Pos != MBB->begin()) {
+ MachineInstr *PrevMI = prior(Pos);
+ DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
+ // copyRegToReg might emit multiple instructions to do a copy.
+ unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
+ if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
+ // This is what the BB looks like right now:
+ // r1024 = mov r0
+ // ...
+ // r1 = mov r1024
+ //
+ // We want to insert "r1025 = mov r1". Inserting this copy below the
+ // move to r1024 makes it impossible for that move to be coalesced.
+ //
+ // r1025 = mov r1
+ // r1024 = mov r0
+ // ...
+ // r1 = mov r1024
+ // r2 = mov r1025
+ break; // Woot! Found a good location.
+ --Pos;
+ }
+
+ TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+ CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
+ if (Coalesced) {
+ if (&*InsertPos == UseMI) ++InsertPos;
+ MBB->erase(UseMI);
+ }
+}
+
+/// EmitLiveInCopies - If this is the first basic block in the function,
+/// and if it has live ins that need to be copied into vregs, emit the
+/// copies into the block.
+static void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ if (SchedLiveInCopies) {
+ // Emit the copies at a heuristically-determined location in the block.
+ DenseMap<MachineInstr*, unsigned> CopyRegMap;
+ MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ E = MRI.livein_end(); LI != E; ++LI)
+ if (LI->second) {
+ const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
+ EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
+ RC, CopyRegMap, MRI, TRI, TII);
+ }
+ } else {
+ // Emit the copies into the top of the block.
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ E = MRI.livein_end(); LI != E; ++LI)
+ if (LI->second) {
+ const TargetRegisterClass *RC = MRI.getRegClass(LI->second);
+ TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
+ LI->second, LI->first, RC, RC);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
+ FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
+ FuncInfo(new FunctionLoweringInfo(TLI)),
+ CurDAG(new SelectionDAG(TLI, *FuncInfo)),
+ SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)),
+ GFI(),
+ OptLevel(OL),
+ DAGSize(0)
+{}
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDL;
+ delete CurDAG;
+ delete FuncInfo;
+}
+
+unsigned SelectionDAGISel::MakeReg(MVT VT) {
+ return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ AU.setPreservesAll();
+}
+
+bool SelectionDAGISel::runOnFunction(Function &Fn) {
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || EnableFastISel) &&
+ "-fast-isel-abort requires -fast-isel");
+
+ // Do not codegen any 'available_externally' functions at all; they have
+ // definitions outside the translation unit.
+ if (Fn.hasAvailableExternallyLinkage())
+ return false;
+
+ // Get alias analysis for load/store combining.
+ AA = &getAnalysis<AliasAnalysis>();
+
+ TargetMachine &TM = TLI.getTargetMachine();
+ MF = &MachineFunction::construct(&Fn, TM);
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+ if (MF->getFunction()->hasGC())
+ GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF->getFunction());
+ else
+ GFI = 0;
+ RegInfo = &MF->getRegInfo();
+ DOUT << "\n\n\n=== " << Fn.getName() << "\n";
+
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ CurDAG->init(*MF, MMI, DW);
+ FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel);
+ SDL->init(GFI, *AA);
+
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
+ // Mark landing pad.
+ FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+
+ SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII);
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII);
+
+ // Add function live-ins to entry block live-in set.
+ for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); I != E; ++I)
+ MF->begin()->addLiveIn(I->first);
+
+#ifndef NDEBUG
+ assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+#endif
+
+ FuncInfo->clear();
+
+ return true;
+}
+
+static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+ for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+ if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+ // Apply the catch info to DestBB.
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+ if (!FLI.MBBMap[SrcBB]->isLandingPad())
+ FLI.CatchInfoFound.insert(EHSel);
+#endif
+ }
+}
+
+/// IsFixedFrameObjectWithPosOffset - Check whether the object is a fixed frame
+/// object with a non-negative offset.
+static bool
+IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) {
+ if (!isa<FrameIndexSDNode>(Op)) return false;
+
+ FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op);
+ int FrameIdx = FrameIdxNode->getIndex();
+ return MFI->isFixedObjectIndex(FrameIdx) &&
+ MFI->getObjectOffset(FrameIdx) >= 0;
+}
+
+/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
+/// possibly be overwritten when lowering the outgoing arguments in a tail
+/// call. Currently the implementation is very conservative and assumes that
+/// all arguments sourced from FORMAL_ARGUMENTS or from a CopyFromReg of a
+/// virtual register could be overwritten by direct lowering.
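+/// A typical case is an outgoing argument that is loaded from one of the
+/// caller's fixed incoming-argument stack slots: storing the outgoing
+/// arguments of the tail call may reuse and overwrite that slot.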
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op,
+ MachineFrameInfo *MFI) {
+ RegisterSDNode * OpReg = NULL;
+ if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+ (Op.getOpcode()== ISD::CopyFromReg &&
+ (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
+ (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
+ (Op.getOpcode() == ISD::LOAD &&
+ IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) ||
+ (Op.getOpcode() == ISD::MERGE_VALUES &&
+ Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD &&
+ IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()).
+ getOperand(1))))
+ return true;
+ return false;
+}
+
+/// CheckDAGForTailCallsAndFixThem - This function looks for CALL nodes in the
+/// DAG and fixes their tailcall attribute operand.
+static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
+ const TargetLowering& TLI) {
+ SDNode * Ret = NULL;
+ SDValue Terminator = DAG.getRoot();
+
+ // Find RET node.
+ if (Terminator.getOpcode() == ISD::RET) {
+ Ret = Terminator.getNode();
+ }
+
+ // Fix tail call attribute of CALL nodes.
+ for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(),
+ BI = DAG.allnodes_end(); BI != BE; ) {
+ --BI;
+ if (CallSDNode *TheCall = dyn_cast<CallSDNode>(BI)) {
+ SDValue OpRet(Ret, 0);
+ SDValue OpCall(BI, 0);
+ bool isMarkedTailCall = TheCall->isTailCall();
+ // If a CALL node has its tail call attribute set to true but the call is
+ // not eligible (there is no RET or the target rejects it), the attribute
+ // is reset to false. The TargetLowering::IsEligibleForTailCallOptimization
+ // function must correctly identify tail call optimizable calls.
+ if (!isMarkedTailCall) continue;
+ if (Ret==NULL ||
+ !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) {
+ // Not eligible. Mark CALL node as non tail call. Note that we
+ // can modify the call node in place since calls are not CSE'd.
+ TheCall->setNotTailCall();
+ } else {
+ // Look for tail call clobbered arguments. Emit a series of
+ // copyto/copyfrom virtual register nodes to protect them.
+ SmallVector<SDValue, 32> Ops;
+ SDValue Chain = TheCall->getChain(), InFlag;
+ Ops.push_back(Chain);
+ Ops.push_back(TheCall->getCallee());
+ for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+ bool isByVal = TheCall->getArgFlags(i).isByVal();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (!isByVal &&
+ IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
+ MVT VT = Arg.getValueType();
+ unsigned VReg = MF.getRegInfo().
+ createVirtualRegister(TLI.getRegClassFor(VT));
+ Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(),
+ VReg, Arg, InFlag);
+ InFlag = Chain.getValue(1);
+ Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(),
+ VReg, VT, InFlag);
+ Chain = Arg.getValue(1);
+ InFlag = Arg.getValue(2);
+ }
+ Ops.push_back(Arg);
+ Ops.push_back(TheCall->getArgFlagsVal(i));
+ }
+ // Link in chain of CopyTo/CopyFromReg.
+ Ops[0] = Chain;
+ DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
+ }
+ }
+ }
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
+ BasicBlock::iterator Begin,
+ BasicBlock::iterator End) {
+ SDL->setCurrentBasicBlock(BB);
+
+ // Lower all of the non-terminator instructions.
+ for (BasicBlock::iterator I = Begin; I != End; ++I)
+ if (!isa<TerminatorInst>(I))
+ SDL->visit(*I);
+
+ // Ensure that all instructions which are used outside of their defining
+ // blocks are available as virtual registers. Invoke is handled elsewhere.
+ for (BasicBlock::iterator I = Begin; I != End; ++I)
+ if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
+ SDL->CopyToExportRegsIfNeeded(I);
+
+ // Handle PHI nodes in successor blocks.
+ if (End == LLVMBB->end()) {
+ HandlePHINodesInSuccessorBlocks(LLVMBB);
+
+ // Lower the terminator after the copies are emitted.
+ SDL->visit(*LLVMBB->getTerminator());
+ }
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDL->getControlRoot());
+
+ // Check whether calls in this block are real tail calls. Fix up CALL nodes
+ // with the correct tailcall attribute so that the target can rely on that
+ // attribute to indicate whether the call is really eligible for tail call
+ // optimization.
+ if (PerformTailCallOpt)
+ CheckDAGForTailCallsAndFixThem(*CurDAG, TLI);
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+ SDL->clear();
+}
+
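+/// ComputeLiveOutVRegInfo - Walk the DAG starting at its root and, for each
+/// value copied into a virtual register, record the number of known sign bits
+/// and the known-zero/known-one bits in FunctionLoweringInfo::LiveOutRegInfo
+/// so that later blocks can make use of them.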
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 128> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ APInt Mask;
+ APInt KnownZero;
+ APInt KnownOne;
+
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N))
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ Worklist.push_back(N->getOperand(i).getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-scalar or non-integer values.
+ SDValue Src = N->getOperand(2);
+ MVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger() || SrcVT.isVector())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
+ CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+
+ // Only install this information if it tells us something.
+ if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
+ DestReg -= TargetRegisterInfo::FirstVirtualRegister;
+ FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo();
+ if (DestReg >= FLI.LiveOutRegInfo.size())
+ FLI.LiveOutRegInfo.resize(DestReg+1);
+ FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg];
+ LOI.NumSignBits = NumSignBits;
+ LOI.KnownOne = KnownOne;
+ LOI.KnownZero = KnownZero;
+ }
+ }
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ std::string GroupName;
+ if (TimePassesIsEnabled)
+ GroupName = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+ BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' +
+ BB->getBasicBlock()->getName();
+
+ DOUT << "Initial selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining 1", GroupName);
+ CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ }
+
+ DOUT << "Optimized lowered selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (!DisableLegalizeTypes) {
+ if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+ BlockName);
+
+ bool Changed;
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Type Legalization", GroupName);
+ Changed = CurDAG->LegalizeTypes();
+ } else {
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DOUT << "Type-legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (Changed) {
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName);
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ }
+
+ DOUT << "Optimized type-legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+ }
+
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Vector Legalization", GroupName);
+ Changed = CurDAG->LegalizeVectors();
+ } else {
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Type Legalization 2", GroupName);
+ Changed = CurDAG->LegalizeTypes();
+ } else {
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DOUT << "Optimized vector-legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+ }
+ }
+
+ if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Legalization", GroupName);
+ CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
+ } else {
+ CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
+ }
+
+ DOUT << "Legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining 2", GroupName);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ }
+
+ DOUT << "Optimized legalized selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Selection", GroupName);
+ InstructionSelect();
+ } else {
+ InstructionSelect();
+ }
+
+ DOUT << "Selected selection DAG:\n";
+ DEBUG(CurDAG->dump());
+
+ if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Scheduling", GroupName);
+ Scheduler->Run(CurDAG, BB, BB->end());
+ } else {
+ Scheduler->Run(CurDAG, BB, BB->end());
+ }
+
+ if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Creation", GroupName);
+ BB = Scheduler->EmitSchedule();
+ } else {
+ BB = Scheduler->EmitSchedule();
+ }
+
+ // Free the scheduler state.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName);
+ delete Scheduler;
+ } else {
+ delete Scheduler;
+ }
+
+ DOUT << "Selected machine code:\n";
+ DEBUG(BB->dump());
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
+ MachineFunction &MF,
+ MachineModuleInfo *MMI,
+ DwarfWriter *DW,
+ const TargetInstrInfo &TII) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = 0;
+ if (EnableFastISel)
+ FastIS = TLI.createFastISel(MF, MMI, DW,
+ FuncInfo->ValueMap,
+ FuncInfo->MBBMap,
+ FuncInfo->StaticAllocaMap
+#ifndef NDEBUG
+ , FuncInfo->CatchInfoLost
+#endif
+ );
+
+ // Iterate over all basic blocks in the function.
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ BasicBlock *LLVMBB = &*I;
+ BB = FuncInfo->MBBMap[LLVMBB];
+
+ BasicBlock::iterator const Begin = LLVMBB->begin();
+ BasicBlock::iterator const End = LLVMBB->end();
+ BasicBlock::iterator BI = Begin;
+
+ // Lower any arguments needed in this block if this is the entry block.
+ bool SuppressFastISel = false;
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ LowerArguments(LLVMBB);
+
+ // If any of the arguments has the byval attribute, forgo
+ // fast-isel in the entry block.
+ if (FastIS) {
+ unsigned j = 1;
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I, ++j)
+ if (Fn.paramHasAttr(j, Attribute::ByVal)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort)
+ cerr << "FastISel skips entry block due to byval argument\n";
+ SuppressFastISel = true;
+ break;
+ }
+ }
+ }
+
+ if (MMI && BB->isLandingPad()) {
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ unsigned LabelID = MMI->addLandingPad(BB);
+
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL);
+ BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID);
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // FIXME: Hack around an exception handling flaw (PR1508): the personality
+ // function and list of typeids logically belong to the invoke (or, if you
+ // like, the basic block containing the invoke), and need to be associated
+ // with it in the dwarf exception handling tables. Currently however the
+ // information is provided by an intrinsic (eh.selector) that can be moved
+ // to unexpected places by the optimizers: if the unwind edge is critical,
+ // then breaking it can result in the intrinsics being in the successor of
+ // the landing pad, not the landing pad itself. This results in exceptions
+ // not being caught because no typeids are associated with the invoke.
+ // This may not be the only way things can go wrong, but it is the only way
+ // we try to work around for the moment.
+ BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+ if (Br && Br->isUnconditional()) { // Critical edge?
+ BasicBlock::iterator I, E;
+ for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+ if (isa<EHSelectorInst>(I))
+ break;
+
+ if (I == E)
+ // No catch info found - try to extract some from the successor.
+ copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
+ }
+ }
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS && !SuppressFastISel) {
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ CurDAG->setRoot(SDL->getControlRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+ }
+ FastIS->startNewBlock(BB);
+ // Do FastISel on as many instructions as possible.
+ for (; BI != End; ++BI) {
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(BI))
+ if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ cerr << "FastISel miss: ";
+ BI->dump();
+ }
+ if (EnableFastISelAbort)
+ assert(0 && "FastISel didn't handle a PHI in a successor");
+ break;
+ }
+
+ // First try normal tablegen-generated "fast" selection.
+ if (FastIS->SelectInstruction(BI))
+ continue;
+
+ // Next, try the target-specific hook to handle the instruction.
+ if (FastIS->TargetSelectInstruction(BI))
+ continue;
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(BI)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ cerr << "FastISel missed call: ";
+ BI->dump();
+ }
+
+ if (BI->getType() != Type::VoidTy) {
+ unsigned &R = FuncInfo->ValueMap[BI];
+ if (!R)
+ R = FuncInfo->CreateRegForValue(BI);
+ }
+
+ SDL->setCurDebugLoc(FastIS->getCurDebugLoc());
+ SelectBasicBlock(LLVMBB, BI, next(BI));
+ // If the instruction was codegen'd with multiple blocks,
+ // inform the FastISel object where to resume inserting.
+ FastIS->setCurrentBlock(BB);
+ continue;
+ }
+
+ // Otherwise, give up on FastISel for the rest of the block.
+ // For now, be a little lenient about non-branch terminators.
+ if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ cerr << "FastISel miss: ";
+ BI->dump();
+ }
+ if (EnableFastISelAbort)
+ // The "fast" selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ assert(0 && "FastISel didn't select the entire block");
+ }
+ break;
+ }
+ }
+
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ if (BI != End) {
+ // If FastISel was run and it has a known DebugLoc, then use it.
+ if (FastIS && !FastIS->getCurDebugLoc().isUnknown())
+ SDL->setCurDebugLoc(FastIS->getCurDebugLoc());
+ SelectBasicBlock(LLVMBB, BI, End);
+ }
+
+ FinishBasicBlock();
+ }
+
+ delete FastIS;
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+ DOUT << "Target-post-processed machine code:\n";
+ DEBUG(BB->dump());
+
+ DOUT << "Total amount of phi nodes to update: "
+ << SDL->PHINodesToUpdate.size() << "\n";
+ DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i)
+ DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first
+ << ", " << SDL->PHINodesToUpdate[i].second << ")\n";);
+
+ // Next, now that we know the last MBB to which the LLVM BB expanded, update
+ // PHI nodes in successors.
+ if (SDL->SwitchCases.empty() &&
+ SDL->JTCases.empty() &&
+ SDL->BitTestCases.empty()) {
+ for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = SDL->PHINodesToUpdate[i].first;
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ SDL->PHINodesToUpdate.clear();
+ return;
+ }
+
+ for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDL->BitTestCases[i].Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDL->BitTestCases[i].Parent;
+ SDL->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDL->visitBitTestHeader(SDL->BitTestCases[i]);
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+ }
+
+ for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDL->BitTestCases[i].Cases[j].ThisBB;
+ SDL->setCurrentBasicBlock(BB);
+ // Emit the code
+ if (j+1 != ej)
+ SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB,
+ SDL->BitTestCases[i].Reg,
+ SDL->BitTestCases[i].Cases[j]);
+ else
+ SDL->visitBitTestCase(SDL->BitTestCases[i].Default,
+ SDL->BitTestCases[i].Reg,
+ SDL->BitTestCases[i].Cases[j]);
+
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ // This is "default" BB. We have two jumps to it. From "header" BB and
+ // from last "case" BB.
+ if (PHIBB == SDL->BitTestCases[i].Default) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent));
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases.
+ back().ThisBB));
+ }
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB;
+ if (cBB->succ_end() !=
+ std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(cBB));
+ }
+ }
+ }
+ }
+ SDL->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+ // whether the PHI is a successor of the range check MBB or the jump table MBB.
+ for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDL->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDL->JTCases[i].first.HeaderBB;
+ SDL->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first);
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDL->JTCases[i].second.MBB;
+ SDL->setCurrentBasicBlock(BB);
+ // Emit the code
+ SDL->visitJumpTable(SDL->JTCases[i].second);
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDL->JTCases[i].second.Default) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB));
+ }
+ // JT BB. Just iterate over successors here
+ if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ }
+ }
+ SDL->JTCases.clear();
+
+ // If the switch block involved a branch to one of the actual successors, we
+ // need to update PHI nodes in that block.
+ for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = SDL->PHINodesToUpdate[i].first;
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ if (BB->isSuccessor(PHI->getParent())) {
+ PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second,
+ false));
+ PHI->addOperand(MachineOperand::CreateMBB(BB));
+ }
+ }
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SDL->SwitchCases[i].ThisBB;
+ SDL->setCurrentBasicBlock(BB);
+
+ // Emit the code
+ SDL->visitSwitchCase(SDL->SwitchCases[i]);
+ CurDAG->setRoot(SDL->getRoot());
+ CodeGenAndEmitDAG();
+ SDL->clear();
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS.
+ for (MachineBasicBlock::iterator Phi = BB->begin();
+ Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
+ // This value for this PHI node is recorded in PHINodesToUpdate, get it.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != SDL->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (SDL->PHINodesToUpdate[pn].first == Phi) {
+ Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn].
+ second, false));
+ Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB));
+ break;
+ }
+ }
+ }
+
+ // Don't process RHS if same block as LHS.
+ if (BB == SDL->SwitchCases[i].FalseBB)
+ SDL->SwitchCases[i].FalseBB = 0;
+
+ // If we haven't handled the RHS, do so now. Otherwise, we're done.
+ SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB;
+ SDL->SwitchCases[i].FalseBB = 0;
+ }
+ assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0);
+ }
+ SDL->SwitchCases.clear();
+
+ SDL->PHINodesToUpdate.clear();
+}
+
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it, otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+ if (!Ctor) {
+ Ctor = ISHeuristic;
+ RegisterScheduler::setDefault(Ctor);
+ }
+
+ return Ctor(this, OptLevel);
+}
+
+ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
+ return new ScheduleHazardRecognizer();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
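+///
+/// For example, if bits 4-7 of X are known to be zero, the combiner may have
+/// rewritten (and X, 255) as (and X, 15); the known-zero check below still
+/// lets a pattern written against 255 match.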
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual AND mask allows bits that the desired mask forbids, this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+ // If the actual OR mask allows bits that the desired mask forbids, this doesn't match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+
+ APInt KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[0]); // input chain.
+ Ops.push_back(InOps[1]); // input asm string.
+
+ unsigned i = 2, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Flag)
+ --e; // Don't process a flag operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
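+ // The low 3 bits of the flag word encode the operand kind (4 == MEM); the
+ // remaining bits hold the number of operands that follow, which is what
+ // InlineAsm::getNumOperandRegisters extracts and what the
+ // (SelOps.size() << 3) re-encoding below writes back for memory operands.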
+ if ((Flags & 7) != 4 /*MEM*/) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) {
+ cerr << "Could not match memory address. Inline asm failure!\n";
+ exit(1);
+ }
+
+ // Add this to the output node.
+ MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy();
+ Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3),
+ IntPtrTy));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the flag input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findFlagUse - Return the user of the MVT::Flag value produced by the
+/// specified SDNode, or null if there is none.
+///
+static SDNode *findFlagUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root,
+ SmallPtrSet<SDNode*, 16> &Visited) {
+ if (Use->getNodeId() < Def->getNodeId() ||
+ !Visited.insert(Use))
+ return false;
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ SDNode *N = Use->getOperand(i).getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
+ return true;
+ }
+ return false;
+}
+
+/// isNonImmUse - Start searching from Root up the DAG to check whether Def can
+/// be reached. Return true if that's the case. However, ignore direct uses
+/// by ImmedUse (which would be U in the example illustrated in
+/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
+/// case).
+/// FIXME: to be really generic, we should allow direct use by any node
+/// that is being folded. But realistically, since we only fold loads which
+/// have one non-chain use, we only need to watch out for the load/op/store
+/// and load/op/cmp cases, where the root (store / cmp) may reach the load via
+/// its chain operand.
+static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
+ SmallPtrSet<SDNode*, 16> Visited;
+ return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+}
+
+/// IsLegalAndProfitableToFold - Returns true if the operand node N of U can
+/// legally be folded during the instruction selection that starts at Root,
+/// and folding N is profitable.
+bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ // If Root's use can somehow reach N through a path that doesn't contain
+ // U, then folding N would create a cycle. E.g., in the following
+ // diagram, Root can reach N through X. If N is folded into Root, then
+ // X is both a predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces a flag, then it gets (even more) interesting. Since it
+ // will be "glued" together with its flag use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [FU] //
+ //
+ // If FU (flag use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of FU and a successor of
+ // Fold. But since Fold and FU are flagged together, this will create
+ // a cycle in the scheduling graph.
+
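+ // If Root produces a flag, walk up to the topmost flag user; the scheduler
+ // will glue that chain of nodes to Root, so it is the effective root for
+ // the reachability check below.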
+ MVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Flag) {
+ SDNode *FU = findFlagUse(Root);
+ if (FU == NULL)
+ break;
+ Root = FU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+ }
+
+ return !isNonImmUse(Root, N, U);
+}
+
+
+char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000..3eec684
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,416 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getMVTString();
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method is
+ /// implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ MVT VT = Op.getValueType();
+ if (VT == MVT::Flag)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getNodeLabel(const SDNode *Node,
+ const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Op = Node->getOperationName(G);
+
+ if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) {
+ Op += ": " + utostr(CSDN->getZExtValue());
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) {
+ Op += ": " + ftostr(CSDN->getValueAPF());
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(Node)) {
+ Op += ": " + GADN->getGlobal()->getName();
+ if (int64_t Offset = GADN->getOffset()) {
+ if (Offset > 0)
+ Op += "+" + itostr(Offset);
+ else
+ Op += itostr(Offset);
+ }
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) {
+ Op += " " + itostr(FIDN->getIndex());
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) {
+ Op += " " + itostr(JTDN->getIndex());
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){
+ if (CP->isMachineConstantPoolEntry()) {
+ Op += '<';
+ {
+ raw_string_ostream OSS(Op);
+ OSS << *CP->getMachineCPVal();
+ }
+ Op += '>';
+ } else {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ Op += "<" + ftostr(CFP->getValueAPF()) + ">";
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
+ Op += "<" + utostr(CI->getZExtValue()) + ">";
+ else {
+ Op += '<';
+ {
+ raw_string_ostream OSS(Op);
+ WriteAsOperand(OSS, CP->getConstVal(), false);
+ }
+ Op += '>';
+ }
+ }
+ Op += " A=" + itostr(CP->getAlignment());
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
+ Op = "BB: ";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ Op += LBB->getName();
+ //Op += " " + (const void*)BBDN->getBasicBlock();
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) {
+ if (G && R->getReg() != 0 &&
+ TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ Op = Op + " " +
+ G->getTarget().getRegisterInfo()->getName(R->getReg());
+ } else {
+ Op += " #" + utostr(R->getReg());
+ }
+ } else if (const DbgStopPointSDNode *D = dyn_cast<DbgStopPointSDNode>(Node)) {
+ DICompileUnit CU(cast<GlobalVariable>(D->getCompileUnit()));
+ std::string FN;
+ Op += ": " + CU.getFilename(FN);
+ Op += ":" + utostr(D->getLine());
+ if (D->getColumn() != 0)
+ Op += ":" + utostr(D->getColumn());
+ } else if (const LabelSDNode *L = dyn_cast<LabelSDNode>(Node)) {
+ Op += ": LabelID=" + utostr(L->getLabelID());
+ } else if (const CallSDNode *C = dyn_cast<CallSDNode>(Node)) {
+ Op += ": CallingConv=" + utostr(C->getCallingConv());
+ if (C->isVarArg())
+ Op += ", isVarArg";
+ if (C->isTailCall())
+ Op += ", isTailCall";
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(Node)) {
+ Op += "'" + std::string(ES->getSymbol()) + "'";
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) {
+ if (M->getValue())
+ Op += "<" + M->getValue()->getName() + ">";
+ else
+ Op += "<null>";
+ } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(Node)) {
+ const Value *V = M->MO.getValue();
+ Op += '<';
+ if (!V) {
+ Op += "(unknown)";
+ } else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // PseudoSourceValues don't have names, so use their print method.
+ raw_string_ostream OSS(Op);
+ PSV->print(OSS);
+ } else {
+ Op += V->getName();
+ }
+ Op += '+' + itostr(M->MO.getOffset()) + '>';
+ } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(Node)) {
+ Op = Op + " AF=" + N->getArgFlags().getArgFlagsString();
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) {
+ Op = Op + " VT=" + N->getVT().getMVTString();
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) {
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD:
+ Op = Op + "<anyext ";
+ break;
+ case ISD::SEXTLOAD:
+ Op = Op + " <sext ";
+ break;
+ case ISD::ZEXTLOAD:
+ Op = Op + " <zext ";
+ break;
+ }
+ if (doExt)
+ Op += LD->getMemoryVT().getMVTString() + ">";
+ if (LD->isVolatile())
+ Op += "<V>";
+ Op += LD->getIndexedModeName(LD->getAddressingMode());
+ if (LD->getAlignment() > 1)
+ Op += " A=" + utostr(LD->getAlignment());
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) {
+ if (ST->isTruncatingStore())
+ Op += "<trunc " + ST->getMemoryVT().getMVTString() + ">";
+ if (ST->isVolatile())
+ Op += "<V>";
+ Op += ST->getIndexedModeName(ST->getAddressingMode());
+ if (ST->getAlignment() > 1)
+ Op += " A=" + utostr(ST->getAlignment());
+ }
+
+#if 0
+ Op += " Id=" + itostr(Node->getNodeId());
+#endif
+
+ return Op;
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
+ Title);
+#else
+ cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
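+// For example, with a SelectionDAG pointer named DAG in scope one might do
+//   (gdb) call DAG->viewGraph()
+// (illustrative only; the exact expression depends on the debug session).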
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ cerr << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ cerr << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string("");
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ cerr << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color,
+                                          DenseSet<SDNode *> &visited,
+                                          int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ DOUT << "setSubgraphColor hit max level\n";
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+    for (SDNodeIterator i = SDNodeIterator::begin(N),
+                        iend = SDNodeIterator::end(N); i != iend; ++i) {
+      hit_limit = setSubgraphColorHelper(*i, Color, visited,
+                                         level+1, printed) || hit_limit;
+ }
+ }
+#else
+ cerr << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+    } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ cerr << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> FlaggedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
+ FlaggedNodes.push_back(N);
+ while (!FlaggedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(), DAG);
+ FlaggedNodes.pop_back();
+ if (!FlaggedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::
+getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000..3334e53
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,2592 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+namespace llvm {
+TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
+ bool isLocal = GV->hasLocalLinkage();
+ bool isDeclaration = GV->isDeclaration();
+ // FIXME: what should we do for protected and internal visibility?
+ // For variables, is internal different from hidden?
+ bool isHidden = GV->hasHiddenVisibility();
+
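+  // In short: under PIC, a definition that cannot be preempted (local or
+  // hidden) uses the local-dynamic model and everything else general-dynamic;
+  // without PIC, a symbol defined in this module (or hidden) uses local-exec
+  // and the rest initial-exec.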
+ if (reloc == Reloc::PIC_) {
+ if (isLocal || isHidden)
+ return TLSModel::LocalDynamic;
+ else
+ return TLSModel::GeneralDynamic;
+ } else {
+ if (!isDeclaration || isHidden)
+ return TLSModel::LocalExec;
+ else
+ return TLSModel::InitialExec;
+ }
+}
+}
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
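+  // These defaults follow the libgcc naming convention: the integer suffixes
+  // hi/si/di/ti denote i16/i32/i64/i128 and the FP suffixes sf/df/xf/tf
+  // denote f32/f64/f80/ppcf128 (with a few exceptions such as the PowerPC
+  // __gcc_q* routines).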
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ else if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ else if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ else if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
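+  // Each entry is the condition code used to compare a comparison libcall's
+  // integer result against zero to recover the original FP predicate, e.g.
+  // __eqsf2(a, b) returns zero exactly when a == b, hence SETEQ.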
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
+
+TargetLowering::TargetLowering(TargetMachine &tm)
+ : TM(tm), TD(TM.getTargetData()) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(ConvertActions, 0, sizeof(ConvertActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use addLegalFPImmediate
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10,MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ UsesGlobalOffsetTable = false;
+ ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType());
+ ShiftAmtHandling = Undefined;
+  memset(RegClassForVT, 0, MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ allowUnalignedMemoryAccesses = false;
+ benefitFromCodePlacementOpt = false;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = SchedulingForLatency;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ IfCvtBlockSizeLimit = 2;
+ IfCvtDupBlockSizeLimit = 0;
+ PrefLoopAlignment = 0;
+
+ InitLibcallNames(LibcallRoutineNames);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+
+ // Tell Legalize whether the assembler supports DEBUG_LOC.
+ const TargetAsmInfo *TASM = TM.getTargetAsmInfo();
+ if (!TASM || !TASM->hasDotLocAndDotFile())
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+}
+
+TargetLowering::~TargetLowering() {}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLowering::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
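+  // For example, if i32 is the largest legal integer type, i64 ends up
+  // needing two registers and i128 four.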
+ for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
+ MVT EVT = (MVT::SimpleValueType)ExpandedReg;
+ if (!EVT.isInteger())
+ break;
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction(EVT, Expand);
+ }
+
+ // Inspect all of the ValueType's smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, Promote);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, Expand);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, Promote);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, Expand);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (!isTypeLegal(VT)) {
+ MVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdown(VT,
+ IntermediateVT, NumIntermediates,
+ RegisterVT);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ // Determine if there is a legal wider type.
+ bool IsLegalWiderType = false;
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts) {
+ TransformToType[i] = SVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (!IsLegalWiderType) {
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ }
+ }
+ }
+ }
+}
+
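+/// getTargetNodeName - Return a human-readable name for a target-specific
+/// DAG node, or null; this default knows of none, so targets are expected
+/// to override it for their own opcodes.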
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
+}
+
+
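+/// getSetCCResultType - Return the value type to use for the result of a
+/// setcc operation; by default this is the target's pointer-sized integer
+/// type.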
+MVT TargetLowering::getSetCCResultType(MVT VT) const {
+ return getValueType(TD->getIntPtrType());
+}
+
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(MVT VT,
+ MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
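+  // For example, v8f32 on a target where only v4f32 is legal is halved once
+  // here, leaving NumElts == 4 and NumVectorRegs == 2.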
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (DestVT.bitsLT(NewVT)) {
+ // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ } else {
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+  }
+}
+
+/// getWidenVectorType: given a vector type, returns the type to widen to
+/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other.
+/// When and where to widen is target dependent, based on the cost of
+/// scalarizing vs using the wider vector type.
+MVT TargetLowering::getWidenVectorType(MVT VT) const {
+ assert(VT.isVector());
+ if (isTypeLegal(VT))
+ return VT;
+
+ // Default is not to widen until moved to LegalizeTypes
+ return MVT::Other;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (usesGlobalOffsetTable())
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ return Table;
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // Assume that everything is safe in static mode.
+ if (getTargetMachine().getRelocationModel() == Reloc::Static)
+ return true;
+
+ // In dynamic-no-pic mode, assume that known defined values are safe.
+ if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+ GA &&
+ !GA->getGlobal()->isDeclaration() &&
+ !GA->getGlobal()->isWeakForLinker())
+ return true;
+
+ // Otherwise assume nothing is safe.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+    // If the constant has bits set outside the demanded mask, shrink it to
+    // just the demanded bits.
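+    // For example, if only the low four bits of (x & 0xFF) are demanded, the
+    // constant can be shrunk to 0xF.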
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ MVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ DebugLoc dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
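+  // For example, on a target where truncating i32 to i16 and zero-extending
+  // i16 back to i32 are both free, an i32 add whose high bits are never
+  // demanded can be rewritten as zext(trunc(x) + trunc(y)) in i16.
+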
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ MVT SmallVT = MVT::getIntegerVT(SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(1)));
+ SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+ return CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
+
+/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream. If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, returning the original and new nodes in Old and New. Otherwise,
+/// analyze the expression and return a mask of KnownOne and KnownZero bits for
+/// the expression (used to simplify the caller). The KnownZero/One bits may
+/// only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ assert(Op.getValueSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+ APInt NewMask = DemandedMask;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+
+ // Other users may use these bits.
+ if (!Op.getNode()->hasOneUse()) {
+ if (Depth != 0) {
+      // If not at the root, just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the NewMask to all bits.
+ NewMask = APInt::getAllOnesValue(BitWidth);
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (Op.getOpcode() != ISD::UNDEF)
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
+ KnownZero = ~KnownOne & NewMask;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS, here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt LHSZero, LHSOne;
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
+ LHSZero, LHSOne, Depth+1);
+ // If the LHS already has zeros where RHSC does, this and is dead.
+ if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+    // Output known-0 bits are clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+    // Output known-1 bits are set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((KnownZero & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((KnownZero2 & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are set if set in exactly one of the LHS and RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
+ if ((KnownOne & KnownOne2) == KnownOne) {
+ MVT VT = Op.getValueType();
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ Op.getOperand(0), ANDC));
+ }
+ }
+
+    // If the RHS is a constant, see if we can simplify it. For XOR, we
+    // prefer to force bits to 1 if that will produce a -1; if we can't
+    // force bits, try to shrink the constant.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+ // if we can expand it to have all bits set, do it
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ MVT VT = Op.getValueType();
+ SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, VT));
+ return TLO.CombineTo(Op, New);
+ }
+ // if it already has all the bits set, nothing to change
+ // but don't shrink either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ MVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero <<= SA->getZExtValue();
+ KnownOne <<= SA->getZExtValue();
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ MVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+ unsigned VTSize = VT.getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (DemandedMask == 1)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
+
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ MVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ if (HighBits.intersects(NewMask))
+ InDemandedMask |= APInt::getSignBit(VT.getSizeInBits());
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bit, adjusted to where it is now in the mask.
+ APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0),
+ Op.getOperand(1)));
+ } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getSizeInBits()) &
+ NewMask;
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits());
+ InSignBit.zext(BitWidth);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth,
+ EVT.getSizeInBits()) &
+ NewMask;
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+
+ if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+ APInt InMask = NewMask;
+ InMask.trunc(OperandBitWidth);
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+ if (!NewBits.intersects(NewMask))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ MVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getSizeInBits();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
+ APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt NewBits = ~InMask & NewMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ APInt InDemandedBits = InMask & NewMask;
+ InDemandedBits |= InSignBit;
+ InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne.intersects(InSignBit)) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+ APInt InMask = NewMask;
+ InMask.trunc(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero.zext(BitWidth);
+ KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ APInt TruncMask = NewMask;
+ TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero.trunc(BitWidth);
+ KnownOne.trunc(BitWidth);
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).getNode()->hasOneUse()) {
+ SDValue In = Op.getOperand(0);
+ unsigned InBitWidth = In.getValueSizeInBits();
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+ APInt HighBits = APInt::getHighBitsSet(InBitWidth,
+ InBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue());
+ HighBits.trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ In.getOperand(1)));
+ }
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= ~InMask & NewMask;
+ break;
+ }
+ case ISD::BIT_CONVERT:
+#if 0
+ // If this is an FP->Int bitcast and if the sign bit is the only thing that
+ // is demanded, turn this into a FGETSIGN.
+ if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) &&
+ MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
+ !MVT::isVector(Op.getOperand(0).getValueType())) {
+ // Only do this xform if FGETSIGN is valid or if before legalize.
+ if (!TLO.AfterLegalize ||
+ isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+ Op.getOperand(0));
+ unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+ Sign, ShAmt));
+ }
+ }
+#endif
+ break;
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
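+ // For example, if only the low 8 bits of (add x, y) are demanded, carries
+ // only propagate upward, so only the low 8 bits of x and y can affect the
+ // result.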
+ APInt LoMask = APInt::getLowBitsSet(BitWidth,
+ BitWidth - NewMask.countLeadingZeros());
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ // See if the operation should be performed at a smaller bit width.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+ }
+ // FALL THROUGH
+ default:
+ // Just use ComputeMaskedBits to compute output bits.
+ TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+ return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+ // A left-shift of a constant one will have exactly one bit set, because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue() == 1)
+ return true;
+
+ // Similarly, a right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL)
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+ if (C->getAPIntValue().isSignBit())
+ return true;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to ComputeMaskedBits to catch other known cases.
+ MVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI, DebugLoc dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ }
+
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+ if (isa<ConstantSDNode>(N0.getNode())) {
+ return DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ } else {
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
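+ // Illustrative example (hypothetical operand p): on a little-endian target,
+ // "(and (load i32 p), 0xFF00) == 0" narrows to a compare of the single byte
+ // loaded from p+1 against 0 (bestWidth = 8, bestOffset = 1, bestMask = 0xFF).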
+ if (DCI.isBeforeLegalize() &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ uint64_t bestMask = 0;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed() &&
+ // FIXME: This uses getZExtValue() below so it only works on i64 and
+ // below.
+ N0.getValueType().getSizeInBits() <= 64) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ uint64_t newMask = (1ULL << width) - 1;
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!TD->isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask >> (offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask = newMask << width;
+ }
+ }
+ }
+ if (bestWidth) {
+ MVT newVT = MVT::getIntegerVT(bestWidth);
+ if (newVT.isRound()) {
+ MVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getSrcValue(),
+ Lod->getSrcValueOffset() + bestOffset,
+ false, NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask, newVT)),
+ DAG.getConstant(0LL, newVT), Cond);
+ }
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(APInt(C1).trunc(InSize),
+ N0.getOperand(0).getValueType()),
+ Cond);
+ default:
+ break; // TODO: be more careful with signed comparisons
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ MVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the extended part has any inconsistent bits, it cannot ever
+ // compare equal. In other words, they have to be all ones or all
+ // zeros.
+ APInt ExtBits =
+ APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
+ if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+ SDValue ZextOp;
+ MVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1);
+ if (TrueWhenTrue)
+ return N0;
+
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1-1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C1+1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ MVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy();
+ if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (isPowerOf2_64(AndRHS->getZExtValue())) {
+ return DAG.getNode(ISD::SRL, dl, VT, N0,
+ DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
+ ShiftTy));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::SRL, dl, VT, N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy));
+ }
+ }
+ }
+ }
+ } else if (isa<ConstantSDNode>(N0.getNode())) {
+ // Ensure that the constant occurs on the RHS.
+ return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: assert(0 && "Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, VT);
+ case 2: // Undefined.
+ return DAG.getUNDEF(VT);
+ }
+ }
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+ }
+
+ if (N0 == N1) {
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger())
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(UOF, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond)
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ N0.getValueType()),
+ Cond);
+ }
+ }
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
+ N1,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(1),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+ return DAG.getSetCC(dl, VT, N1.getOperand(0),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getNode()->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // X == (Z-X) --> X<<1 == Z
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+
+ // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+ // Note that where y is variable and is known to have at most
+ // one bit set (for example, if it is z&1) we cannot do this;
+ // the expressions are not equivalent when y==0.
+ if (N0.getOpcode() == ISD::AND)
+ if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+ if (ValueHasExactlyOneBitSet(N1, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
+ }
+ if (N1.getOpcode() == ISD::AND)
+ if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+ if (ValueHasExactlyOneBitSet(N0, DAG)) {
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDValue Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ switch (Cond) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
+ int64_t &Offset) const {
+ if (isa<GlobalAddressSDNode>(N)) {
+ GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+ if (V) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
+/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
+/// location that the 'Base' load is loading from.
+bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
+ unsigned Bytes, int Dist,
+ const MachineFrameInfo *MFI) const {
+ if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+ return false;
+ MVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ GlobalValue *GV1 = NULL;
+ GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ // FIXME: lots more standard ones to handle.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': // Relocatable Constant
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ return C_Other;
+ }
+ }
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
+ Constraint[Constraint.size()-1] == '}')
+ return C_Register;
+ return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C is missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ return std::vector<unsigned>();
+}
+
+
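+/// getRegForInlineAsmConstraint - Map an explicit register constraint of the
+/// form "{regname}" (for instance "{r11}", an arbitrary example) to a physical
+/// register and a register class whose value types are legal on this target.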
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint[0] != '{')
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (StringsEqualNoCase(RegName, RI->get(*I).AsmName))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ default: assert(0 && "Unknown constraint type!");
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then choosing the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
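+/// Illustrative example (hypothetical operands): for the constraint string
+/// "imr" with an operand that is the constant 4, rule 1 picks 'i', since
+/// LowerAsmOperandForConstraint accepts the immediate; with a non-constant
+/// operand, rule 2 picks 'm', the most general of the remaining options.
+///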
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ bool hasMemory, const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ bool hasMemory,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels).
+ if (isa<BasicBlock>(OpInfo.CallOperandVal) ||
+ isa<ConstantInt>(OpInfo.CallOperandVal))
+ return;
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // The default implementation accepts a conservative, RISC-like set of r+r
+ // and r+i addressing modes.
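+ // Illustrative queries (hypothetical, for clarity): "reg + 100" and
+ // "reg + reg" are accepted; "reg + 100000" (offset too large for the
+ // immediate field), any mode with a global base, and "reg + reg + offset"
+ // are rejected.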
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
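+/// For reference, a hand-worked example: a 32-bit signed divide by 7 uses the
+/// magic constant 0x92492493 with shift 2; since the divisor is positive and
+/// the constant is negative, the numerator is added back before the arithmetic
+/// shift and the final sign-bit correction.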
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl= N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy()));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
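+/// For reference, a hand-worked example: a 32-bit unsigned divide by 7 gives
+/// m = 0x24924925, a = 1, s = 3, so the path below with magics.a != 0 emits
+/// q = MULHU(x, m); t = SRL(x - q, 1); result = SRL(t + q, s - 1).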
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+ APInt::mu magics = N1C->getAPIntValue().magicu();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equivalent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy()));
+ }
+}
+
+/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET
+/// node that don't prevent tail call optimization.
+static SDValue IgnoreHarmlessInstructions(SDValue node) {
+ // Found call return.
+ if (node.getOpcode() == ISD::CALL) return node;
+ // Ignore MERGE_VALUES. Will have at least one operand.
+ if (node.getOpcode() == ISD::MERGE_VALUES)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
+ // Ignore ANY_EXTEND node.
+ if (node.getOpcode() == ISD::ANY_EXTEND)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
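+ // Ignore TRUNCATE node.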
+ if (node.getOpcode() == ISD::TRUNCATE)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
+ // Any other node type.
+ return node;
+}
+
+bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
+ SDValue Ret) {
+ unsigned NumOps = Ret.getNumOperands();
+ // ISD::CALL results:(value0, ..., valuen, chain)
+ // ISD::RET operands:(chain, value0, flag0, ..., valuen, flagn)
+ // Value return:
+ // Check that operand of the RET node sources from the CALL node. The RET node
+ // has at least two operands. Operand 0 holds the chain. Operand 1 holds the
+ // value.
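+ // Illustrative case: for "return foo(x)", operand 1 of the RET must trace
+ // back (through any MERGE_VALUES, ANY_EXTEND, or TRUNCATE nodes) to value 0
+ // of the CALL node for this constraint to hold.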
+ if (NumOps > 1 &&
+ IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0))
+ return true;
+ // void return: The RET node has the chain result value of the CALL node as
+ // input.
+ if (NumOps == 1 &&
+ Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
+ return true;
+
+ return false;
+}
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..2402f81
--- /dev/null
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,439 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
+// ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// coallocated in the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN ShadowStackGC : public GCStrategy {
+ /// Head - This is the global linked list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ const StructType *StackEntryTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ const Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators. This transform allows it to be non-allocating.
+ class VISIBILITY_HIDDEN EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return' and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ BasicBlock *CleanupBB = BasicBlock::Create(CleanupBBName, &F);
+ UnwindInst *UI = new UnwindInst(CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ Args.append(CI->op_begin() + 1, CI->op_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getOperand(0),
+ NewBB, CleanupBB,
+ Args.begin(), Args.end(),
+ CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(UI->getParent(), UI);
+ return &Builder;
+ }
+ }
+ };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // initializeCustomLowering creates the abstract type of this value.
+
+ Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*,16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Type::Int32Ty, Roots.size(), false),
+ ConstantInt::get(Type::Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(BaseElts, 2),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
+ Metadata.begin(), NumMeta)
+ };
+
+ Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2);
+
+ std::string TypeName("gc_map.");
+ TypeName += utostr(NumMeta);
+ F.getParent()->addTypeName(TypeName, FrameMap->getType());
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName(),
+ F.getParent());
+
+ Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0),
+ ConstantInt::get(Type::Int32Ty, 0) };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+}
+
+const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+ // initializeCustomLowering creates the generic version of this type.
+ std::vector<const Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+ Type *Ty = StructType::get(EltTys);
+
+ std::string TypeName("gc_stackentry.");
+ TypeName += F.getName();
+ F.getParent()->addTypeName(TypeName, Ty);
+
+ return Ty;
+}
+
+/// initializeCustomLowering - Set up the abstract FrameMap and StackEntry
+/// types and find (or create) the global shadow-stack root chain.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<const Type*> EltTys;
+ EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array.
+ StructType *FrameMapTy = StructType::get(EltTys);
+ M.addTypeName("gc_map", FrameMapTy);
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+ OpaqueType *RecursiveTy = OpaqueType::get();
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(RecursiveTy));
+ EltTys.push_back(FrameMapPtrTy);
+ PATypeHolder LinkTyH = StructType::get(EltTys);
+
+ RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
+ StackEntryTy = cast<StructType>(LinkTyH.get());
+ const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+ M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from
+ // a FunctionPass?
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(StackEntryPtrTy, false,
+ GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy),
+ "llvm_gc_root_chain", &M);
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
+
+bool ShadowStackGC::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGC::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst*,AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getOperand(1)->stripPointerCasts()));
+ if (IsNullValue(CI->getOperand(2)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx, int Idx2, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
+ ConstantInt::get(Type::Int32Ty, Idx),
+ ConstantInt::get(Type::Int32Ty, Idx2) };
+ Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *
+ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
+ ConstantInt::get(Type::Int32Ty, Idx) };
+ Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// performCustomLowering - Insert code to maintain the shadow stack.
+bool ShadowStackGC::performCustomLowering(Function &F) {
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+ "gc_frame");
+
+ while (isa<AllocaInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP)) ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
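+
+// A rough sketch (illustrative only, not part of this pass) of what the
+// frames built above look like to a collector at runtime: every function
+// pushes a stack entry holding its frame map pointer and root slots onto a
+// global chain, and pops that entry again on every exit path. The struct and
+// symbol names below are assumptions for illustration, not a normative ABI:
+//
+//   struct FrameMap   { int32_t NumRoots; int32_t NumMeta; const void *Meta[]; };
+//   struct StackEntry { StackEntry *Next; const FrameMap *Map; void *Roots[]; };
+//   StackEntry *gc_root_chain;              // the list head 'Head' refers to
+//
+//   void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
+//     for (StackEntry *E = gc_root_chain; E; E = E->Next) {
+//       unsigned I = 0;
+//       for (unsigned N = E->Map->NumMeta; I != N; ++I)   // roots with metadata
+//         Visitor(&E->Roots[I], E->Map->Meta[I]);         // (numbered first)
+//       for (unsigned N = E->Map->NumRoots; I != N; ++I)  // metadata-free roots
+//         Visitor(&E->Roots[I], 0);
+//     }
+//   }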
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 0000000..e44a138
--- /dev/null
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1141 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+// loop the spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getFunction()->getName();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save/restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ DOUT << "Place CSR spills/restores for "
+ << MF->getFunction()->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ if (prevAnticIn |= AnticIn[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ if (prevAvailOut |= AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - build the sets anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+  // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB);
+ }
+ }
+
+ DEBUG(if (ShrinkWrapDebugging >= Details) {
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << " Antic/Avail Sets:\n";
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "iterations = " << iterations << "\n";
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
+ DOUT << "-----------------------------------------------------------\n";
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets(MBB);
+ }
+ DOUT << "-----------------------------------------------------------\n";
+ });
+}
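+
+// For a concrete picture of the recurrences above, consider a diamond CFG
+// B0 -> {B1,B2} -> B3, where B3 returns and a single CSR r is used only in
+// B1. Restated with one bit per CSR in plain unsigned masks (an illustrative
+// sketch, not code used by this pass), a single sweep already reaches the
+// fixed point:
+//
+//   unsigned Used[4]    = { 0, 1, 0, 0 };                      // r used in B1
+//   unsigned AnticIn[4] = { 0, 0, 0, 0 }, AnticOut[4] = { 0, 0, 0, 0 };
+//
+//   AnticIn[3]  = Used[3];                                          // {}
+//   AnticOut[1] = AnticIn[3]; AnticIn[1] = Used[1] | AnticOut[1];   // {r}
+//   AnticOut[2] = AnticIn[3]; AnticIn[2] = Used[2] | AnticOut[2];   // {}
+//   AnticOut[0] = AnticIn[1] & AnticIn[2];                          // {}
+//   AnticIn[0]  = Used[0] | AnticOut[0];                            // {}
+//
+// Avail{In,Out} is the mirror image computed over predecessors; here
+// AvailIn[1] = AvailOut[0] = {} and AvailOut[1] = {r}. With these sets,
+// calcSpillPlacements/calcRestorePlacements below place both the spill and
+// the restore of r in B1, the minimal region containing its use.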
+
+/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
+/// of the loop given by LP and its parent loops. This prevents spills/restores
+/// from being placed in the bodies of loops.
+///
+void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
+ if (! MBB || !LP)
+ return;
+
+ std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
+ for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* LBB = loopBlocks[i];
+ if (LBB == MBB)
+ continue;
+ if (CSRUsed[LBB].contains(CSRUsed[MBB]))
+ continue;
+ CSRUsed[LBB] |= CSRUsed[MBB];
+ }
+}
+
+/// calculateSets - collect the CSRs used in this function, compute
+/// the DF sets that describe the initial minimal regions in the
+/// Machine CFG around which CSR spills and restores must be placed.
+///
+/// Additionally, this function decides if shrink wrapping should
+/// be disabled for the current function, checking the following:
+/// 1. the current function has more than 500 MBBs: heuristic limit
+/// on function size to reduce compile time impact of the current
+/// iterative algorithm.
+/// 2. all CSRs are used in the entry block.
+/// 3. all CSRs are used in all immediate successors of the entry block.
+/// 4. all CSRs are used in a subset of blocks, each of which dominates
+/// all return blocks. These blocks, taken as a subgraph of the MCFG,
+/// are equivalent to the entry block since all execution paths pass
+/// through them.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+ // Sets used to compute spill, restore placement sets.
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty()) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": uses no callee-saved registers\n");
+ return false;
+ }
+
+ // Save refs to entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (isReturnBlock(MBB))
+ ReturnBlocks.push_back(MBB);
+
+ // Determine if this function has fast exit paths.
+ DEBUG(if (ShrinkWrapThisFunction)
+ findFastExitPath());
+
+ // Limit shrink wrapping via the current iterative bit vector
+ // implementation to functions with <= 500 MBBs.
+ if (Fn.size() > 500) {
+ DEBUG(if (ShrinkWrapThisFunction)
+ DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": too large (" << Fn.size() << " MBBs)\n");
+ ShrinkWrapThisFunction = false;
+ }
+
+ // Return now if not shrink wrapping.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ // Collect set of used CSRs.
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ UsedCSRegs.set(inx);
+ }
+
+ // Walk instructions in all MBBs, create CSRUsed[] sets, choose
+ // whether or not to shrink wrap this function.
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+
+ bool allCSRUsesInEntryBlock = true;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ unsigned Reg = CSI[inx].getReg();
+ // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+ // CSRUsed map for the current block.
+ for (unsigned opInx = 0, opEnd = I->getNumOperands();
+ opInx != opEnd; ++opInx) {
+ const MachineOperand &MO = I->getOperand(opInx);
+ if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(Reg, MOReg))) {
+ // CSR Reg is defined/used in block MBB.
+ CSRUsed[MBB].set(inx);
+ // Check for uses in EntryBlock.
+ if (MBB != EntryBlock)
+ allCSRUsesInEntryBlock = false;
+ }
+ }
+ }
+ }
+
+ if (CSRUsed[MBB].empty())
+ continue;
+
+ // Propagate CSRUsed[MBB] in loops
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ // Add top level loop to work list.
+ MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
+ MachineLoop* PLP = getTopLevelLoopParent(LP);
+
+ if (! HDR) {
+ HDR = PLP->getHeader();
+ assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
+ MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
+ HDR = *PI;
+ }
+ TLLoops[HDR] = PLP;
+
+ // Push uses from inside loop to its parent loops,
+ // or to all other MBBs in its loop.
+ if (LP->getLoopDepth() > 1) {
+ for (MachineLoop* PLP = LP->getParentLoop(); PLP;
+ PLP = PLP->getParentLoop()) {
+ propagateUsesAroundLoop(MBB, PLP);
+ }
+ } else {
+ propagateUsesAroundLoop(MBB, LP);
+ }
+ }
+ }
+
+ if (allCSRUsesInEntryBlock) {
+ DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ } else {
+ bool allCSRsUsedInEntryFanout = true;
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (CSRUsed[SUCC] != UsedCSRegs)
+ allCSRsUsedInEntryFanout = false;
+ }
+ if (allCSRsUsedInEntryFanout) {
+ DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in imm successors of EntryBlock\n");
+ ShrinkWrapThisFunction = false;
+ }
+ }
+
+ if (ShrinkWrapThisFunction) {
+ // Check if MBB uses CSRs and dominates all exit nodes.
+ // Such nodes are equiv. to the entry node w.r.t.
+ // CSR uses: every path through the function must
+ // pass through this node. If each CSR is used at least
+ // once by these nodes, shrink wrapping is disabled.
+ CSRegSet CSRUsedInChokePoints;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
+ continue;
+ bool dominatesExitNodes = true;
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ if (! DT.dominates(MBB, ReturnBlocks[ri])) {
+ dominatesExitNodes = false;
+ break;
+ }
+ if (dominatesExitNodes) {
+ CSRUsedInChokePoints |= CSRUsed[MBB];
+ if (CSRUsedInChokePoints == UsedCSRegs) {
+ DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
+ << ": all CSRs used in choke point(s) at "
+ << getBasicBlockName(MBB) << "\n");
+ ShrinkWrapThisFunction = false;
+ break;
+ }
+ }
+ }
+ }
+
+ // Return now if we have decided not to apply shrink wrapping
+ // to the current function.
+ if (! ShrinkWrapThisFunction)
+ return false;
+
+ DEBUG({
+ DOUT << "ENABLED: " << Fn.getFunction()->getName();
+ if (HasFastExitPath)
+ DOUT << " (fast exit path)";
+ DOUT << "\n";
+ if (ShrinkWrapDebugging >= BasicInfo) {
+ DOUT << "------------------------------"
+ << "-----------------------------\n";
+ DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ if (ShrinkWrapDebugging >= Details) {
+ DOUT << "------------------------------"
+ << "-----------------------------\n";
+ dumpAllUsed();
+ }
+ }
+ });
+
+ // Build initial DF sets to determine minimal regions in the
+ // Machine CFG around which CSRs must be spilled and restored.
+ calculateAnticAvail(Fn);
+
+ return true;
+}
+
+/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
+/// multi-entry, multi-exit (MEME) regions so spill and restore
+/// placement will not break code that enters or leaves a
+/// shrink-wrapped region by inducing spills with no matching
+/// restores or restores with no matching spills. A MEME region
+/// is a subgraph of the MCFG with multiple entry edges, multiple
+/// exit edges, or both. This code propagates use information
+/// through the MCFG until all paths requiring spills and restores
+/// _outside_ the computed minimal placement regions have been covered.
+///
+bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4>& blks) {
+ if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
+ bool processThisBlock = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC->pred_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED->succ_size() > 1) {
+ processThisBlock = true;
+ break;
+ }
+ }
+ }
+ if (! processThisBlock)
+ return false;
+ }
+
+ CSRegSet prop;
+ if (!CSRSave[MBB].empty())
+ prop = CSRSave[MBB];
+ else if (!CSRRestore[MBB].empty())
+ prop = CSRRestore[MBB];
+ else
+ prop = CSRUsed[MBB];
+ if (prop.empty())
+ return false;
+
+ // Propagate selected bits to successors, predecessors of MBB.
+ bool addedUses = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ // Self-loop
+ if (SUCC == MBB)
+ continue;
+ if (! CSRUsed[SUCC].contains(prop)) {
+ CSRUsed[SUCC] |= prop;
+ addedUses = true;
+ blks.push_back(SUCC);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ DOUT << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "successor " << getBasicBlockName(SUCC) << "\n");
+ }
+ }
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ // Self-loop
+ if (PRED == MBB)
+ continue;
+ if (! CSRUsed[PRED].contains(prop)) {
+ CSRUsed[PRED] |= prop;
+ addedUses = true;
+ blks.push_back(PRED);
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ DOUT << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(prop) << ")->"
+ << "predecessor " << getBasicBlockName(PRED) << "\n");
+ }
+ }
+ return addedUses;
+}
+
+/// addUsesForTopLevelLoops - add uses for CSRs used inside top
+/// level loops to the exit blocks of those loops.
+///
+bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
+ bool addedUses = false;
+
+ // Place restores for top level loops where needed.
+ for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
+ I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
+ MachineBasicBlock* MBB = I->first;
+ MachineLoop* LP = I->second;
+ MachineBasicBlock* HDR = LP->getHeader();
+ SmallVector<MachineBasicBlock*, 4> exitBlocks;
+ CSRegSet loopSpills;
+
+ loopSpills = CSRSave[MBB];
+ if (CSRSave[MBB].empty()) {
+ loopSpills = CSRUsed[HDR];
+ assert(!loopSpills.empty() && "No CSRs used in loop?");
+ } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
+ continue;
+
+ LP->getExitBlocks(exitBlocks);
+ assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
+ for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* EXB = exitBlocks[i];
+ if (! CSRUsed[EXB].contains(loopSpills)) {
+ CSRUsed[EXB] |= loopSpills;
+ addedUses = true;
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ DOUT << "LOOP " << getBasicBlockName(MBB)
+ << "(" << stringifyCSRegSet(loopSpills) << ")->"
+ << getBasicBlockName(EXB) << "\n");
+ if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+ blks.push_back(EXB);
+ }
+ }
+ }
+ return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevSpills) {
+ bool placedSpills = false;
+ // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+ CSRegSet anticInPreds;
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ MachineBasicBlock* PRED = predecessors[i];
+ anticInPreds = UsedCSRegs - AnticIn[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+ }
+ } else {
+ // Handle uses in entry blocks (which have no predecessors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ anticInPreds = UsedCSRegs;
+ }
+ // Compute spills required at MBB:
+ CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+ if (! CSRSave[MBB].empty()) {
+ if (MBB == EntryBlock) {
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+ } else {
+ // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+ if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+ CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+ }
+ }
+ }
+ placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+ prevSpills[MBB] = CSRSave[MBB];
+ // Remember this block for adding restores to successor
+ // blocks for multi-entry region.
+ if (placedSpills)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+ return placedSpills;
+}
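+
+// Continuing the diamond example sketched after calculateAnticAvail (a single
+// CSR r used only in B1, with entry block B0), the formula above places a
+// single spill in B1; the values are illustrative only:
+//
+//   anticInPreds  = UsedCSRegs - AnticIn[B0]           = {r} - {} = {r}
+//   CSRSave[B1]  |= (AnticIn[B1] - AvailIn[B1]) & anticInPreds
+//                 = ({r} - {}) & {r}                   = {r}
+//
+// For B0, B2 and B3 the AnticIn - AvailIn term is empty, so no spill is
+// placed there.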
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+ SmallVector<MachineBasicBlock*, 4> &blks,
+ CSRegBlockMap &prevRestores) {
+ bool placedRestores = false;
+ // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+ CSRegSet availOutSucc;
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ MachineBasicBlock* SUCC = successors[i];
+ availOutSucc = UsedCSRegs - AvailOut[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+ }
+ } else {
+ if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+ // Handle uses in return blocks (which have no successors).
+ // This is necessary because the DFA formulation assumes the
+ // entry and (multiple) exit nodes cannot have CSR uses, which
+ // is not the case in the real world.
+ availOutSucc = UsedCSRegs;
+ }
+ }
+ // Compute restores required at MBB:
+ CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+ // Postprocess restore placements at MBB.
+ // Remove the CSRs that are restored in the return blocks.
+ // Lest this be confusing, note that:
+ // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+ if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+ if (! CSRSave[EntryBlock].empty())
+ CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+ }
+ placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+ prevRestores[MBB] = CSRRestore[MBB];
+ // Remember this block for adding saves to predecessor
+ // blocks for multi-entry region.
+ if (placedRestores)
+ blks.push_back(MBB);
+
+ DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+ DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+ return placedRestores;
+}
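+
+// The dual computation on the same diamond example places the matching
+// restore in B1 as well: AvailOut[B3] is empty (r is not available out of B2,
+// so it is not available into B3), giving
+//
+//   availOutSucc    = UsedCSRegs - AvailOut[B3]           = {r} - {} = {r}
+//   CSRRestore[B1] |= (AvailOut[B1] - AnticOut[B1]) & availOutSucc
+//                   = ({r} - {}) & {r}                    = {r}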
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+ CSRegBlockMap prevCSRSave;
+ CSRegBlockMap prevCSRRestore;
+ SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+ bool changed = true;
+ unsigned iterations = 0;
+
+ // Iterate computation of spill and restore placements in the MCFG until:
+ // 1. CSR use info has been fully propagated around the MCFG, and
+ // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
+ while (changed) {
+ changed = false;
+ ++iterations;
+
+ DEBUG(if (ShrinkWrapDebugging >= Iterations)
+ DOUT << "iter " << iterations
+ << " --------------------------------------------------\n");
+
+ // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+ // which determines the placements of spills and restores.
+ // Keep track of changes to spills, restores in each iteration to
+ // minimize the total iterations.
+ bool SRChanged = false;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Place spills for CSRs in MBB.
+ SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+ // Place restores for CSRs in MBB.
+ SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+ }
+
+ // Add uses of CSRs used inside loops where needed.
+ changed |= addUsesForTopLevelLoops(cvBlocks);
+
+ // Add uses for CSRs spilled or restored at branch, join points.
+ if (changed || SRChanged) {
+ while (! cvBlocks.empty()) {
+ MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+ changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+ }
+ if (! ncvBlocks.empty()) {
+ cvBlocks = ncvBlocks;
+ ncvBlocks.clear();
+ }
+ }
+
+ if (changed) {
+ calculateAnticAvail(Fn);
+ CSRSave.clear();
+ CSRRestore.clear();
+ }
+ }
+
+ // Check for effectiveness:
+ // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+ // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+ // Gives a measure of how many CSR spills have been moved from EntryBlock
+ // to minimal regions enclosing their uses.
+ CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+ unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+ numSRReduced += numSRReducedThisFunc;
+ DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "total iterations = " << iterations << " ( "
+ << Fn.getFunction()->getName()
+ << " " << numSRReducedThisFunc
+ << " " << Fn.size()
+ << " )\n";
+ DOUT << "-----------------------------------------------------------\n";
+ dumpSRSets();
+ DOUT << "-----------------------------------------------------------\n";
+ if (numSRReducedThisFunc)
+ verifySpillRestorePlacement();
+ });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one fast exit path: a return block that is either an
+/// immediate successor of the entry block or is reached from it through
+/// blocks that never branch.
+///
+void PEI::findFastExitPath() {
+ if (! EntryBlock)
+ return;
+  // Find a path from EntryBlock to any return block that does not branch:
+ // Entry
+ // | ...
+ // v |
+ // B1<-----+
+ // |
+ // v
+ // Return
+ for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+ SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+
+ // Assume positive, disprove existence of fast path.
+ HasFastExitPath = true;
+
+ // Check the immediate successors.
+ if (isReturnBlock(SUCC)) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << getBasicBlockName(SUCC) << "\n";
+ break;
+ }
+ // Traverse df from SUCC, look for a branch block.
+ std::string exitPath = getBasicBlockName(SUCC);
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+ BE = df_end(SUCC); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ // Reject paths with branch nodes.
+ if (SBB->succ_size() > 1) {
+ HasFastExitPath = false;
+ break;
+ }
+ exitPath += "->" + getBasicBlockName(SBB);
+ }
+ if (HasFastExitPath) {
+ if (ShrinkWrapDebugging >= BasicInfo)
+ DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ << "->" << exitPath << "\n";
+ break;
+ }
+ }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+///          all CSRs spilled at MBB are restored on all paths
+/// from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+/// all CSRs restored at MBB are spilled on all paths
+/// reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+ unsigned numReturnBlocks = 0;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+ ++numReturnBlocks;
+ }
+ for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+ BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet spilled = BI->second;
+ CSRegSet restored;
+
+ if (spilled.empty())
+ continue;
+
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(spilled)
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+
+ if (CSRRestore[MBB].intersects(spilled)) {
+ restored |= (CSRRestore[MBB] & spilled);
+ }
+
+ // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+ // we must find restores for all spills w/no intervening spills on all
+ // paths from MBB to all return blocks.
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+ BE = df_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ if (SBB == MBB)
+ continue;
+ // Stop when we encounter spills of any CSRs spilled at MBB that
+ // have not yet been seen to be restored.
+ if (CSRSave[SBB].intersects(spilled) &&
+ !restored.contains(CSRSave[SBB] & spilled))
+ break;
+ // Collect the CSRs spilled at MBB that are restored
+ // at this DF successor of MBB.
+ if (CSRRestore[SBB].intersects(spilled))
+ restored |= (CSRRestore[SBB] & spilled);
+      // If we are at a return block, check that the restores
+ // we have seen so far exhaust the spills at MBB, then
+ // reset the restores.
+ if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+ if (restored != spilled) {
+ CSRegSet notRestored = (spilled - restored);
+ DOUT << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notRestored)
+ << " spilled at " << getBasicBlockName(MBB)
+ << " are never restored on path to return "
+ << getBasicBlockName(SBB) << "\n";
+ }
+ restored.clear();
+ }
+ }
+ }
+
+ // Check restore placements.
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+ BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restored = BI->second;
+ CSRegSet spilled;
+
+ if (restored.empty())
+ continue;
+
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB])
+ << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restored) << "\n";
+
+ if (CSRSave[MBB].intersects(restored)) {
+ spilled |= (CSRSave[MBB] & restored);
+ }
+ // Walk inverse depth first from MBB to find spills of all
+ // CSRs restored at MBB:
+ for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+ BE = idf_end(MBB); BI != BE; ++BI) {
+ MachineBasicBlock* PBB = *BI;
+ if (PBB == MBB)
+ continue;
+ // Stop when we encounter restores of any CSRs restored at MBB that
+ // have not yet been seen to be spilled.
+ if (CSRRestore[PBB].intersects(restored) &&
+ !spilled.contains(CSRRestore[PBB] & restored))
+ break;
+ // Collect the CSRs restored at MBB that are spilled
+ // at this DF predecessor of MBB.
+ if (CSRSave[PBB].intersects(restored))
+ spilled |= (CSRSave[PBB] & restored);
+ }
+ if (spilled != restored) {
+ CSRegSet notSpilled = (restored - spilled);
+ DOUT << MF->getFunction()->getName() << ": "
+ << stringifyCSRegSet(notSpilled)
+ << " restored at " << getBasicBlockName(MBB)
+ << " are never spilled\n";
+ }
+ }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
+ std::ostringstream name;
+ if (MBB) {
+ if (MBB->getBasicBlock())
+ name << MBB->getBasicBlock()->getName();
+ else
+ name << "_MBB_" << MBB->getNumber();
+ }
+ return name.str();
+}
+
+std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> CSI =
+ MF->getFrameInfo()->getCalleeSavedInfo();
+
+ std::ostringstream srep;
+ if (CSI.size() == 0) {
+ srep << "[]";
+ return srep.str();
+ }
+ srep << "[";
+ CSRegSet::iterator I = s.begin(), E = s.end();
+ if (I != E) {
+ unsigned reg = CSI[*I].getReg();
+ srep << TRI->getName(reg);
+ for (++I; I != E; ++I) {
+ reg = CSI[*I].getReg();
+ srep << ",";
+ srep << TRI->getName(reg);
+ }
+ }
+ srep << "]";
+ return srep.str();
+}
+
+void PEI::dumpSet(const CSRegSet& s) {
+ DOUT << stringifyCSRegSet(s) << "\n";
+}
+
+void PEI::dumpUsed(MachineBasicBlock* MBB) {
+ if (MBB) {
+ DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
+ }
+}
+
+void PEI::dumpAllUsed() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpUsed(MBB);
+ }
+}
+
+void PEI::dumpSets(MachineBasicBlock* MBB) {
+ if (MBB) {
+ DOUT << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << "\n";
+ }
+}
+
+void PEI::dumpSets1(MachineBasicBlock* MBB) {
+ if (MBB) {
+ DOUT << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(CSRUsed[MBB]) << " | "
+ << stringifyCSRegSet(AnticIn[MBB]) << " | "
+ << stringifyCSRegSet(AnticOut[MBB]) << " | "
+ << stringifyCSRegSet(AvailIn[MBB]) << " | "
+ << stringifyCSRegSet(AvailOut[MBB]) << " | "
+ << stringifyCSRegSet(CSRSave[MBB]) << " | "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+}
+
+void PEI::dumpAllSets() {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ dumpSets1(MBB);
+ }
+}
+
+void PEI::dumpSRSets() {
+ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
+ MBB != E; ++MBB) {
+ if (! CSRSave[MBB].empty()) {
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB]);
+ if (CSRRestore[MBB].empty())
+ DOUT << "\n";
+ }
+ if (! CSRRestore[MBB].empty()) {
+ if (! CSRSave[MBB].empty())
+ DOUT << " ";
+ DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+ }
+ }
+}
+#endif
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
new file mode 100644
index 0000000..2bc234f
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -0,0 +1,2827 @@
+//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register coalescing pass that attempts to
+// aggressively coalesce every register copy that it can.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regcoalescing"
+#include "SimpleRegisterCoalescing.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Value.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
+STATISTIC(numDeadValNo, "Number of valno def marked dead");
+
+char SimpleRegisterCoalescing::ID = 0;
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+static cl::opt<bool>
+NewHeuristic("new-coalescer-heuristic",
+ cl::desc("Use new coalescer heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+CrossClassJoin("join-cross-class-copies",
+ cl::desc("Coalesce cross register class copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PhysJoinTweak("tweak-phys-join-heuristics",
+ cl::desc("Tweak heuristics for joining phys reg with vr"),
+ cl::init(false), cl::Hidden);
+
+static RegisterPass<SimpleRegisterCoalescing>
+X("simple-register-coalescing", "Simple Register Coalescing");
+
+// Declare that we implement the RegisterCoalescer interface
+static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X);
+
+const PassInfo *const llvm::SimpleRegisterCoalescingID = &X;
+
+void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
+ LiveInterval &IntB,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ assert(BLR != IntB.end() && "Live range not found!");
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->copy) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+ assert(ALR != IntA.end() && "Live range not found!");
+ VNInfo *AValNo = ALR->valno;
+ // If it's re-defined by an early clobber somewhere in the live range, then
+ // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149:
+ // 172 %ECX<def> = MOV32rr %reg1039<kill>
+ // 180 INLINEASM <es:subl $5,$1
+ // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, %EAX<kill>,
+ // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+ // 188 %EAX<def> = MOV32rr %EAX<kill>
+ // 196 %ECX<def> = MOV32rr %ECX<kill>
+ // 204 %ECX<def> = MOV32rr %ECX<kill>
+ // 212 %EAX<def> = MOV32rr %EAX<kill>
+ // 220 %EAX<def> = MOV32rr %EAX
+ // 228 %reg1039<def> = MOV32rr %ECX<kill>
+ // The early clobber operand ties ECX input to the ECX def.
+ //
+ // The live interval of ECX is represented as this:
+ // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
+ // The coalescer has no idea there was a def in the middle of [174,230].
+ if (AValNo->redefByEC)
+ return false;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ unsigned SrcReg = li_->getVNInfoSourceReg(AValNo);
+ if (!SrcReg) return false; // Not defined by a copy.
+
+ // If the value number is not defined by a copy instruction, ignore it.
+
+ // If the source register comes from an interval other than IntB, we can't
+ // handle this.
+ if (SrcReg != IntB.reg) return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def-1);
+ assert(ValLR != IntB.end() && "Live range not found!");
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1);
+ if (!ValLREndInst ||
+ ValLREndInst->getParent() != CopyMI->getParent()) return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its sub-registers is overlapping the live interval of the virtual
+ // register. If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) &&
+ *tri_->getSubRegisters(IntB.reg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false;
+ }
+ }
+
+ DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
+
+ unsigned FillerStart = ValLR->end, FillerEnd = BLR->start;
+  // We are about to delete CopyMI, so we need to remove it as the 'instruction
+  // that defines this value #'. Update the valnum with the new defining
+  // instruction #.
+ BValNo->def = FillerStart;
+ BValNo->copy = NULL;
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+ SRLI.getNextValue(FillerStart, 0, li_->getVNInfoAllocator())));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno) {
+ IntB.addKills(ValLR->valno, BValNo->kills);
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ }
+ DOUT << " result = "; IntB.print(DOUT, tri_);
+ DOUT << "\n";
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ IntB.removeKill(ValLR->valno, FillerStart);
+ }
+
+ ++numExtends;
+ return true;
+}
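+
+// A small numeric illustration of the merge performed above (the indices are
+// hypothetical instruction numbers, not taken from a real function):
+//
+//   IntB before:  [4,12:B0) [20,30:B1)     ValLR = [4,12), BLR = [20,30)
+//   filler:       [12,20)                  FillerStart = ValLR->end,
+//                                          FillerEnd   = BLR->start
+//   IntB after:   [4,30:B0)                B1 has been merged into B0 and the
+//                                          copy that defined it can be erased.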
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValNo val# of IntA.
+bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a commutable
+/// instruction and its other operand is coalesced to the copy dest register,
+/// see if we can transform the copy into a noop by commuting the definition. For
+/// example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+///  B1 = B2      <- now an identity copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
+ LiveInterval &IntB,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+ // FIXME: For now, only eliminate the copy by commuting its def when the
+ // source register is a virtual register. We want to guard against cases
+  // where the copy is a back edge copy and commuting the def lengthens the
+ // live interval of the source register to the entire loop.
+ if (TargetRegisterInfo::isPhysicalRegister(IntA.reg))
+ return false;
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ assert(BLR != IntB.end() && "Live range not found!");
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->copy) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+ assert(ALR != IntA.end() && "Live range not found!");
+ VNInfo *AValNo = ALR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (AValNo->def == ~0U || AValNo->def == ~1U || AValNo->hasPHIKill)
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ unsigned NewDstIdx;
+ if (!TID.isCommutable() ||
+ !tii_->CommuteChangesDestination(DefMI, NewDstIdx))
+ return false;
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !NewDstMO.isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+  // If some of the uses of IntA.reg are already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+ UE = mri_->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ unsigned UseIdx = li_->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end())
+ continue;
+ if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ return false;
+ }
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (NewMI != DefMI) {
+ li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MBB->insert(DefMI, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+ bool BHasPHIKill = BValNo->hasPHIKill;
+ SmallVector<VNInfo*, 4> BDeadValNos;
+ SmallVector<unsigned, 4> BKills;
+ std::map<unsigned, unsigned> BExtend;
+
+ // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+ //
+ // then do not add kills of A to the newly created B interval.
+ bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
+ if (Extended)
+ BExtend[ALR->end] = BLR->end;
+
+ // Update uses of IntA of the specific Val# with IntB.
+ bool BHasSubRegs = false;
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ BHasSubRegs = *tri_->getSubRegisters(IntB.reg);
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ unsigned UseIdx = li_->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (UseMO.isKill()) {
+ if (Extended)
+ UseMO.setIsKill(false);
+ else
+ BKills.push_back(li_->getUseIndex(UseIdx)+1);
+ }
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ continue;
+ if (DstReg == IntB.reg) {
+ // This copy will become a noop. If it's defining a new val#,
+ // remove that val# as well. However this live range is being
+ // extended to the end of the existing live range defined by the copy.
+ unsigned DefIdx = li_->getDefIndex(UseIdx);
+ const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
+ BHasPHIKill |= DLR->valno->hasPHIKill;
+ assert(DLR->valno->def == DefIdx);
+ BDeadValNos.push_back(DLR->valno);
+ BExtend[DLR->start] = DLR->end;
+ JoinedCopies.insert(UseMI);
+ // If this is a kill but it's going to be removed, the last use
+ // of the same val# is the new kill.
+ if (UseMO.isKill())
+ BKills.pop_back();
+ }
+ }
+
+ // We need to insert a new liverange: [ALR.start, LastUse). It may be we can
+ // simply extend BLR if CopyMI doesn't end the range.
+ DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
+
+ // Remove val#'s defined by copies that will be coalesced away.
+ for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
+ VNInfo *DeadVNI = BDeadValNos[i];
+ if (BHasSubRegs) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def);
+ SRLI.removeValNo(SRLR->valno);
+ }
+ }
+ IntB.removeValNo(BDeadValNos[i]);
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated. Kills are also updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ ValNo->copy = NULL;
+ for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) {
+ unsigned Kill = ValNo->kills[j];
+ if (Kill != BLR->end)
+ BKills.push_back(Kill);
+ }
+ ValNo->kills.clear();
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ unsigned End = AI->end;
+ std::map<unsigned, unsigned>::iterator EI = BExtend.find(End);
+ if (EI != BExtend.end())
+ End = EI->second;
+ IntB.addRange(LiveRange(AI->start, End, ValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (BHasSubRegs) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.MergeInClobberRange(AI->start, End, li_->getVNInfoAllocator());
+ }
+ }
+ }
+ IntB.addKills(ValNo, BKills);
+ ValNo->hasPHIKill = BHasPHIKill;
+
+ DOUT << " result = "; IntB.print(DOUT, tri_);
+ DOUT << "\n";
+
+ DOUT << "\nShortening: "; IntA.print(DOUT, tri_);
+ IntA.removeValNo(AValNo);
+ DOUT << " result = "; IntA.print(DOUT, tri_);
+ DOUT << "\n";
+
+ ++numCommutes;
+ return true;
+}
+
+ /// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply
+ /// falls through to SuccMBB.
+static bool isSameOrFallThroughBB(MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccMBB,
+ const TargetInstrInfo *tii_) {
+ if (MBB == SuccMBB)
+ return true;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
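+ // MBB falls through to SuccMBB if it has no analyzable terminator branches
+ // and SuccMBB is one of its successors.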
+ return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB &&
+ MBB->isSuccessor(SuccMBB);
+}
+
+/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range
+/// from a physical register live interval as well as from the live intervals
+/// of its sub-registers.
+static void removeRange(LiveInterval &li, unsigned Start, unsigned End,
+ LiveIntervals *li_, const TargetRegisterInfo *tri_) {
+ li.removeRange(Start, End, true);
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ unsigned RemoveEnd = Start;
+ while (RemoveEnd != End) {
+ LiveInterval::iterator LR = sli.FindLiveRangeContaining(Start);
+ if (LR == sli.end())
+ break;
+ RemoveEnd = (LR->end < End) ? LR->end : End;
+ sli.removeRange(Start, RemoveEnd, true);
+ Start = RemoveEnd;
+ }
+ }
+ }
+}
+
+/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block
+/// as the copy instruction, trim the live interval to the last use and return
+/// true.
+bool
+SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &li,
+ const LiveRange *LR) {
+ unsigned MBBStart = li_->getMBBStartIdx(CopyMBB);
+ unsigned LastUseIdx;
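+ // Look for the last use of the register in [LR->start, CopyIdx).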
+ MachineOperand *LastUse = lastRegisterUse(LR->start, CopyIdx-1, li.reg,
+ LastUseIdx);
+ if (LastUse) {
+ MachineInstr *LastUseMI = LastUse->getParent();
+ if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) {
+ // r1024 = op
+ // ...
+ // BB1:
+ // = r1024
+ //
+ // BB2:
+ // r1025<dead> = r1024<kill>
+ if (MBBStart < LR->end)
+ removeRange(li, MBBStart, LR->end, li_, tri_);
+ return true;
+ }
+
+ // There are uses before the copy, just shorten the live range to the end
+ // of last use.
+ LastUse->setIsKill();
+ removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_);
+ li.addKill(LR->valno, LastUseIdx+1);
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ DstReg == li.reg) {
+ // Last use is itself an identity copy.
+ int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+ LastUseMI->getOperand(DeadIdx).setIsDead();
+ }
+ return true;
+ }
+
+ // Is it livein?
+ if (LR->start <= MBBStart && LR->end > MBBStart) {
+ if (LR->start == 0) {
+ assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+ // Live-in to the function but dead. Remove it from entry live-in set.
+ mf_->begin()->removeLiveIn(li.reg);
+ }
+ // FIXME: Shorten intervals in BBs that reaches this BB.
+ }
+
+ return false;
+}
+
+ /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+ /// computation, replace the copy by rematerializing the definition.
+bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ unsigned DstReg,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI));
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (ValNo->def == ~0U || ValNo->def == ~1U || ValNo->hasPHIKill)
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ if (!TID.isAsCheapAsAMove())
+ return false;
+ if (!DefMI->getDesc().isRematerializable() ||
+ !tii_->isTriviallyReMaterializable(DefMI))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(tii_, SawStore))
+ return false;
+
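+ // CopyMI defines DstReg at DefIdx; clear the val#'s copy link since the
+ // copy is about to be replaced by a rematerialized def.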
+ unsigned DefIdx = li_->getDefIndex(CopyIdx);
+ const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
+ DLR->valno->copy = NULL;
+ // Don't forget to update sub-register intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+ if (DLR && DLR->valno->copy == CopyMI)
+ DLR->valno->copy = NULL;
+ }
+ }
+
+ // If the copy kills the source register, find the last use and propagate
+ // the kill.
+ bool checkForDeadDef = false;
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ if (CopyMI->killsRegister(SrcInt.reg))
+ if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
+ checkForDeadDef = true;
+ }
+
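+ // Re-emit the defining instruction in place of the copy; the copy itself
+ // is deleted from the function below.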
+ MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
+ CopyMI->removeFromParent();
+ tii_->reMaterialize(*MBB, MII, DstReg, DefMI);
+ MachineInstr *NewMI = prior(MII);
+
+ if (checkForDeadDef) {
+ // PR4090 fix: Trim interval failed because there was no use of the
+ // source interval in this MBB. If the def is in this MBB too then we
+ // should mark it dead:
+ if (DefMI->getParent() == MBB) {
+ DefMI->addRegisterDead(SrcInt.reg, tri_);
+ SrcLR->end = SrcLR->start + 1;
+ }
+
+ }
+
+ // CopyMI may have implicit operands, transfer them over to the newly
+ // rematerialized instruction. And update implicit def interval valnos.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ if (MO.isDef() && li_->hasInterval(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
+ if (DLR && DLR->valno->copy == CopyMI)
+ DLR->valno->copy = NULL;
+ }
+ }
+
+ li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ MBB->getParent()->DeleteMachineInstr(CopyMI);
+ ReMatCopies.insert(CopyMI);
+ ReMatDefs.insert(DefMI);
+ ++NumReMats;
+ return true;
+}
+
+/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy.
+///
+bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI,
+ unsigned DstReg) const {
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ const MachineLoop *L = loopInfo->getLoopFor(MBB);
+ if (!L)
+ return false;
+ if (MBB != L->getLoopLatch())
+ return false;
+
+ LiveInterval &LI = li_->getInterval(DstReg);
+ unsigned DefIdx = li_->getInstructionIndex(CopyMI);
+ LiveInterval::const_iterator DstLR =
+ LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx));
+ if (DstLR == LI.end())
+ return false;
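+ // It's a back edge copy only if the value defined by the copy lives out of
+ // the latch (its sole kill is at the block end) and is killed by a PHI.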
+ unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1;
+ if (DstLR->valno->kills.size() == 1 &&
+ DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill)
+ return true;
+ return false;
+}
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void
+SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (DstIsPhys && SubIdx) {
+ // Figure out the real physical register we are updating with.
+ DstReg = tri_->getSubReg(DstReg, SubIdx);
+ SubIdx = 0;
+ }
+
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
+ E = mri_->reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *UseMI = &*I;
+ ++I;
+ unsigned OldSubIdx = O.getSubReg();
+ if (DstIsPhys) {
+ unsigned UseDstReg = DstReg;
+ if (OldSubIdx)
+ UseDstReg = tri_->getSubReg(DstReg, OldSubIdx);
+
+ unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+ if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+ CopySrcSubIdx, CopyDstSubIdx) &&
+ CopySrcReg != CopyDstReg &&
+ CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
+ // If the use is a copy and it won't be coalesced away, and its source
+ // is defined by a trivial computation, try to rematerialize it instead.
+ if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,UseMI))
+ continue;
+ }
+
+ O.setReg(UseDstReg);
+ O.setSubReg(0);
+ continue;
+ }
+
+ // Sub-register indexes go from small to large. e.g.
+ // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
+ // EAX: 1 -> AL, 2 -> AX
+ // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
+ // sub-register 2 is also AX.
+ if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
+ assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
+ else if (SubIdx)
+ O.setSubReg(SubIdx);
+ // Remove would-be duplicated kill marker.
+ if (O.isKill() && UseMI->killsRegister(DstReg))
+ O.setIsKill(false);
+ O.setReg(DstReg);
+
+ // After updating the operand, check if the machine instruction has
+ // become a copy. If so, update its val# information.
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+ if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 &&
+ tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+ CopySrcSubIdx, CopyDstSubIdx) &&
+ CopySrcReg != CopyDstReg &&
+ (TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
+ allocatableRegs_[CopyDstReg])) {
+ LiveInterval &LI = li_->getInterval(CopyDstReg);
+ unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI));
+ const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx);
+ if (DLR->valno->def == DefIdx)
+ DLR->valno->copy = UseMI;
+ }
+ }
+}
+
+/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining"
+/// registers due to insert_subreg coalescing. e.g.
+/// r1024 = op
+/// r1025 = implicit_def
+/// r1025 = insert_subreg r1025, r1024
+/// = op r1025
+/// =>
+/// r1025 = op
+/// r1025 = implicit_def
+/// r1025 = insert_subreg r1025, r1025
+/// = op r1025
+void
+SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) {
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+ E = mri_->reg_end(); I != E; ) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *DefMI = &*I;
+ ++I;
+ if (!O.isDef())
+ continue;
+ if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+ continue;
+ if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI)))
+ continue;
+ li_->RemoveMachineInstrFromMaps(DefMI);
+ DefMI->eraseFromParent();
+ }
+}
+
+/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+/// due to live range lengthening as the result of coalescing.
+void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
+ LiveInterval &LI) {
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+ UE = mri_->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (UseMO.isKill()) {
+ MachineInstr *UseMI = UseMO.getParent();
+ unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+ const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
+ if (!LR || !LI.isKill(LR->valno, UseIdx+1))
+ UseMO.setIsKill(false);
+ }
+ }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty, if so remove it and also remove the empty intervals of its
+/// sub-registers. Return true if live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+ const TargetRegisterInfo *tri_) {
+ if (li.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ if (sli.empty())
+ li_->removeInterval(*SR);
+ }
+ li_->removeInterval(li.reg);
+ return true;
+ }
+ return false;
+}
+
+/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+/// Return true if live interval is removed.
+bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+ LiveInterval::iterator MLR =
+ li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx));
+ if (MLR == li.end())
+ return false; // Already removed by ShortenDeadCopySrcLiveRange.
+ unsigned RemoveStart = MLR->start;
+ unsigned RemoveEnd = MLR->end;
+ // Remove the live range defined by this copy if it extends no further than
+ // the def itself, i.e. the copy result is dead.
+ if (RemoveEnd == li_->getDefIndex(CopyIdx)+1) {
+ removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+ }
+ return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
+ MachineInstr *DefMI) {
+ unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI));
+ LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+ if (DefIdx != MLR->valno->def)
+ return false;
+ li.removeValNo(MLR->valno);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// PropagateDeadness - Propagate the dead marker to the instruction which
+/// defines the val#.
+static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
+ unsigned &LRStart, LiveIntervals *li_,
+ const TargetRegisterInfo* tri_) {
+ MachineInstr *DefMI =
+ li_->getInstructionFromIndex(li_->getDefIndex(LRStart));
+ if (DefMI && DefMI != CopyMI) {
+ int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+ if (DeadIdx != -1) {
+ DefMI->getOperand(DeadIdx).setIsDead();
+ // A dead def should have a single cycle interval.
+ ++LRStart;
+ }
+ }
+}
+
+ /// ShortenDeadCopySrcLiveRange - Shorten a live range that is artificially
+ /// extended by a dead copy. Mark the last use (if any) of the val# as a kill,
+ /// since the live range now ends there. If there isn't another use, then this
+ /// live range is dead. Return true if the live interval is removed.
+bool
+SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+ if (CopyIdx == 0) {
+ // FIXME: special case: function live in. It can be a general case if the
+ // first instruction index starts at > 0 value.
+ assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+ // Live-in to the function but dead. Remove it from entry live-in set.
+ if (mf_->begin()->isLiveIn(li.reg))
+ mf_->begin()->removeLiveIn(li.reg);
+ const LiveRange *LR = li.getLiveRangeContaining(CopyIdx);
+ removeRange(li, LR->start, LR->end, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+ }
+
+ LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx-1);
+ if (LR == li.end())
+ // Livein but defined by a phi.
+ return false;
+
+ unsigned RemoveStart = LR->start;
+ unsigned RemoveEnd = li_->getDefIndex(CopyIdx)+1;
+ if (LR->end > RemoveEnd)
+ // More uses past this copy? Nothing to do.
+ return false;
+
+ // If there is a last use in the same bb, we can't remove the live range.
+ // Shorten the live interval and return.
+ MachineBasicBlock *CopyMBB = CopyMI->getParent();
+ if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR))
+ return false;
+
+ MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart);
+ if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_))
+ // If the live range starts in another mbb and the copy mbb is not a fall
+ // through mbb, then we can only cut the range from the beginning of the
+ // copy mbb.
+ RemoveStart = li_->getMBBStartIdx(CopyMBB) + 1;
+
+ if (LR->valno->def == RemoveStart) {
+ // If the def MI defines the val# and this copy is the only kill of the
+ // val#, then propagate the dead marker.
+ if (li.isOnlyLROfValNo(LR)) {
+ PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
+ ++numDeadValNo;
+ }
+ if (li.isKill(LR->valno, RemoveEnd))
+ li.removeKill(LR->valno, RemoveEnd);
+ }
+
+ removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+/// from an implicit def to another register can be coalesced away.
+bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
+ LiveInterval &li,
+ LiveInterval &ImpLi) const{
+ if (!CopyMI->killsRegister(ImpLi.reg))
+ return false;
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx);
+ if (LR == li.end())
+ return false;
+ if (LR->valno->hasPHIKill)
+ return false;
+ if (LR->valno->def != CopyIdx)
+ return false;
+ // Make sure all of val# uses are copies.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+ LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
+ if (ULR == li.end() || ULR->valno != LR->valno)
+ continue;
+ // If the use is neither a copy nor an insert_subreg of this register, it's
+ // not safe to coalesce the move away.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG &&
+ UseMI->getOperand(1).getReg() == li.reg)
+ continue;
+ return false;
+ }
+ }
+ return true;
+}
+
+
+/// RemoveCopiesFromValNo - The specified value# is defined by an implicit
+/// def and it is being removed. Turn all copies from this value# into
+/// identity copies so they will be removed.
+void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
+ VNInfo *VNI) {
+ SmallVector<MachineInstr*, 4> ImpDefs;
+ MachineOperand *LastUse = NULL;
+ unsigned LastUseIdx = li_->getUseIndex(VNI->def);
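+ // Track the last remaining use of this val# that is not turned into an
+ // identity copy; it will be marked as the new kill below.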
+ for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg),
+ RE = mri_->reg_end(); RI != RE;) {
+ MachineOperand *MO = &RI.getOperand();
+ MachineInstr *MI = &*RI;
+ ++RI;
+ if (MO->isDef()) {
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ ImpDefs.push_back(MI);
+ }
+ continue;
+ }
+ if (JoinedCopies.count(MI))
+ continue;
+ unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(MI));
+ LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
+ if (ULR == li.end() || ULR->valno != VNI)
+ continue;
+ // If the use is a copy, turn it into an identity copy.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == li.reg) {
+ // Each use MI may have multiple uses of this register. Change them all.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == li.reg)
+ MO.setReg(DstReg);
+ }
+ JoinedCopies.insert(MI);
+ } else if (UseIdx > LastUseIdx) {
+ LastUseIdx = UseIdx;
+ LastUse = MO;
+ }
+ }
+ if (LastUse) {
+ LastUse->setIsKill();
+ li.addKill(VNI, LastUseIdx+1);
+ } else {
+ // Remove dead implicit_def's.
+ while (!ImpDefs.empty()) {
+ MachineInstr *ImpDef = ImpDefs.back();
+ ImpDefs.pop_back();
+ li_->RemoveMachineInstrFromMaps(ImpDef);
+ ImpDef->eraseFromParent();
+ }
+ }
+}
+
+ /// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
+ /// virtual destination register with a physical source register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt,
+ LiveInterval &SrcInt) {
+ // If the virtual register live interval is long but it has low use density,
+ // do not join them, instead mark the physical register as its allocation
+ // preference.
+ const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(DstInt);
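+ // Reject long intervals with fewer than one use per Threshold instructions.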
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_begin(DstInt.reg),
+ mri_->use_end()) / Length) < (1.0 / Threshold)))
+ return false;
+
+ // If the virtual register live interval extends into a loop, turn down
+ // aggressiveness.
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
+ if (!L) {
+ // Let's see if the virtual register live interval extends into the loop.
+ LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
+ assert(DLR != DstInt.end() && "Live range not found!");
+ DLR = DstInt.FindLiveRangeContaining(DLR->end+1);
+ if (DLR != DstInt.end()) {
+ CopyMBB = li_->getMBBFromIndex(DLR->start);
+ L = loopInfo->getLoopFor(CopyMBB);
+ }
+ }
+
+ if (!L || Length <= Threshold)
+ return true;
+
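+ // The copy is inside loop L and the interval is long; be more careful if
+ // the source value comes from outside the loop.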
+ unsigned UseIdx = li_->getUseIndex(CopyIdx);
+ LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+ MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+ if (loopInfo->getLoopFor(SMBB) != L) {
+ if (!loopInfo->isLoopHeader(CopyMBB))
+ return false;
+ // If vr's live interval extends past the loop header, do not join.
+ for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(),
+ SE = CopyMBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB == CopyMBB)
+ continue;
+ if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
+ li_->getMBBEndIdx(SuccMBB)+1))
+ return false;
+ }
+ }
+ return true;
+}
+
+ /// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+ /// copy from a virtual source register to a physical destination register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt,
+ LiveInterval &SrcInt) {
+ // If the virtual register live interval is long but it has low use density,
+ // do not join them, instead mark the physical register as its allocation
+ // preference.
+ const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(SrcInt);
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_begin(SrcInt.reg),
+ mri_->use_end()) / Length) < (1.0 / Threshold)))
+ return false;
+
+ if (SrcInt.empty())
+ // Must be implicit_def.
+ return false;
+
+ // If the virtual register live interval is defined in or crosses a loop,
+ // turn down aggressiveness.
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ unsigned UseIdx = li_->getUseIndex(CopyIdx);
+ LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+ assert(SLR != SrcInt.end() && "Live range not found!");
+ SLR = SrcInt.FindLiveRangeContaining(SLR->start-1);
+ if (SLR == SrcInt.end())
+ return true;
+ MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+ const MachineLoop *L = loopInfo->getLoopFor(SMBB);
+
+ if (!L || Length <= Threshold)
+ return true;
+
+ if (loopInfo->getLoopFor(CopyMBB) != L) {
+ if (SMBB != L->getLoopLatch())
+ return false;
+ // If vr's live interval is extended from before the loop latch, do not
+ // join.
+ for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(),
+ PE = SMBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredMBB = *PI;
+ if (PredMBB == SMBB)
+ continue;
+ if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
+ li_->getMBBEndIdx(PredMBB)+1))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
+ unsigned SmallReg,
+ unsigned Threshold) {
+ // Make sure the intervals are short; if either is long, require the
+ // small-class register to be used at least as densely as the large one.
+ LiveInterval &LargeInt = li_->getInterval(LargeReg);
+ LiveInterval &SmallInt = li_->getInterval(SmallReg);
+ unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
+ unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
+ if (SmallSize > Threshold || LargeSize > Threshold)
+ if ((float)std::distance(mri_->use_begin(SmallReg),
+ mri_->use_end()) / SmallSize <
+ (float)std::distance(mri_->use_begin(LargeReg),
+ mri_->use_end()) / LargeSize)
+ return false;
+ return true;
+}
+
+/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+ /// register with a physical register, check if any of the virtual register's
+ /// operands is a sub-register use or def. If so, make sure it won't result
+/// in an illegal extract_subreg or insert_subreg instruction. e.g.
+/// vr1024 = extract_subreg vr1025, 1
+/// ...
+/// vr1024 = mov8rr AH
+/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since
+/// AH does not have a super-reg whose sub-register 1 is AH.
+bool
+SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+ unsigned VirtReg,
+ unsigned PhysReg) {
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineOperand &O = I.getOperand();
+ MachineInstr *MI = &*I;
+ if (MI == CopyMI || JoinedCopies.count(MI))
+ continue;
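+ // An explicit sub-register operand must correspond to a sub-register that
+ // actually exists on PhysReg.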
+ unsigned SubIdx = O.getSubReg();
+ if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ SubIdx = MI->getOperand(2).getImm();
+ if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ if (O.isDef()) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ ? tri_->getPhysicalRegisterRegClass(SrcReg)
+ : mri_->getRegClass(SrcReg);
+ if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+ return true;
+ }
+ }
+ if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ SubIdx = MI->getOperand(3).getImm();
+ if (VirtReg == MI->getOperand(0).getReg()) {
+ if (!tri_->getSubReg(PhysReg, SubIdx))
+ return true;
+ } else {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isPhysicalRegister(DstReg)
+ ? tri_->getPhysicalRegisterRegClass(DstReg)
+ : mri_->getRegClass(DstReg);
+ if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+/// an extract_subreg where dst is a physical register, e.g.
+/// cl = EXTRACT_SUBREG reg1024, 1
+bool
+SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
+ unsigned SrcReg, unsigned SubIdx,
+ unsigned &RealDstReg) {
+ const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
+ RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
+ assert(RealDstReg && "Invalid extract_subreg instruction!");
+
+ // For this type of EXTRACT_SUBREG, conservatively
+ // check if the live interval of the source register interferes with the
+ // actual super physical register we are trying to coalesce with.
+ LiveInterval &RHS = li_->getInterval(SrcReg);
+ if (li_->hasInterval(RealDstReg) &&
+ RHS.overlaps(li_->getInterval(RealDstReg))) {
+ DOUT << "Interfere with register ";
+ DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_));
+ return false; // Not coalescable
+ }
+ for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false; // Not coalescable
+ }
+ return true;
+}
+
+/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+/// an insert_subreg where src is a physical register, e.g.
+/// reg1024 = INSERT_SUBREG reg1024, c1, 0
+bool
+SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
+ unsigned SrcReg, unsigned SubIdx,
+ unsigned &RealSrcReg) {
+ const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
+ RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
+ assert(RealSrcReg && "Invalid insert_subreg instruction!");
+
+ LiveInterval &RHS = li_->getInterval(DstReg);
+ if (li_->hasInterval(RealSrcReg) &&
+ RHS.overlaps(li_->getInterval(RealSrcReg))) {
+ DOUT << "Interfere with register ";
+ DEBUG(li_->getInterval(RealSrcReg).print(DOUT, tri_));
+ return false; // Not coalescable
+ }
+ for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false; // Not coalescable
+ }
+ return true;
+}
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
+ MachineInstr *CopyMI = TheCopy.MI;
+
+ Again = false;
+ if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+ return false; // Already done.
+
+ DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
+
+ unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
+ bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
+ bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG;
+ bool isSubRegToReg = CopyMI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG;
+ unsigned SubIdx = 0;
+ if (isExtSubReg) {
+ DstReg = CopyMI->getOperand(0).getReg();
+ DstSubIdx = CopyMI->getOperand(0).getSubReg();
+ SrcReg = CopyMI->getOperand(1).getReg();
+ SrcSubIdx = CopyMI->getOperand(2).getImm();
+ } else if (isInsSubReg || isSubRegToReg) {
+ if (CopyMI->getOperand(2).getSubReg()) {
+ DOUT << "\tSource of insert_subreg is already coalesced "
+ << "to another register.\n";
+ return false; // Not coalescable.
+ }
+ DstReg = CopyMI->getOperand(0).getReg();
+ DstSubIdx = CopyMI->getOperand(3).getImm();
+ SrcReg = CopyMI->getOperand(2).getReg();
+ } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
+ assert(0 && "Unrecognized copy instruction!");
+ return false;
+ }
+
+ // If they are already joined we continue.
+ if (SrcReg == DstReg) {
+ DOUT << "\tCopy already coalesced.\n";
+ return false; // Not coalescable.
+ }
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+
+ // If they are both physical registers, we cannot join them.
+ if (SrcIsPhys && DstIsPhys) {
+ DOUT << "\tCan not coalesce physregs.\n";
+ return false; // Not coalescable.
+ }
+
+ // We only join virtual registers with allocatable physical registers.
+ if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
+ DOUT << "\tSrc reg is unallocatable physreg.\n";
+ return false; // Not coalescable.
+ }
+ if (DstIsPhys && !allocatableRegs_[DstReg]) {
+ DOUT << "\tDst reg is unallocatable physreg.\n";
+ return false; // Not coalescable.
+ }
+
+ // Check that a physical source register is compatible with dst regclass
+ if (SrcIsPhys) {
+ unsigned SrcSubReg = SrcSubIdx ?
+ tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg;
+ const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+ const TargetRegisterClass *DstSubRC = DstRC;
+ if (DstSubIdx)
+ DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
+ assert(DstSubRC && "Illegal subregister index");
+ if (!DstSubRC->contains(SrcSubReg)) {
+ DOUT << "\tIncompatible destination regclass: "
+ << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName()
+ << ".\n";
+ return false; // Not coalescable.
+ }
+ }
+
+ // Check that a physical dst register is compatible with source regclass
+ if (DstIsPhys) {
+ unsigned DstSubReg = DstSubIdx ?
+ tri_->getSubReg(DstReg, DstSubIdx) : DstReg;
+ const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg);
+ const TargetRegisterClass *SrcSubRC = SrcRC;
+ if (SrcSubIdx)
+ SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
+ assert(SrcSubRC && "Illegal subregister index");
+ if (!SrcSubRC->contains(DstSubReg)) {
+ DOUT << "\tIncompatible source regclass: "
+ << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName()
+ << ".\n";
+ return false; // Not coalescable.
+ }
+ }
+
+ // Should be non-null only when coalescing to a sub-register class.
+ bool CrossRC = false;
+ const TargetRegisterClass *NewRC = NULL;
+ MachineBasicBlock *CopyMBB = CopyMI->getParent();
+ unsigned RealDstReg = 0;
+ unsigned RealSrcReg = 0;
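+ // Handle sub-register extracts / inserts: figure out which registers are
+ // really being joined and check that the sub-register indices line up.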
+ if (isExtSubReg || isInsSubReg || isSubRegToReg) {
+ SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm();
+ if (SrcIsPhys && isExtSubReg) {
+ // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
+ // coalesced with AX.
+ unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg();
+ if (DstSubIdx) {
+ // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ if (DstSubIdx != SubIdx) {
+ DOUT << "\t Sub-register indices mismatch.\n";
+ return false; // Not coalescable.
+ }
+ } else
+ SrcReg = tri_->getSubReg(SrcReg, SubIdx);
+ SubIdx = 0;
+ } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) {
+ // EAX = INSERT_SUBREG EAX, r1024, 0
+ unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg();
+ if (SrcSubIdx) {
+ // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ if (SrcSubIdx != SubIdx) {
+ DOUT << "\t Sub-register indices mismatch.\n";
+ return false; // Not coalescable.
+ }
+ } else
+ DstReg = tri_->getSubReg(DstReg, SubIdx);
+ SubIdx = 0;
+ } else if ((DstIsPhys && isExtSubReg) ||
+ (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
+ if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
+ DOUT << "\tSrc of extract_subreg already coalesced with reg"
+ << " of a super-class.\n";
+ return false; // Not coalescable.
+ }
+
+ if (isExtSubReg) {
+ if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
+ return false; // Not coalescable
+ } else {
+ if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+ return false; // Not coalescable
+ }
+ SubIdx = 0;
+ } else {
+ unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg()
+ : CopyMI->getOperand(2).getSubReg();
+ if (OldSubIdx) {
+ if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg))
+ // r1024<2> = EXTRACT_SUBREG r1025, 2. Then r1024 has already been
+ // coalesced to a larger register so the subreg indices cancel out.
+ // Also check if the other larger register is of the same register
+ // class as the would be resulting register.
+ SubIdx = 0;
+ else {
+ DOUT << "\t Sub-register indices mismatch.\n";
+ return false; // Not coalescable.
+ }
+ }
+ if (SubIdx) {
+ unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
+ unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
+ unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
+ if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ }
+ } else if (differingRegisterClasses(SrcReg, DstReg)) {
+ if (!CrossClassJoin)
+ return false;
+ CrossRC = true;
+
+ // FIXME: What if the result of a EXTRACT_SUBREG is then coalesced
+ // with another? If it's the resulting destination register, then
+ // the subidx must be propagated to uses (but only those defined
+ // by the EXTRACT_SUBREG). If it's being coalesced into another
+ // register, it should be safe because the register is assumed to have
+ // the register class of the super-register.
+
+ // Process moves where one of the registers have a sub-register index.
+ MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
+ MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
+ SubIdx = DstMO->getSubReg();
+ if (SubIdx) {
+ if (SrcMO->getSubReg())
+ // FIXME: can we handle this?
+ return false;
+ // This is not an insert_subreg but it looks like one.
+ // e.g. %reg1024:4 = MOV32rr %EAX
+ isInsSubReg = true;
+ if (SrcIsPhys) {
+ if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+ return false; // Not coalescable
+ SubIdx = 0;
+ }
+ } else {
+ SubIdx = SrcMO->getSubReg();
+ if (SubIdx) {
+ // This is not an extract_subreg but it looks like one.
+ // e.g. %cl = MOV16rr %reg1024:1
+ isExtSubReg = true;
+ if (DstIsPhys) {
+ if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
+ return false; // Not coalescable
+ SubIdx = 0;
+ }
+ }
+ }
+
+ const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
+ const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
+ unsigned LargeReg = SrcReg;
+ unsigned SmallReg = DstReg;
+ unsigned Limit = 0;
+
+ // Now determine the register class of the joined register.
+ if (isExtSubReg) {
+ if (SubIdx && DstRC && DstRC->isASubClass()) {
+ // This is a move to a sub-register class. However, the source is a
+ // sub-register of a larger register class. We don't know what the
+ // register class should be. FIXME.
+ Again = true;
+ return false;
+ }
+ Limit = allocatableRCRegs_[DstRC].count();
+ } else if (!SrcIsPhys && !DstIsPhys) {
+ NewRC = getCommonSubClass(SrcRC, DstRC);
+ if (!NewRC) {
+ DOUT << "\tDisjoint regclasses: "
+ << SrcRC->getName() << ", "
+ << DstRC->getName() << ".\n";
+ return false; // Not coalescable.
+ }
+ if (DstRC->getSize() > SrcRC->getSize())
+ std::swap(LargeReg, SmallReg);
+ }
+
+ // If we are joining two virtual registers and the resulting register
+ // class is more restrictive (fewer registers, smaller size), check if it's
+ // worth doing the merge.
+ if (!SrcIsPhys && !DstIsPhys &&
+ (isExtSubReg || DstRC->isASubClass()) &&
+ !isWinToJoinCrossClass(LargeReg, SmallReg,
+ allocatableRCRegs_[NewRC].count())) {
+ DOUT << "\tSrc/Dest are different register classes.\n";
+ // Allow the coalescer to try again in case either side gets coalesced to
+ // a physical register that's compatible with the other side. e.g.
+ // r1024 = MOV32to32_ r1025
+ // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+
+ // Will it create illegal extract_subreg / insert_subreg?
+ if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg))
+ return false;
+ if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg))
+ return false;
+
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg &&
+ "Register mapping is horribly broken!");
+
+ DOUT << "\t\tInspecting "; SrcInt.print(DOUT, tri_);
+ DOUT << " and "; DstInt.print(DOUT, tri_);
+ DOUT << ": ";
+
+ // Save a copy of the virtual register live interval. We'll manually
+ // merge this into the "real" physical register live interval this is
+ // coalesced with.
+ LiveInterval *SavedLI = 0;
+ if (RealDstReg)
+ SavedLI = li_->dupInterval(&SrcInt);
+ else if (RealSrcReg)
+ SavedLI = li_->dupInterval(&DstInt);
+
+ // Check if it is necessary to propagate "isDead" property.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
+ MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
+ bool isDead = mopd->isDead();
+
+ // We need to be careful about coalescing a source physical register with a
+ // virtual register. Once the coalescing is done, it cannot be broken and
+ // these are not spillable! If the destination interval uses are far away,
+ // think twice about coalescing them!
+ if (!isDead && (SrcIsPhys || DstIsPhys)) {
+ // If the copy is in a loop, take care not to coalesce aggressively if the
+ // src is coming in from outside the loop (or the dst is out of the loop).
+ // If it's not in a loop, then determine whether to join them based purely
+ // on the length of the interval.
+ if (PhysJoinTweak) {
+ if (SrcIsPhys) {
+ if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+ DstInt.preference = SrcReg;
+ ++numAborts;
+ DOUT << "\tMay tie down a physical register, abort!\n";
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ } else {
+ if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+ SrcInt.preference = DstReg;
+ ++numAborts;
+ DOUT << "\tMay tie down a physical register, abort!\n";
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ } else {
+ // If the virtual register live interval is long but it has low use density,
+ // do not join them, instead mark the physical register as its allocation
+ // preference.
+ LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+ unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
+ unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
+ const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ if (TheCopy.isBackEdge)
+ Threshold *= 2; // Favors back edge copies.
+
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ float Ratio = 1.0 / Threshold;
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_begin(JoinVReg),
+ mri_->use_end()) / Length) < Ratio)) {
+ JoinVInt.preference = JoinPReg;
+ ++numAborts;
+ DOUT << "\tMay tie down a physical register, abort!\n";
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+ }
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ bool Swapped = false;
+ // If SrcInt is implicitly defined, it's safe to coalesce.
+ bool isEmpty = SrcInt.empty();
+ if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
+ // Only coalesce an empty interval (defined by implicit_def) with
+ // another interval which has a valno defined by the CopyMI and the CopyMI
+ // is a kill of the implicit def.
+ DOUT << "Not profitable!\n";
+ return false;
+ }
+
+ if (!isEmpty && !JoinIntervals(DstInt, SrcInt, Swapped)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+ ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+ (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
+ RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
+ JoinedCopies.insert(CopyMI);
+ return true;
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DOUT << "Interference!\n";
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ LiveInterval *ResSrcInt = &SrcInt;
+ LiveInterval *ResDstInt = &DstInt;
+ if (Swapped) {
+ std::swap(SrcReg, DstReg);
+ std::swap(ResSrcInt, ResDstInt);
+ }
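+ // After the optional swap, DstReg/ResDstInt name the register that keeps
+ // the joined live ranges; SrcReg's interval is removed further below.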
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "LiveInterval::join didn't work right!");
+
+ // If we're about to merge live ranges into a physical register live interval,
+ // we have to update any aliased register's live ranges to indicate that they
+ // have clobbered values for this range.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ // If this is a extract_subreg where dst is a physical register, e.g.
+ // cl = EXTRACT_SUBREG reg1024, 1
+ // then create and update the actual physical register allocated to RHS.
+ if (RealDstReg || RealSrcReg) {
+ LiveInterval &RealInt =
+ li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg);
+ for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
+ E = SavedLI->vni_end(); I != E; ++I) {
+ const VNInfo *ValNo = *I;
+ VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy,
+ li_->getVNInfoAllocator());
+ NewValNo->hasPHIKill = ValNo->hasPHIKill;
+ NewValNo->redefByEC = ValNo->redefByEC;
+ RealInt.addKills(NewValNo, ValNo->kills);
+ RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
+ }
+ RealInt.weight += SavedLI->weight;
+ DstReg = RealDstReg ? RealDstReg : RealSrcReg;
+ }
+
+ // Update the liveintervals of sub-registers.
+ for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
+ li_->getOrCreateInterval(*AS).MergeInClobberRanges(*ResSrcInt,
+ li_->getVNInfoAllocator());
+ }
+
+ // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the
+ // larger super-register.
+ if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
+ !SrcIsPhys && !DstIsPhys) {
+ if ((isExtSubReg && !Swapped) ||
+ ((isInsSubReg || isSubRegToReg) && Swapped)) {
+ ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
+ std::swap(SrcReg, DstReg);
+ std::swap(ResSrcInt, ResDstInt);
+ }
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CrossRC) {
+ ++numCrossRCs;
+ if (NewRC)
+ mri_->setRegClass(DstReg, NewRC);
+ }
+
+ if (NewHeuristic) {
+ // Add all copies that define val# in the source interval into the queue.
+ for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(),
+ e = ResSrcInt->vni_end(); i != e; ++i) {
+ const VNInfo *vni = *i;
+ if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ continue;
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx;
+ if (CopyMI &&
+ JoinedCopies.count(CopyMI) == 0 &&
+ tii_->isMoveInstr(*CopyMI, NewSrcReg, NewDstReg,
+ NewSrcSubIdx, NewDstSubIdx)) {
+ unsigned LoopDepth = loopInfo->getLoopDepth(CopyMBB);
+ JoinQueue->push(CopyRec(CopyMI, LoopDepth,
+ isBackEdgeCopy(CopyMI, DstReg)));
+ }
+ }
+ }
+
+ // Remember to delete the copy instruction.
+ JoinedCopies.insert(CopyMI);
+
+ // Some live range has been lengthened due to coalescing, eliminate the
+ // unnecessary kills.
+ RemoveUnnecessaryKills(SrcReg, *ResDstInt);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ RemoveUnnecessaryKills(DstReg, *ResDstInt);
+
+ if (isInsSubReg)
+ // Avoid:
+ // r1024 = op
+ // r1024 = implicit_def
+ // ...
+ // = r1024
+ RemoveDeadImpDef(DstReg, *ResDstInt);
+ UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
+
+ // SrcReg is guaranteed to be the register whose live interval is being
+ // merged.
+ li_->removeInterval(SrcReg);
+
+ // Manually delete the live interval copy.
+ if (SavedLI) {
+ SavedLI->clear();
+ delete SavedLI;
+ }
+
+ if (isEmpty) {
+ // Now that the copy is being coalesced away, the val# previously defined
+ // by the copy is being defined by an IMPLICIT_DEF which defines a zero
+ // length interval. Remove the val#.
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ const LiveRange *LR = ResDstInt->getLiveRangeContaining(CopyIdx);
+ VNInfo *ImpVal = LR->valno;
+ assert(ImpVal->def == CopyIdx);
+ unsigned NextDef = LR->end;
+ RemoveCopiesFromValNo(*ResDstInt, ImpVal);
+ ResDstInt->removeValNo(ImpVal);
+ LR = ResDstInt->FindLiveRangeContaining(NextDef);
+ if (LR != ResDstInt->end() && LR->valno->def == NextDef) {
+ // Special case: vr1024 = implicit_def
+ // vr1024 = insert_subreg vr1024, vr1025, c
+ // The insert_subreg becomes a "copy" that defines a val# which can itself
+ // be coalesced away.
+ MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef);
+ if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG)
+ LR->valno->copy = DefMI;
+ }
+ }
+
+ // If the resulting interval has a preference that no longer fits because of
+ // subreg coalescing, just clear the preference.
+ if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
+ TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
+ const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
+ if (!RC->contains(ResDstInt->preference))
+ ResDstInt->preference = 0;
+ }
+
+ DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_);
+ DOUT << "\n";
+
+ ++numJoins;
+ return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+ /// VNI is the value number that we're trying to resolve. NewVNInfo keeps
+ /// track of the new value numbers chosen for the result
+ /// LiveInterval. ThisFromOther/OtherFromThis are maps that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+ DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+ SmallVector<int, 16> &ThisValNoAssignments,
+ SmallVector<int, 16> &OtherValNoAssignments) {
+ unsigned VN = VNI->id;
+
+ // If the VN has already been computed, just return it.
+ if (ThisValNoAssignments[VN] >= 0)
+ return ThisValNoAssignments[VN];
+// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+
+ // If this val is not a copy from the other val, then it must be a new value
+ // number in the destination.
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+ if (I == ThisFromOther.end()) {
+ NewVNInfo.push_back(VNI);
+ return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+ }
+ VNInfo *OtherValNo = I->second;
+
+ // Otherwise, this *is* a copy from the RHS. If the other side has already
+ // been computed, return it.
+ if (OtherValNoAssignments[OtherValNo->id] >= 0)
+ return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+ // Mark this value number as currently being computed, then ask what the
+ // ultimate value # of the other value is.
+ ThisValNoAssignments[VN] = -2;
+ unsigned UltimateVN =
+ ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+ OtherValNoAssignments, ThisValNoAssignments);
+ return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
+ return std::find(V.begin(), V.end(), Val) != V.end();
+}
+
+/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+/// the specified live interval is defined by a copy from the specified
+/// register.
+bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
+ LiveRange *LR,
+ unsigned Reg) {
+ unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
+ if (SrcReg == Reg)
+ return true;
+ if (LR->valno->def == ~0U &&
+ TargetRegisterInfo::isPhysicalRegister(li.reg) &&
+ *tri_->getSuperRegisters(li.reg)) {
+ // It's a sub-register live interval, we may not have precise information.
+ // Re-compute it.
+ MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (DefMI &&
+ tii_->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ DstReg == li.reg && SrcReg == Reg) {
+ // Cache computed info.
+ LR->valno->def = LR->start;
+ LR->valno->copy = DefMI;
+ return true;
+ }
+ }
+ return false;
+}
+
+ /// SimpleJoin - Attempt to join the specified interval into this one. The
+/// caller of this method must guarantee that the RHS only contains a single
+/// value number and that the RHS is not defined by a copy from this
+/// interval. This returns false if the intervals are not joinable, or it
+/// joins them and returns true.
+bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
+ assert(RHS.containsOneValue());
+
+ // Some value numbers (potentially more than one) in the current
+ // interval may be defined as copies from the RHS. Scan the overlapping
+ // portions of the LHS and RHS, keeping track of this and looking for
+ // overlapping live ranges that are NOT defined as copies. If these exist, we
+ // cannot coalesce.
+
+ LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
+ LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
+
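+ // Advance whichever iterator starts earlier so the scan begins where the
+ // two intervals may first overlap.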
+ if (LHSIt->start < RHSIt->start) {
+ LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
+ if (LHSIt != LHS.begin()) --LHSIt;
+ } else if (RHSIt->start < LHSIt->start) {
+ RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
+ if (RHSIt != RHS.begin()) --RHSIt;
+ }
+
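+ // LHS value numbers that are proven to be defined by copies from the RHS;
+ // if the join succeeds they will all be merged into one value number.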
+ SmallVector<VNInfo*, 8> EliminatedLHSVals;
+
+ while (1) {
+ // Determine if these live intervals overlap.
+ bool Overlaps = false;
+ if (LHSIt->start <= RHSIt->start)
+ Overlaps = LHSIt->end > RHSIt->start;
+ else
+ Overlaps = RHSIt->end > LHSIt->start;
+
+ // If the live intervals overlap, there are two interesting cases: if the
+ // LHS interval is defined by a copy from the RHS, it's ok and we record
+ // that the LHS value # is the same as the RHS. If it's not, then we cannot
+ // coalesce these live ranges and we bail out.
+ if (Overlaps) {
+ // If we haven't already recorded that this value # is safe, check it.
+ if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
+ // Copy from the RHS?
+ if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
+ return false; // Nope, bail out.
+
+ if (LHSIt->contains(RHSIt->valno->def))
+ // Here is an interesting situation:
+ // BB1:
+ // vr1025 = copy vr1024
+ // ..
+ // BB2:
+ // vr1024 = op
+ // = vr1025
+ // Even though vr1025 is copied from vr1024, it's not safe to
+ // coalesce them since the live range of vr1025 intersects the
+ // def of vr1024. This happens because vr1025 is assigned the
+ // value of the previous iteration of vr1024.
+ return false;
+ EliminatedLHSVals.push_back(LHSIt->valno);
+ }
+
+ // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break;
+ continue;
+ }
+
+ if (LHSIt->end < RHSIt->end) {
+ if (++LHSIt == LHSEnd) break;
+ } else {
+ // One interesting case to check here. It's possible that we have
+ // something like "X3 = Y" which defines a new value number in the LHS,
+ // and is the last use of this liverange of the RHS. In this case, we
+ // want to notice this copy (so that it gets coalesced away) even though
+ // the live ranges don't actually overlap.
+ if (LHSIt->start == RHSIt->end) {
+ if (InVector(LHSIt->valno, EliminatedLHSVals)) {
+ // We already know that this value number is going to be merged in
+ // if coalescing succeeds. Just skip the liverange.
+ if (++LHSIt == LHSEnd) break;
+ } else {
+ // Otherwise, if this is a copy from the RHS, mark it as being merged
+ // in.
+ if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
+ if (LHSIt->contains(RHSIt->valno->def))
+ // Here is an interesting situation:
+ // BB1:
+ // vr1025 = copy vr1024
+ // ..
+ // BB2:
+ // vr1024 = op
+ // = vr1025
+ // Even though vr1025 is copied from vr1024, it's not safe to
+ // coalesce them since the live range of vr1025 intersects the
+ // def of vr1024. This happens because vr1025 is assigned the
+ // value of the previous iteration of vr1024.
+ return false;
+ EliminatedLHSVals.push_back(LHSIt->valno);
+
+ // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break;
+ }
+ }
+ }
+
+ if (++RHSIt == RHSEnd) break;
+ }
+ }
+
+ // If we got here, we know that the coalescing will be successful and that
+ // the value numbers in EliminatedLHSVals will all be merged together. Since
+ // the most common case is that EliminatedLHSVals has a single number, we
+ // optimize for it: if there is more than one value, we merge them all into
+ // the lowest numbered one, then handle the interval as if we were merging
+ // with one value number.
+ VNInfo *LHSValNo = NULL;
+ if (EliminatedLHSVals.size() > 1) {
+ // Loop through all the equal value numbers merging them into the smallest
+ // one.
+ VNInfo *Smallest = EliminatedLHSVals[0];
+ for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) {
+ if (EliminatedLHSVals[i]->id < Smallest->id) {
+ // Merge the current notion of the smallest into the smaller one.
+ LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]);
+ Smallest = EliminatedLHSVals[i];
+ } else {
+ // Merge into the smallest.
+ LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest);
+ }
+ }
+ LHSValNo = Smallest;
+ } else if (EliminatedLHSVals.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
+ *tri_->getSuperRegisters(LHS.reg))
+ // Imprecise sub-register information. Can't handle it.
+ return false;
+ assert(0 && "No copies from the RHS?");
+ } else {
+ LHSValNo = EliminatedLHSVals[0];
+ }
+
+ // Okay, now that there is a single LHS value number that we're merging the
+ // RHS into, update the value number info for the LHS to indicate that the
+ // value number is defined where the RHS value number was.
+ const VNInfo *VNI = RHS.getValNumInfo(0);
+ LHSValNo->def = VNI->def;
+ LHSValNo->copy = VNI->copy;
+
+ // Okay, the final step is to loop over the RHS live intervals, adding them to
+ // the LHS.
+ LHSValNo->hasPHIKill |= VNI->hasPHIKill;
+ LHS.addKills(LHSValNo, VNI->kills);
+ LHS.MergeRangesInAsValue(RHS, LHSValNo);
+ LHS.weight += RHS.weight;
+ if (RHS.preference && !LHS.preference)
+ LHS.preference = RHS.preference;
+
+ // Update the liveintervals of sub-registers.
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
+ for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS)
+ li_->getOrCreateInterval(*AS).MergeInClobberRanges(LHS,
+ li_->getVNInfoAllocator());
+
+ return true;
+}
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false. Otherwise, if one of the intervals being joined is a
+/// physreg, this method always canonicalizes LHS to be it. The output
+/// "RHS" will not have been modified, so we can use this information
+/// below to update aliases.
+bool
+SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
+ bool &Swapped) {
+ // Compute the final value assignment, assuming that the live ranges can be
+ // coalesced.
+ SmallVector<int, 16> LHSValNoAssignments;
+ SmallVector<int, 16> RHSValNoAssignments;
+ DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+ DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+ SmallVector<VNInfo*, 16> NewVNInfo;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its sub-registers is overlapping the live interval of the virtual
+ // register. If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
+ *tri_->getSubRegisters(LHS.reg)) {
+ // If it's coalescing a virtual register to a physical register, estimate
+ // its live interval length. This is the *cost* of scanning an entire live
+ // interval. If the cost is low, we'll do an exhaustive check instead.
+
+ // If this is something like this:
+ // BB1:
+ // v1024 = op
+ // ...
+ // BB2:
+ // ...
+ // RAX = v1024
+ //
+    // That is, the live interval of v1024 crosses a basic block boundary.
+    // In that case we can't rely on the less conservative check: it's
+    // possible a sub-register is defined before v1024 (or is live in) and is
+    // live out of BB1.
+ if (RHS.containsOneValue() &&
+ li_->intervalIsInOneMBB(RHS) &&
+ li_->getApproximateInstructionCount(RHS) <= 10) {
+ // Perform a more exhaustive check for some common cases.
+ if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
+ return false;
+ } else {
+ for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false;
+ }
+ }
+ } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) &&
+ *tri_->getSubRegisters(RHS.reg)) {
+ if (LHS.containsOneValue() &&
+ li_->getApproximateInstructionCount(LHS) <= 10) {
+ // Perform a more exhaustive check for some common cases.
+ if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
+ return false;
+ } else {
+ for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+ return false;
+ }
+ }
+ }
+
+ // Compute ultimate value numbers for the LHS and RHS values.
+ if (RHS.containsOneValue()) {
+ // Copies from a liveinterval with a single value are simple to handle and
+ // very common, handle the special case here. This is important, because
+ // often RHS is small and LHS is large (e.g. a physreg).
+
+ // Find out if the RHS is defined as a copy from some value in the LHS.
+ int RHSVal0DefinedFromLHS = -1;
+ int RHSValID = -1;
+ VNInfo *RHSValNoInfo = NULL;
+ VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0);
+ unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0);
+ if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) {
+ // If RHS is not defined as a copy from the LHS, we can use simpler and
+ // faster checks to see if the live ranges are coalescable. This joiner
+ // can't swap the LHS/RHS intervals though.
+ if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+ return SimpleJoin(LHS, RHS);
+ } else {
+ RHSValNoInfo = RHSValNoInfo0;
+ }
+ } else {
+ // It was defined as a copy from the LHS, find out what value # it is.
+ RHSValNoInfo = LHS.getLiveRangeContaining(RHSValNoInfo0->def-1)->valno;
+ RHSValID = RHSValNoInfo->id;
+ RHSVal0DefinedFromLHS = RHSValID;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.resize(LHS.getNumValNums(), NULL);
+
+ // Okay, *all* of the values in LHS that are defined as a copy from RHS
+ // should now get updated.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) {
+ if (LHSSrcReg != RHS.reg) {
+ // If this is not a copy from the RHS, its value number will be
+ // unmodified by the coalescing.
+ NewVNInfo[VN] = VNI;
+ LHSValNoAssignments[VN] = VN;
+ } else if (RHSValID == -1) {
+ // Otherwise, it is a copy from the RHS, and we don't already have a
+ // value# for it. Keep the current value number, but remember it.
+ LHSValNoAssignments[VN] = RHSValID = VN;
+ NewVNInfo[VN] = RHSValNoInfo;
+ LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
+ } else {
+ // Otherwise, use the specified value #.
+ LHSValNoAssignments[VN] = RHSValID;
+ if (VN == (unsigned)RHSValID) { // Else this val# is dead.
+ NewVNInfo[VN] = RHSValNoInfo;
+ LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
+ }
+ }
+ } else {
+ NewVNInfo[VN] = VNI;
+ LHSValNoAssignments[VN] = VN;
+ }
+ }
+
+ assert(RHSValID != -1 && "Didn't find value #?");
+ RHSValNoAssignments[0] = RHSValID;
+ if (RHSVal0DefinedFromLHS != -1) {
+ // This path doesn't go through ComputeUltimateVN so just set
+ // it to anything.
+ RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1;
+ }
+ } else {
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy?
+ continue;
+
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ if (li_->getVNInfoSourceReg(VNI) != RHS.reg)
+ continue;
+
+ // Figure out the value # from the RHS.
+ LHSValsDefinedFromRHS[VNI]=RHS.getLiveRangeContaining(VNI->def-1)->valno;
+ }
+
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy?
+ continue;
+
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ if (li_->getVNInfoSourceReg(VNI) != LHS.reg)
+ continue;
+
+ // Figure out the value # from the LHS.
+ RHSValsDefinedFromLHS[VNI]=LHS.getLiveRangeContaining(VNI->def-1)->valno;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->def == ~1U)
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->def == ~1U)
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
+ }
+
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
+ }
+ }
+
+ // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+ // interval lists to see if these intervals are coalescable.
+ LiveInterval::const_iterator I = LHS.begin();
+ LiveInterval::const_iterator IE = LHS.end();
+ LiveInterval::const_iterator J = RHS.begin();
+ LiveInterval::const_iterator JE = RHS.end();
+
+ // Skip ahead until the first place of potential sharing.
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
+
+ while (1) {
+ // Determine if these two live ranges overlap.
+ bool Overlaps;
+ if (I->start < J->start) {
+ Overlaps = I->end > J->start;
+ } else {
+ Overlaps = J->end > I->start;
+ }
+
+ // If so, check value # info to determine if they are really different.
+ if (Overlaps) {
+ // If the live range overlap will map to the same value number in the
+ // result liverange, we can still coalesce them. If not, we can't.
+ if (LHSValNoAssignments[I->valno->id] !=
+ RHSValNoAssignments[J->valno->id])
+ return false;
+ }
+
+ if (I->end < J->end) {
+ ++I;
+ if (I == IE) break;
+ } else {
+ ++J;
+ if (J == JE) break;
+ }
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+ E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned LHSValID = LHSValNoAssignments[VNI->id];
+ LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
+ NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill;
+ RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+ E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned RHSValID = RHSValNoAssignments[VNI->id];
+ LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
+ NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill;
+ LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
+ }
+
+ // If we get here, we know that we can coalesce the live ranges. Ask the
+ // intervals to coalesce themselves now.
+ if ((RHS.ranges.size() > LHS.ranges.size() &&
+ TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
+ TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+ RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo);
+ Swapped = true;
+ } else {
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo);
+ Swapped = false;
+ }
+ return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+ // depth of the basic block (the unsigned), and then on the MBB number.
+ struct DepthMBBCompare {
+ typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+ bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+ if (LHS.first > RHS.first) return true; // Deeper loops first
+ return LHS.first == RHS.first &&
+ LHS.second->getNumber() < RHS.second->getNumber();
+ }
+ };
+}
+
+/// getRepIntervalSize - Returns the size of the interval that represents the
+/// specified register.
+template<class SF>
+unsigned JoinPriorityQueue<SF>::getRepIntervalSize(unsigned Reg) {
+ return Rc->getRepIntervalSize(Reg);
+}
+
+/// CopyRecSort::operator() - Join priority queue sorting function.
+///
+bool CopyRecSort::operator()(CopyRec left, CopyRec right) const {
+ // Inner loops first.
+ if (left.LoopDepth > right.LoopDepth)
+ return false;
+ else if (left.LoopDepth == right.LoopDepth)
+ if (left.isBackEdge && !right.isBackEdge)
+ return false;
+ return true;
+}
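+
+// Note: JoinPriorityQueue wraps a std::priority_queue, which pops the element
+// that compares *greatest* under this predicate. Returning false when 'left'
+// is in a deeper loop (or is a back-edge copy at equal depth) makes such
+// copies compare greater, so they are popped, and hence coalesced, first when
+// the NewHeuristic path is in use.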
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<CopyRec> &TryAgain) {
+ DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+ std::vector<CopyRec> VirtCopies;
+ std::vector<CopyRec> PhysCopies;
+ std::vector<CopyRec> ImpDefCopies;
+ unsigned LoopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E;) {
+ MachineInstr *Inst = MII++;
+
+    // If this isn't a copy nor an extract_subreg, we can't join intervals.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(1).getReg();
+ } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ continue;
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (NewHeuristic) {
+ JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg)));
+ } else {
+ if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+ ImpDefCopies.push_back(CopyRec(Inst, 0, false));
+ else if (SrcIsPhys || DstIsPhys)
+ PhysCopies.push_back(CopyRec(Inst, 0, false));
+ else
+ VirtCopies.push_back(CopyRec(Inst, 0, false));
+ }
+ }
+
+ if (NewHeuristic)
+ return;
+
+ // Try coalescing implicit copies first, followed by copies to / from
+ // physical registers, then finally copies from virtual registers to
+ // virtual registers.
+ for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = ImpDefCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = PhysCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+ CopyRec &TheCopy = VirtCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+}
+
+void SimpleRegisterCoalescing::joinIntervals() {
+ DOUT << "********** JOINING INTERVALS ***********\n";
+
+ if (NewHeuristic)
+ JoinQueue = new JoinPriorityQueue<CopyRecSort>(this);
+
+ std::vector<CopyRec> TryAgainList;
+ if (loopInfo->empty()) {
+ // If there are no loops in the function, join intervals in function order.
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ I != E; ++I)
+ CopyCoalesceInMBB(I, TryAgainList);
+ } else {
+ // Otherwise, join intervals in inner loops before other intervals.
+ // Unfortunately we can't just iterate over loop hierarchy here because
+ // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+ // Join intervals in the function prolog first. We want to join physical
+    // registers with virtual registers before the intervals get too long.
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+ }
+
+ // Sort by loop depth.
+ std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+ // Finally, join intervals in loop nest order.
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ }
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ if (NewHeuristic) {
+ SmallVector<CopyRec, 16> TryAgain;
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+ while (!JoinQueue->empty()) {
+ CopyRec R = JoinQueue->pop();
+ bool Again = false;
+ bool Success = JoinCopy(R, Again);
+ if (Success)
+ ProgressMade = true;
+ else if (Again)
+ TryAgain.push_back(R);
+ }
+
+ if (ProgressMade) {
+ while (!TryAgain.empty()) {
+ JoinQueue->push(TryAgain.back());
+ TryAgain.pop_back();
+ }
+ }
+ }
+ } else {
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ CopyRec &TheCopy = TryAgainList[i];
+ if (TheCopy.MI) {
+ bool Again = false;
+ bool Success = JoinCopy(TheCopy, Again);
+ if (Success || !Again) {
+ TheCopy.MI = 0; // Mark this one as done.
+ ProgressMade = true;
+ }
+ }
+ }
+ }
+ }
+
+ if (NewHeuristic)
+ delete JoinQueue;
+}
+
+/// Return true if the two specified registers belong to different register
+/// classes. The registers may be either phys or virt regs.
+bool
+SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
+ unsigned RegB) const {
+ // Get the register classes for the first reg.
+ if (TargetRegisterInfo::isPhysicalRegister(RegA)) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "Shouldn't consider two physregs!");
+ return !mri_->getRegClass(RegB)->contains(RegA);
+ }
+
+ // Compare against the regclass for the second reg.
+ const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA);
+ if (TargetRegisterInfo::isVirtualRegister(RegB)) {
+ const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB);
+ return RegClassA != RegClassB;
+ }
+ return !RegClassA->contains(RegB);
+}
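+
+// Illustrative (hypothetical) example: on x86, joining a virtual register of
+// class GR32 with the physical register EAX returns false here, since GR32
+// contains EAX; joining the same virtual register with a physical register
+// outside GR32 returns true, which the caller generally treats as a reason
+// not to coalesce (modulo its cross-class heuristics).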
+
+/// lastRegisterUse - Returns the last use of the specified register between
+/// cycles Start and End or NULL if there are no uses.
+MachineOperand *
+SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End,
+ unsigned Reg, unsigned &UseIdx) const{
+ UseIdx = 0;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineOperand *LastUse = NULL;
+ for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg),
+ E = mri_->use_end(); I != E; ++I) {
+ MachineOperand &Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == DstReg)
+ // Ignore identity copies.
+ continue;
+ unsigned Idx = li_->getInstructionIndex(UseMI);
+ if (Idx >= Start && Idx < End && Idx >= UseIdx) {
+ LastUse = &Use;
+ UseIdx = li_->getUseIndex(Idx);
+ }
+ }
+ return LastUse;
+ }
+
+ int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM;
+ int s = Start;
+ while (e >= s) {
+ // Skip deleted instructions
+ MachineInstr *MI = li_->getInstructionFromIndex(e);
+ while ((e - InstrSlots::NUM) >= s && !MI) {
+ e -= InstrSlots::NUM;
+ MI = li_->getInstructionFromIndex(e);
+ }
+ if (e < s || MI == NULL)
+ return NULL;
+
+ // Ignore identity copies.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ SrcReg == DstReg))
+ for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+ MachineOperand &Use = MI->getOperand(i);
+ if (Use.isReg() && Use.isUse() && Use.getReg() &&
+ tri_->regsOverlap(Use.getReg(), Reg)) {
+ UseIdx = li_->getUseIndex(e);
+ return &Use;
+ }
+ }
+
+ e -= InstrSlots::NUM;
+ }
+
+ return NULL;
+}
+
+
+void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
+ if (TargetRegisterInfo::isPhysicalRegister(reg))
+ cerr << tri_->getName(reg);
+ else
+ cerr << "%reg" << reg;
+}
+
+void SimpleRegisterCoalescing::releaseMemory() {
+ JoinedCopies.clear();
+ ReMatCopies.clear();
+ ReMatDefs.clear();
+}
+
+static bool isZeroLengthInterval(LiveInterval *li) {
+ for (LiveInterval::Ranges::const_iterator
+ i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
+ if (i->end - i->start > LiveInterval::InstrSlots::NUM)
+ return false;
+ return true;
+}
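+
+// For instance, an interval all of whose ranges are killed by the instruction
+// immediately following their def (so end - start never exceeds
+// LiveInterval::InstrSlots::NUM) counts as zero length; runOnMachineFunction
+// below gives such intervals HUGE_VALF weight so they are never spilled.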
+
+/// TurnCopyIntoImpDef - If the source of the specified copy is an implicit
+/// def, turn the copy into an implicit def.
+bool
+SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
+ MachineBasicBlock *MBB,
+ unsigned DstReg, unsigned SrcReg) {
+ MachineInstr *CopyMI = &*I;
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+ if (!li_->hasInterval(SrcReg))
+ return false;
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ if (!SrcInt.empty())
+ return false;
+ if (!li_->hasInterval(DstReg))
+ return false;
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx);
+ DstInt.removeValNo(DstLR->valno);
+ CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+ for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i)
+ CopyMI->RemoveOperand(i);
+ bool NoUse = mri_->use_empty(SrcReg);
+ if (NoUse) {
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
+ E = mri_->reg_end(); I != E; ) {
+ assert(I.getOperand().isDef());
+ MachineInstr *DefMI = &*I;
+ ++I;
+ // The implicit_def source has no other uses, delete it.
+ assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF);
+ li_->RemoveMachineInstrFromMaps(DefMI);
+ DefMI->eraseFromParent();
+ }
+ }
+ ++I;
+ return true;
+}
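+
+// Illustration: if vr1025's only def is an IMPLICIT_DEF (so its live interval
+// is empty), then "vr1024 = copy vr1025" is rewritten in place into
+// "vr1024 = IMPLICIT_DEF", the val# that the copy defined is dropped from
+// vr1024's interval, and, once vr1025 has no uses left, its IMPLICIT_DEF
+// instructions are deleted as well.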
+
+
+bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ li_ = &getAnalysis<LiveIntervals>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ DOUT << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n';
+
+ allocatableRegs_ = tri_->getAllocatableSet(fn);
+ for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
+ E = tri_->regclass_end(); I != E; ++I)
+ allocatableRCRegs_.insert(std::make_pair(*I,
+ tri_->getAllocatableSet(fn, *I)));
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining) {
+ joinIntervals();
+ DEBUG({
+ DOUT << "********** INTERVALS POST JOINING **********\n";
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){
+ I->second->print(DOUT, tri_);
+ DOUT << "\n";
+ }
+ });
+ }
+
+ // Perform a final pass over the instructions and compute spill weights
+ // and remove identity moves.
+ SmallVector<unsigned, 4> DeadDefs;
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ unsigned loopDepth = loopInfo->getLoopDepth(mbb);
+
+ for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ) {
+ MachineInstr *MI = mii;
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (JoinedCopies.count(MI)) {
+ // Delete all coalesced copies.
+ if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+ MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) &&
+ "Unrecognized copy instruction");
+ DstReg = MI->getOperand(0).getReg();
+ }
+ if (MI->registerDefIsDead(DstReg)) {
+ LiveInterval &li = li_->getInterval(DstReg);
+ if (!ShortenDeadCopySrcLiveRange(li, MI))
+ ShortenDeadCopyLiveRange(li, MI);
+ }
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ continue;
+ }
+
+ // Now check if this is a remat'ed def instruction which is now dead.
+ if (ReMatDefs.count(MI)) {
+ bool isDead = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ DeadDefs.push_back(Reg);
+ if (MO.isDead())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !mri_->use_empty(Reg)) {
+ isDead = false;
+ break;
+ }
+ }
+ if (isDead) {
+ while (!DeadDefs.empty()) {
+ unsigned DeadDef = DeadDefs.back();
+ DeadDefs.pop_back();
+ RemoveDeadDef(li_->getInterval(DeadDef), MI);
+ }
+ li_->RemoveMachineInstrFromMaps(mii);
+ mii = mbbi->erase(mii);
+ continue;
+ } else
+ DeadDefs.clear();
+ }
+
+      // If the move will be an identity move, delete it.
+ bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+ if (isMove && SrcReg == DstReg) {
+ if (li_->hasInterval(SrcReg)) {
+ LiveInterval &RegInt = li_->getInterval(SrcReg);
+          // If the def of this move instruction is dead, remove its live
+          // range from the destination register's live interval.
+ if (MI->registerDefIsDead(DstReg)) {
+ if (!ShortenDeadCopySrcLiveRange(RegInt, MI))
+ ShortenDeadCopyLiveRange(RegInt, MI);
+ }
+ }
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ } else if (!isMove || !TurnCopyIntoImpDef(mii, mbb, DstReg, SrcReg)) {
+ SmallSet<unsigned, 4> UniqueUses;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &mop = MI->getOperand(i);
+ if (mop.isReg() && mop.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(mop.getReg())) {
+ unsigned reg = mop.getReg();
+ // Multiple uses of reg by the same instruction. It should not
+ // contribute to spill weight again.
+ if (UniqueUses.count(reg) != 0)
+ continue;
+ LiveInterval &RegInt = li_->getInterval(reg);
+ RegInt.weight +=
+ li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth);
+ UniqueUses.insert(reg);
+ }
+ }
+ ++mii;
+ }
+ }
+ }
+
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+ LiveInterval &LI = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // If the live interval length is essentially zero, i.e. in every live
+ // range the use follows def immediately, it doesn't make sense to spill
+ // it and hope it will be easier to allocate for this li.
+ if (isZeroLengthInterval(&LI))
+ LI.weight = HUGE_VALF;
+ else {
+ bool isLoad = false;
+ SmallVector<LiveInterval*, 4> SpillIs;
+ if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
+ // If all of the definitions of the interval are re-materializable,
+          // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isLoad)
+ LI.weight *= 0.9F;
+ else
+ LI.weight *= 0.5F;
+ }
+ }
+
+ // Slightly prefer live interval that has been assigned a preferred reg.
+ if (LI.preference)
+ LI.weight *= 1.01F;
+
+ // Divide the weight of the interval by its size. This encourages
+ // spilling of intervals that are large and have few uses, and
+ // discourages spilling of small intervals with many uses.
+ LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
+ }
+ }
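+
+  // Worked example (hypothetical numbers): a virtual interval with an
+  // accumulated raw weight of 2.0 that is rematerializable from a load, has a
+  // preferred register, and spans about 5 instructions ends up with weight
+  // 2.0 * 0.9 * 1.01 / (5 * InstrSlots::NUM) which is roughly 0.091.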
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const {
+ li_->print(O, m);
+}
+
+RegisterCoalescer* llvm::createSimpleRegisterCoalescer() {
+ return new SimpleRegisterCoalescing();
+}
+
+// Make sure that anything that uses RegisterCoalescer pulls in this file...
+DEFINING_FILE_FOR(SimpleRegisterCoalescing)
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
new file mode 100644
index 0000000..a495bfd
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -0,0 +1,313 @@
+//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register copy coalescing phase.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/ADT/BitVector.h"
+#include <queue>
+
+namespace llvm {
+ class SimpleRegisterCoalescing;
+ class LiveVariables;
+ class TargetRegisterInfo;
+ class TargetInstrInfo;
+ class VirtRegMap;
+ class MachineLoopInfo;
+
+ /// CopyRec - Representation for copy instructions in coalescer queue.
+ ///
+ struct CopyRec {
+ MachineInstr *MI;
+ unsigned LoopDepth;
+ bool isBackEdge;
+ CopyRec(MachineInstr *mi, unsigned depth, bool be)
+ : MI(mi), LoopDepth(depth), isBackEdge(be) {};
+ };
+
+ template<class SF> class JoinPriorityQueue;
+
+ /// CopyRecSort - Sorting function for coalescer queue.
+ ///
+ struct CopyRecSort : public std::binary_function<CopyRec,CopyRec,bool> {
+ JoinPriorityQueue<CopyRecSort> *JPQ;
+ explicit CopyRecSort(JoinPriorityQueue<CopyRecSort> *jpq) : JPQ(jpq) {}
+ CopyRecSort(const CopyRecSort &RHS) : JPQ(RHS.JPQ) {}
+ bool operator()(CopyRec left, CopyRec right) const;
+ };
+
+ /// JoinQueue - A priority queue of copy instructions the coalescer is
+ /// going to process.
+ template<class SF>
+ class JoinPriorityQueue {
+ SimpleRegisterCoalescing *Rc;
+ std::priority_queue<CopyRec, std::vector<CopyRec>, SF> Queue;
+
+ public:
+ explicit JoinPriorityQueue(SimpleRegisterCoalescing *rc)
+ : Rc(rc), Queue(SF(this)) {}
+
+ bool empty() const { return Queue.empty(); }
+ void push(CopyRec R) { Queue.push(R); }
+ CopyRec pop() {
+ if (empty()) return CopyRec(0, 0, false);
+ CopyRec R = Queue.top();
+ Queue.pop();
+ return R;
+ }
+
+ // Callbacks to SimpleRegisterCoalescing.
+ unsigned getRepIntervalSize(unsigned Reg);
+ };
+
+ class SimpleRegisterCoalescing : public MachineFunctionPass,
+ public RegisterCoalescer {
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ LiveIntervals *li_;
+ const MachineLoopInfo* loopInfo;
+
+ BitVector allocatableRegs_;
+ DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
+
+ /// JoinQueue - A priority queue of copy instructions the coalescer is
+ /// going to process.
+ JoinPriorityQueue<CopyRecSort> *JoinQueue;
+
+ /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+ ///
+ SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+ /// ReMatCopies - Keep track of copies eliminated due to remat.
+ ///
+ SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+ /// ReMatDefs - Keep track of definition instructions which have
+ /// been remat'ed.
+ SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+ public:
+    static char ID; // Pass identification, replacement for typeid
+ SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {}
+
+ struct InstrSlots {
+ enum {
+ LOAD = 0,
+ USE = 1,
+ DEF = 2,
+ STORE = 3,
+ NUM = 4
+ };
+ };
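+
+    // Roughly speaking, each instruction is allotted NUM consecutive index
+    // values: its reads are keyed at base+USE and its writes at base+DEF,
+    // with the LOAD and STORE slots left free for spill code inserted around
+    // it. getRepIntervalSize below therefore measures interval sizes in these
+    // slots (approximate instruction count times NUM).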
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
+ // This runs as an independent pass, so don't do anything.
+ return false;
+ };
+
+    /// getRepIntervalSize - Called from the join priority queue sorting
+    /// function. It returns the size of the interval that represents the
+    /// given register.
+ unsigned getRepIntervalSize(unsigned Reg) {
+ if (!li_->hasInterval(Reg))
+ return 0;
+ return li_->getApproximateInstructionCount(li_->getInterval(Reg)) *
+ LiveInterval::InstrSlots::NUM;
+ }
+
+ /// print - Implement the dump method.
+ virtual void print(std::ostream &O, const Module* = 0) const;
+ void print(std::ostream *O, const Module* M = 0) const {
+ if (O) print(*O, M);
+ }
+
+ private:
+ /// joinIntervals - join compatible live intervals
+ void joinIntervals();
+
+ /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into the "TryAgain" list.
+ void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<CopyRec> &TryAgain);
+
+ /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// which are the src/dst of the copy instruction CopyMI. This returns true
+ /// if the copy was successfully coalesced away. If it is not currently
+ /// possible to coalesce this interval, but it may be possible if other
+ /// things get coalesced, then it returns true by reference in 'Again'.
+ bool JoinCopy(CopyRec &TheCopy, bool &Again);
+
+ /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// returns false. Otherwise, if one of the intervals being joined is a
+    /// physreg, this method always canonicalizes LHS to be it. The output
+    /// "RHS" will not have been modified, so we can use this information
+ /// below to update aliases.
+ bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped);
+
+ /// SimpleJoin - Attempt to join the specified interval into this one. The
+ /// caller of this method must guarantee that the RHS only contains a single
+ /// value number and that the RHS is not defined by a copy from this
+ /// interval. This returns false if the intervals are not joinable, or it
+ /// joins them and returns true.
+ bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS);
+
+ /// Return true if the two specified registers belong to different register
+ /// classes. The registers may be either phys or virt regs.
+ bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
+
+ /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+    /// the source value number is defined by a copy from the destination reg,
+    /// see if we can merge the two destination reg val#s into a single
+ /// value number, eliminating a copy.
+ bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
+ MachineInstr *CopyMI);
+
+ /// HasOtherReachingDefs - Return true if there are definitions of IntB
+    /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+ bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB,
+ MachineInstr *CopyMI);
+
+ /// TrimLiveIntervalToLastUse - If there is a last use in the same basic
+    /// block as the copy instruction, trim the live interval to the last use
+ /// and return true.
+ bool TrimLiveIntervalToLastUse(unsigned CopyIdx,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &li, const LiveRange *LR);
+
+ /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+    /// computation, replace the copy by rematerializing the definition.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
+ MachineInstr *CopyMI);
+
+    /// TurnCopyIntoImpDef - If the source of the specified copy is an
+    /// implicit def, turn the copy into an implicit def.
+ bool TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
+ MachineBasicBlock *MBB,
+ unsigned DstReg, unsigned SrcReg);
+
+ /// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+ /// from an implicit def to another register can be coalesced away.
+ bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
+ LiveInterval &li, LiveInterval &ImpLi) const;
+
+ /// RemoveCopiesFromValNo - The specified value# is defined by an implicit
+ /// def and it is being removed. Turn all copies from this value# into
+ /// identity copies so they will be removed.
+ void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI);
+
+    /// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join
+    /// a virtual destination register with a physical source register.
+ bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt, LiveInterval &SrcInt);
+
+    /// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+ /// copy from a virtual source register to a physical destination register.
+ bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+ MachineBasicBlock *CopyMBB,
+ LiveInterval &DstInt, LiveInterval &SrcInt);
+
+ /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+ /// two virtual registers from different register classes.
+ bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
+ unsigned Threshold);
+
+ /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+ /// register with a physical register, check if any of the virtual register
+    /// operands is a sub-register use or def. If so, make sure it won't result
+ /// in an illegal extract_subreg or insert_subreg instruction.
+ bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+ /// an extract_subreg where dst is a physical register, e.g.
+ /// cl = EXTRACT_SUBREG reg1024, 1
+ bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
+ unsigned SubIdx, unsigned &RealDstReg);
+
+ /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+ /// an insert_subreg where src is a physical register, e.g.
+ /// reg1024 = INSERT_SUBREG reg1024, c1, 0
+ bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
+ unsigned SubIdx, unsigned &RealDstReg);
+
+ /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+ /// the specified live interval is defined by a copy from the specified
+ /// register.
+ bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR,
+ unsigned Reg);
+
+ /// isBackEdgeCopy - Return true if CopyMI is a back edge copy.
+ ///
+ bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const;
+
+    /// UpdateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and
+ /// update the subregister number if it is not zero. If DstReg is a
+ /// physical register and the existing subregister number of the def / use
+ /// being updated is not zero, make sure to set it to the correct physical
+ /// subregister.
+ void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+ /// RemoveDeadImpDef - Remove implicit_def instructions which are
+    /// "re-defining" registers due to insert_subreg coalescing.
+ void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI);
+
+ /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+ /// due to live range lengthening as the result of coalescing.
+ void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI);
+
+ /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+ /// Return true if live interval is removed.
+ bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+    /// ShortenDeadCopySrcLiveRange - Shorten a live range that is artificially
+    /// extended by a dead copy. Mark the last use (if any) of the val# as
+    /// kill and end the live range there. If there isn't another use, then
+    /// this live range is dead. Return true if the live interval is removed.
+ bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+ /// RemoveDeadDef - If a def of a live interval is now determined dead,
+ /// remove the val# it defines. If the live interval becomes empty, remove
+ /// it as well.
+ bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+    /// lastRegisterUse - Returns the last use of the specified register between
+ /// cycles Start and End or NULL if there are no uses.
+ MachineOperand *lastRegisterUse(unsigned Start, unsigned End, unsigned Reg,
+ unsigned &LastUseIdx) const;
+
+ void printRegName(unsigned reg) const;
+ };
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
new file mode 100644
index 0000000..ce63121
--- /dev/null
+++ b/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,229 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ LiveStacks *ls;
+ MachineFrameInfo *mfi;
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ VirtRegMap *vrm;
+
+ /// Construct a spiller base.
+ SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) :
+ mf(mf), lis(lis), ls(ls), vrm(vrm)
+ {
+ mfi = mf->getFrameInfo();
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ }
+
+ /// Insert a store of the given vreg to the given stack slot immediately
+ /// after the given instruction. Returns the base index of the inserted
+ /// instruction. The caller is responsible for adding an appropriate
+ /// LiveInterval to the LiveIntervals analysis.
+ unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
+ unsigned newVReg,
+ const TargetRegisterClass *trc) {
+ MachineBasicBlock::iterator nextInstItr(mi);
+ ++nextInstItr;
+
+ if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
+ lis->scaleNumbering(2);
+ ls->scaleNumbering(2);
+ }
+
+ unsigned miIdx = lis->getInstructionIndex(mi);
+
+ assert(lis->hasGapAfterInstr(miIdx));
+
+ tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, newVReg,
+ true, ss, trc);
+ MachineBasicBlock::iterator storeInstItr(mi);
+ ++storeInstItr;
+ MachineInstr *storeInst = &*storeInstItr;
+ unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
+
+ assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
+ "Store inst index already in use.");
+
+ lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
+
+ return storeInstIdx;
+ }
+
+  /// Insert a load of the given vreg from the given stack slot immediately
+ /// before the given instruction. Returns the base index of the inserted
+ /// instruction. The caller is responsible for adding an appropriate
+ /// LiveInterval to the LiveIntervals analysis.
+ unsigned insertLoadFor(MachineInstr *mi, unsigned ss,
+ unsigned newVReg,
+ const TargetRegisterClass *trc) {
+ MachineBasicBlock::iterator useInstItr(mi);
+
+ if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
+ lis->scaleNumbering(2);
+ ls->scaleNumbering(2);
+ }
+
+ unsigned miIdx = lis->getInstructionIndex(mi);
+
+ assert(lis->hasGapBeforeInstr(miIdx));
+
+ tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, newVReg, ss, trc);
+ MachineBasicBlock::iterator loadInstItr(mi);
+ --loadInstItr;
+ MachineInstr *loadInst = &*loadInstItr;
+ unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
+
+ assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
+ "Load inst index already in use.");
+
+ lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
+
+ return loadInstIdx;
+ }
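+
+  // Note on numbering: when there is no free slot group adjacent to 'mi',
+  // insertStoreFor/insertLoadFor first call scaleNumbering(2) on the live
+  // interval and live stack analyses, doubling every existing index and so
+  // opening an unused group next to each instruction; the new store/load is
+  // then numbered at miIdx +/- LiveInterval::InstrSlots::NUM inside that gap.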
+
+
+ /// Add spill ranges for every use/def of the live interval, inserting loads
+ /// immediately before each use, and stores after each def. No folding is
+ /// attempted.
+ std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
+ DOUT << "Spilling everywhere " << *li << "\n";
+
+ assert(li->weight != HUGE_VALF &&
+ "Attempting to spill already spilled value.");
+
+ assert(!li->isStackSlot() &&
+ "Trying to spill a stack slot.");
+
+ std::vector<LiveInterval*> added;
+
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+ for (MachineRegisterInfo::reg_iterator
+ regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+ MachineInstr *mi = &*regItr;
+ do {
+ ++regItr;
+ } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+ SmallVector<unsigned, 2> indices;
+ bool hasUse = false;
+ bool hasDef = false;
+
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+ MachineOperand &op = mi->getOperand(i);
+
+ if (!op.isReg() || op.getReg() != li->reg)
+ continue;
+
+ hasUse |= mi->getOperand(i).isUse();
+ hasDef |= mi->getOperand(i).isDef();
+
+ indices.push_back(i);
+ }
+
+ unsigned newVReg = mri->createVirtualRegister(trc);
+ vrm->grow();
+ vrm->assignVirt2StackSlot(newVReg, ss);
+
+ LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ newLI->weight = HUGE_VALF;
+
+ for (unsigned i = 0; i < indices.size(); ++i) {
+ mi->getOperand(indices[i]).setReg(newVReg);
+
+ if (mi->getOperand(indices[i]).isUse()) {
+ mi->getOperand(indices[i]).setIsKill(true);
+ }
+ }
+
+ assert(hasUse || hasDef);
+
+ if (hasUse) {
+ unsigned loadInstIdx = insertLoadFor(mi, ss, newVReg, trc);
+ unsigned start = lis->getDefIndex(loadInstIdx),
+ end = lis->getUseIndex(lis->getInstructionIndex(mi));
+
+ VNInfo *vni =
+ newLI->getNextValue(loadInstIdx, 0, lis->getVNInfoAllocator());
+ vni->kills.push_back(lis->getInstructionIndex(mi));
+ LiveRange lr(start, end, vni);
+
+ newLI->addRange(lr);
+ }
+
+ if (hasDef) {
+ unsigned storeInstIdx = insertStoreFor(mi, ss, newVReg, trc);
+ unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
+ end = lis->getUseIndex(storeInstIdx);
+
+ VNInfo *vni =
+ newLI->getNextValue(storeInstIdx, 0, lis->getVNInfoAllocator());
+ vni->kills.push_back(storeInstIdx);
+ LiveRange lr(start, end, vni);
+
+ newLI->addRange(lr);
+ }
+
+ added.push_back(newLI);
+ }
+
+
+ return added;
+ }
+
+};
+
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+ TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) :
+ SpillerBase(mf, lis, ls, vrm) {}
+
+ std::vector<LiveInterval*> spill(LiveInterval *li) {
+ return trivialSpillEverywhere(li);
+ }
+
+};
+
+}
+
+llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
+ LiveStacks *ls, VirtRegMap *vrm) {
+ return new TrivialSpiller(mf, lis, ls, vrm);
+}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
new file mode 100644
index 0000000..cad054d
--- /dev/null
+++ b/lib/CodeGen/Spiller.h
@@ -0,0 +1,37 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+#include <vector>
+
+namespace llvm {
+ class LiveInterval;
+ class LiveIntervals;
+ class LiveStacks;
+ class MachineFunction;
+ class VirtRegMap;
+
+ /// Spiller interface.
+ ///
+ /// Implementations are utility classes which insert spill or remat code on
+ /// demand.
+ class Spiller {
+ public:
+ virtual ~Spiller() = 0;
+ virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0;
+ };
+
+ /// Create and return a spiller object, as specified on the command line.
+ Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li,
+ LiveStacks *ls, VirtRegMap *vrm);
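+
+  // Usage sketch (hypothetical caller that already owns the listed analyses):
+  //
+  //   Spiller *spiller = createSpiller(mf, lis, ls, vrm);
+  //   std::vector<LiveInterval*> added = spiller->spill(li);
+  //   // 'added' holds the intervals created for the inserted spill code.
+  //   delete spiller;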
+}
+
+#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 0000000..c179f1e
--- /dev/null
+++ b/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,224 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the block, the stored value is checked. If it's
+// changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// SSPBufferSize - The lower bound for a buffer to be considered for stack
+// smashing protection.
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+ cl::desc("Lower bound for a buffer to be considered for "
+ "stack protection"));
+
+namespace {
+ class VISIBILITY_HIDDEN StackProtector : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ Function *F;
+ Module *M;
+
+ /// InsertStackProtectors - Insert code into the prologue and epilogue of
+ /// the function.
+ ///
+ /// - The prologue code loads and stores the stack guard onto the stack.
+ /// - The epilogue checks the value stored in the prologue against the
+ /// original value. It calls __stack_chk_fail if they differ.
+ bool InsertStackProtectors();
+
+ /// CreateFailBB - Create a basic block to jump to when the stack protector
+ /// check fails.
+ BasicBlock *CreateFailBB();
+
+ /// RequiresStackProtector - Check whether or not this function needs a
+ /// stack protector based upon the stack protector level.
+ bool RequiresStackProtector() const;
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ StackProtector() : FunctionPass(&ID), TLI(0) {}
+ StackProtector(const TargetLowering *tli)
+ : FunctionPass(&ID), TLI(tli) {}
+
+ virtual bool runOnFunction(Function &Fn);
+ };
+} // end anonymous namespace
+
+char StackProtector::ID = 0;
+static RegisterPass<StackProtector>
+X("stack-protector", "Insert stack protectors");
+
+FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+ return new StackProtector(tli);
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+
+ if (!RequiresStackProtector()) return false;
+
+ return InsertStackProtectors();
+}
+
+/// RequiresStackProtector - Check whether or not this function needs a stack
+/// protector based upon the stack protector level. The heuristic we use is to
+/// add a guard variable to functions that call alloca, and functions with
+/// buffers of SSPBufferSize bytes or more.
+bool StackProtector::RequiresStackProtector() const {
+ if (F->hasFnAttr(Attribute::StackProtectReq))
+ return true;
+
+ if (!F->hasFnAttr(Attribute::StackProtect))
+ return false;
+
+ const TargetData *TD = TLI->getTargetData();
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ BasicBlock *BB = I;
+
+ for (BasicBlock::iterator
+ II = BB->begin(), IE = BB->end(); II != IE; ++II)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (AI->isArrayAllocation())
+ // This is a call to alloca with a variable size. Emit stack
+ // protectors.
+ return true;
+
+ if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
+          // If an array has at least SSPBufferSize bytes of allocated space,
+ // then we emit stack protectors.
+ if (SSPBufferSize <= TD->getTypeAllocSize(AT))
+ return true;
+ }
+ }
+
+ return false;
+}
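+
+// For example, with the default SSPBufferSize of 8, a function carrying the
+// stack-protect attribute that declares "char buf[16]" (an alloca of
+// [16 x i8]) gets a protector, while one whose largest fixed-size array is
+// only 4 bytes does not, unless it also performs a variable-sized alloca,
+// which always triggers protection.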
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ AllocaInst *AI = 0; // Place on stack that stores the stack guard.
+ Constant *StackGuardVar = 0; // The stack guard variable.
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+ BasicBlock *BB = I++;
+
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI) continue;
+
+ if (!FailBB) {
+ // Insert code into the entry block that stores the __stack_chk_guard
+ // variable onto the stack:
+ //
+ // entry:
+ // StackGuardSlot = alloca i8*
+ // StackGuard = load __stack_chk_guard
+ // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+ //
+ PointerType *PtrTy = PointerType::getUnqual(Type::Int8Ty);
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+
+ BasicBlock &Entry = F->getEntryBlock();
+ Instruction *InsPt = &Entry.front();
+
+ AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+ LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+
+ Value *Args[] = { LI, AI };
+ CallInst::
+ Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ &Args[0], array_endof(Args), "", InsPt);
+
+ // Create the basic block to jump to when the guard check fails.
+ FailBB = CreateFailBB();
+ }
+
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = load __stack_chk_guard
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
+
+ // Move the newly created basic block to the point right after the old basic
+ // block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
+
+ // Generate the stack protector instructions in the old basic block.
+ LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
+ LoadInst *LI2 = new LoadInst(AI, "", true, BB);
+ ICmpInst *Cmp = new ICmpInst(CmpInst::ICMP_EQ, LI1, LI2, "", BB);
+ BranchInst::Create(NewBB, FailBB, Cmp, BB);
+ }
+
+ // Return if we didn't modify any basic blocks. I.e., there are no return
+ // statements in the function.
+ if (!FailBB) return false;
+
+ return true;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ BasicBlock *FailBB = BasicBlock::Create("CallStackCheckFailBlk", F);
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail", Type::VoidTy, NULL);
+ CallInst::Create(StackChkFail, "", FailBB);
+ new UnreachableInst(FailBB);
+ return FailBB;
+}
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 0000000..5824644
--- /dev/null
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,733 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<bool>
+ColorWithRegsOpt("color-ss-with-regs",
+ cl::init(false), cl::Hidden,
+ cl::desc("Color stack slots with free registers"));
+
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
+STATISTIC(NumLoadElim, "Number of loads eliminated");
+STATISTIC(NumStoreElim, "Number of stores eliminated");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+ class VISIBILITY_HIDDEN StackSlotColoring : public MachineFunctionPass {
+ bool ColorWithRegs;
+ LiveStacks* LS;
+ VirtRegMap* VRM;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineLoopInfo *loopInfo;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of frame index references for each spill slot.
+ SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigAlignments;
+
+    // OrigSizes - Sizes of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+    // AllColors - If the bit for an index is set, that index is a spill
+    // slot, i.e. a usable color.
+    // FIXME: This assumes PEI locates spill slots with smaller indices
+    // closest to the stack pointer / frame pointer. Therefore, a smaller
+    // index == a better color.
+ BitVector AllColors;
+
+ // NextColor - Next "color" that's not yet used.
+ int NextColor;
+
+ // UsedColors - "Colors" that have been assigned.
+ BitVector UsedColors;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+ StackSlotColoring() :
+ MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {}
+ StackSlotColoring(bool RegColor) :
+ MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char* getPassName() const {
+ return "Stack Slot Coloring";
+ }
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg);
+ void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+ MachineFunction &MF);
+ bool PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ bool PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg);
+ void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg, const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF);
+ bool AllMemRefsCanBeUnfolded(int SS);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+static RegisterPass<StackSlotColoring>
+X("stack-slot-coloring", "Stack Slot Coloring");
+
+FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
+ return new StackSlotColoring(RegColor);
+}
+
+namespace {
+  // IntervalSorter - Comparison predicate that sorts live intervals by
+  // decreasing weight.
+ struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+ };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+ MII != EE; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+ SSRefs[FI].push_back(MI);
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot liveintervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors.resize(LastFI);
+ UsedColors.resize(LastFI);
+ Assignments.resize(LastFI);
+
+ // Gather all spill slots into a list.
+ DOUT << "Spill slot intervals:\n";
+ for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+ LiveInterval &li = i->second;
+ DEBUG(li.dump());
+ int FI = li.getStackSlotIndex();
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+ AllColors.set(FI);
+ }
+ DOUT << '\n';
+
+ // Sort them by weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+ // Get first "color".
+ NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+ const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+ for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+ LiveInterval *OtherLI = OtherLIs[i];
+ if (OtherLI->overlaps(*li))
+ return true;
+ }
+ return false;
+}
+
+/// ColorSlotsWithFreeRegs - If there are any free registers available, try
+/// replacing spill slots references with registers instead.
+bool
+StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+ SmallVector<SmallVector<int, 4>, 16> &RevMap,
+ BitVector &SlotIsReg) {
+ if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
+ return false;
+
+ bool Changed = false;
+ DOUT << "Assigning unused registers to spill slots:\n";
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ if (!UsedColors[SS] || li->weight < 20)
+ // If the weight is < 20, i.e. two references in a loop with depth 1,
+ // don't bother with it.
+ continue;
+
+    // Slots assigned to the same color may share the same registers.
+ bool AllColored = true;
+ SmallVector<unsigned, 4> ColoredRegs;
+ for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
+ int RSS = RevMap[SS][j];
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
+ // If it's not colored to another stack slot, try coloring it
+ // to a "free" register.
+ if (!RC) {
+ AllColored = false;
+ continue;
+ }
+ unsigned Reg = VRM->getFirstUnusedRegister(RC);
+ if (!Reg) {
+ AllColored = false;
+ continue;
+ }
+ if (!AllMemRefsCanBeUnfolded(RSS)) {
+ AllColored = false;
+ continue;
+ } else {
+ DOUT << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n';
+ ColoredRegs.push_back(Reg);
+ SlotMapping[RSS] = Reg;
+ SlotIsReg.set(RSS);
+ Changed = true;
+ }
+ }
+
+ // Register and its sub-registers are no longer free.
+ while (!ColoredRegs.empty()) {
+ unsigned Reg = ColoredRegs.back();
+ ColoredRegs.pop_back();
+ VRM->setRegisterUsed(Reg);
+ // If reg is a callee-saved register, it will have to be spilled in
+ // the prologue.
+ MRI->setPhysRegUsed(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ VRM->setRegisterUsed(*AS);
+ MRI->setPhysRegUsed(*AS);
+ }
+ }
+ // This spill slot is dead after the rewrites
+ if (AllColored) {
+ MFI->RemoveStackObject(SS);
+ ++NumEliminated;
+ }
+ }
+ DOUT << '\n';
+
+ return Changed;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ if (!DisableSharing) {
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors.find_first();
+ while (Color != -1) {
+ if (!OverlapWithAssignments(li, Color)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors.find_next(Color);
+ }
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColor != -1 && "No more spill slots?");
+ Color = NextColor;
+ UsedColors.set(Color);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ // Record the assignment.
+ Assignments[Color].push_back(li);
+ int FI = li->getStackSlotIndex();
+ DOUT << "Assigning fi#" << FI << " to fi#" << Color << "\n";
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ unsigned Align = OrigAlignments[FI];
+ if (!Share || Align > MFI->getObjectAlignment(Color))
+ MFI->setObjectAlignment(Color, Align);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
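+
+// [Editorial sketch -- not part of this patch] The greedy decision above can
+// be illustrated on a simplified interval type. ToyInterval and toyPickColor
+// are hypothetical stand-ins for LiveInterval and ColorSlot: reuse the first
+// already-used color whose assigned intervals do not overlap the new one,
+// otherwise claim a fresh color. The real pass additionally consults the
+// pre-populated AllColors bitvector and updates slot sizes and alignments.
+#if 0
+struct ToyInterval {
+  unsigned Start, End;                              // half-open [Start, End)
+  bool overlaps(const ToyInterval &O) const {
+    return Start < O.End && O.Start < End;
+  }
+};
+
+static int toyPickColor(const ToyInterval &LI,
+                        std::vector<std::vector<ToyInterval> > &Assigned) {
+  for (unsigned C = 0, CE = Assigned.size(); C != CE; ++C) {
+    bool Conflict = false;
+    for (unsigned i = 0, e = Assigned[C].size(); i != e; ++i)
+      if (Assigned[C][i].overlaps(LI)) {
+        Conflict = true;
+        break;
+      }
+    if (!Conflict) {
+      Assigned[C].push_back(LI);                    // share an existing color
+      return (int)C;
+    }
+  }
+  Assigned.push_back(std::vector<ToyInterval>(1, LI)); // open a new color
+  return (int)Assigned.size() - 1;
+}
+#endif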
+
+/// ColorSlots - Color all spill stack slots and rewrite all frame index
+/// machine operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector SlotIsReg(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ DOUT << "Color spill slot intervals:\n";
+ bool Changed = false;
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight;
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ DOUT << "\nSpill slots after coloring:\n";
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = li->getStackSlotIndex();
+ li->weight = SlotWeights[SS];
+ }
+ // Sort them by new weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+ DEBUG(SSIntervals[i]->dump());
+ DOUT << '\n';
+#endif
+
+  // Can we "color" a stack slot with an unused register?
+ Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MO_FrameIndex operands.
+ SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ bool isReg = SlotIsReg[SS];
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+ continue;
+
+ const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+ if (!isReg)
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+ else {
+ // Rewrite to use a register instead.
+ unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+ SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+ UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+ }
+ }
+
+ // Delete unused stack slots.
+ while (NextColor != -1) {
+ DOUT << "Removing unused stack object fi#" << NextColor << "\n";
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+ SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+ MachineInstr *MI = RefMIs[i];
+ if (TII->isLoadFromStackSlot(MI, SS) ||
+ TII->isStoreToStackSlot(MI, SS))
+ // Restore and spill will become copies.
+ return true;
+ if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+ return false;
+ for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = MI->getOperand(j);
+ if (MO.isFI() && MO.getIndex() != SS)
+        // If it uses another frame index, we can't currently unfold it.
+ return false;
+ }
+ }
+ return true;
+}
+
+/// RewriteInstruction - Rewrite the specified instruction by replacing
+/// references to the old frame index with the new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+ int NewFI, MachineFunction &MF) {
+ for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI != OldFI)
+ continue;
+ MO.setIndex(NewFI);
+ }
+
+ // Update the MachineMemOperand for the new memory location.
+ // FIXME: We need a better method of managing these too.
+ SmallVector<MachineMemOperand, 2> MMOs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ MI->clearMemOperands(MF);
+ const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+ for (unsigned i = 0, ee = MMOs.size(); i != ee; ++i) {
+ if (MMOs[i].getValue() != OldSV)
+ MI->addMemOperand(MF, MMOs[i]);
+ else {
+ MachineMemOperand MMO(PseudoSourceValue::getFixedStack(NewFI),
+ MMOs[i].getFlags(), MMOs[i].getOffset(),
+ MMOs[i].getSize(), MMOs[i].getAlignment());
+ MI->addMemOperand(MF, MMO);
+ }
+ }
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->begin())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ SmallVector<MachineOperand*, 4> Refs;
+ while (--MII != MBB->begin()) {
+ bool FoundDef = false; // Not counting 2address def.
+
+ Uses.clear();
+ const TargetInstrDesc &TID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isImplicit())
+ return false;
+ const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i);
+ if (RC && !RC->contains(NewReg))
+ return false;
+
+ if (MO.isUse()) {
+ Uses.push_back(&MO);
+ } else {
+ Refs.push_back(&MO);
+ if (!MII->isRegTiedToUseOperand(i))
+ FoundDef = true;
+ }
+ } else if (TRI->regsOverlap(Reg, NewReg)) {
+ return false;
+ } else if (TRI->regsOverlap(Reg, OldReg)) {
+ if (!MO.isUse() || !MO.isKill())
+ return false;
+ }
+ }
+
+ if (FoundDef) {
+ // Found non-two-address def. Stop here.
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+ Refs[i]->setReg(NewReg);
+ return true;
+ }
+
+ // Two-address uses must be updated as well.
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Refs.push_back(Uses[i]);
+ }
+ return false;
+}
+
+/// PropagateForward - Traverse forward and look for the kill of OldReg. If
+/// it can successfully update all of the uses with NewReg, do so and
+/// return true.
+bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
+ MachineBasicBlock *MBB,
+ unsigned OldReg, unsigned NewReg) {
+ if (MII == MBB->end())
+ return false;
+
+ SmallVector<MachineOperand*, 4> Uses;
+ while (++MII != MBB->end()) {
+ bool FoundUse = false;
+ bool FoundKill = false;
+ const TargetInstrDesc &TID = MII->getDesc();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (Reg == OldReg) {
+ if (MO.isDef() || MO.isImplicit())
+ return false;
+
+ const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i);
+ if (RC && !RC->contains(NewReg))
+ return false;
+ FoundUse = true;
+ if (MO.isKill())
+ FoundKill = true;
+ Uses.push_back(&MO);
+ } else if (TRI->regsOverlap(Reg, NewReg) ||
+ TRI->regsOverlap(Reg, OldReg))
+ return false;
+ }
+ if (FoundKill) {
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ Uses[i]->setReg(NewReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
+/// folded memory references and replacing those references with register
+/// references instead.
+void
+StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+ unsigned Reg,
+ const TargetRegisterClass *RC,
+ SmallSet<unsigned, 4> &Defs,
+ MachineFunction &MF) {
+ MachineBasicBlock *MBB = MI->getParent();
+ if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
+ if (PropagateForward(MI, MBB, DstReg, Reg)) {
+ DOUT << "Eliminated load: ";
+ DEBUG(MI->dump());
+ ++NumLoadElim;
+ } else {
+ TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC);
+ ++NumRegRepl;
+ }
+
+ if (!Defs.count(Reg)) {
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
+ if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
+ DOUT << "Eliminated store: ";
+ DEBUG(MI->dump());
+ ++NumStoreElim;
+ } else {
+ TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC);
+ ++NumRegRepl;
+ }
+
+ // Remember reg has been defined in MBB.
+ Defs.insert(Reg);
+ } else {
+ SmallVector<MachineInstr*, 4> NewMIs;
+ bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
+ Success = Success; // Silence compiler warning.
+ assert(Success && "Failed to unfold!");
+ MachineInstr *NewMI = NewMIs[0];
+ MBB->insert(MI, NewMI);
+ ++NumRegRepl;
+
+ if (NewMI->readsRegister(Reg)) {
+ if (!Defs.count(Reg))
+ // If this is the first use of Reg in this MBB and it wasn't previously
+ // defined in MBB, add it to livein.
+ MBB->addLiveIn(Reg);
+ Defs.insert(Reg);
+ }
+ }
+ MBB->erase(MI);
+}
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+
+ MachineBasicBlock::iterator NextMI = next(I);
+ if (NextMI == MBB->end()) continue;
+
+ int FirstSS, SecondSS;
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+ ++NumDead;
+ changed = true;
+
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+ ++NumDead;
+ toErase.push_back(I);
+ }
+
+ toErase.push_back(NextMI);
+ ++I;
+ }
+
+ for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed;
+}
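+
+// [Editorial sketch -- not part of this patch] A toy model of the pattern
+// RemoveDeadStores looks for: a load from a slot immediately followed by a
+// store of the same register back to the same slot, in which case the store
+// writes back a value that is already in memory. ToyInstr and the helper are
+// hypothetical; the real pass also deletes the load when the loaded register
+// is otherwise unused.
+#if 0
+struct ToyInstr {
+  bool IsLoad, IsStore;
+  int Slot;            // frame index accessed, or -1
+  unsigned Reg;        // register loaded into / stored from
+};
+
+static unsigned toyRemoveDeadStores(std::vector<ToyInstr> &Block) {
+  unsigned NumRemoved = 0;
+  for (unsigned i = 0; i + 1 < Block.size(); ++i) {
+    const ToyInstr &Ld = Block[i];
+    const ToyInstr &St = Block[i + 1];
+    if (Ld.IsLoad && St.IsStore && Ld.Slot != -1 &&
+        Ld.Slot == St.Slot && Ld.Reg == St.Reg) {
+      Block.erase(Block.begin() + i + 1);           // the store is dead
+      ++NumRemoved;
+    }
+  }
+  return NumRemoved;
+}
+#endif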
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "********** Stack Slot Coloring **********\n";
+
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ LS = &getAnalysis<LiveStacks>();
+ VRM = &getAnalysis<VirtRegMap>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots < 2) {
+ if (NumSlots == 0 || !VRM->HasUnusedRegisters())
+ // Nothing to do!
+ return false;
+ }
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ NextColor = -1;
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+ Assignments[i].clear();
+ Assignments.clear();
+
+ if (Changed) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= RemoveDeadStores(I);
+ }
+
+ return Changed;
+}
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 0000000..a2c1255
--- /dev/null
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,1053 @@
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions, using an intelligent copy-folding technique based on
+// dominator information. This technique is derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+// DOI= http://doi.acm.org/10.1145/512529.512534
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN StrongPHIElimination : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ StrongPHIElimination() : MachineFunctionPass(&ID) {}
+
+    // Waiting - For each MBB, the set of copies that need to be
+    // inserted into that MBB
+ DenseMap<MachineBasicBlock*,
+ std::multimap<unsigned, unsigned> > Waiting;
+
+ // Stacks holds the renaming stack for each register
+ std::map<unsigned, std::vector<unsigned> > Stacks;
+
+ // Registers in UsedByAnother are PHI nodes that are themselves
+    // used as operands to another PHI node
+ std::set<unsigned> UsedByAnother;
+
+    // RenameSets is a map from a PHI-defined register to the input
+    // registers to be coalesced, along with the predecessor block for
+    // each of those input registers.
+ std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
+
+ // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
+ // eliminating, indexed by the register defined by that phi.
+ std::map<unsigned, unsigned> PhiValueNumber;
+
+ // Store the DFS-in number of each block
+ DenseMap<MachineBasicBlock*, unsigned> preorder;
+
+ // Store the DFS-out number of each block
+ DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+
+ // TODO: Actually make this true.
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<RegisterCoalescer>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual void releaseMemory() {
+ preorder.clear();
+ maxpreorder.clear();
+
+ Waiting.clear();
+ Stacks.clear();
+ UsedByAnother.clear();
+ RenameSets.clear();
+ }
+
+ private:
+
+ /// DomForestNode - Represents a node in the "dominator forest". This is
+ /// a forest in which the nodes represent registers and the edges
+ /// represent a dominance relation in the block defining those registers.
+ struct DomForestNode {
+ private:
+ // Store references to our children
+ std::vector<DomForestNode*> children;
+ // The register we represent
+ unsigned reg;
+
+ // Add another node as our child
+ void addChild(DomForestNode* DFN) { children.push_back(DFN); }
+
+ public:
+ typedef std::vector<DomForestNode*>::iterator iterator;
+
+ // Create a DomForestNode by providing the register it represents, and
+ // the node to be its parent. The virtual root node has register 0
+ // and a null parent.
+ DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
+ if (parent)
+ parent->addChild(this);
+ }
+
+ ~DomForestNode() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ }
+
+      /// getReg - Return the register that this node represents
+ inline unsigned getReg() { return reg; }
+
+ // Provide iterator access to our children
+ inline DomForestNode::iterator begin() { return children.begin(); }
+ inline DomForestNode::iterator end() { return children.end(); }
+ };
+
+ void computeDFS(MachineFunction& MF);
+ void processBlock(MachineBasicBlock* MBB);
+
+ std::vector<DomForestNode*> computeDomForest(
+ std::map<unsigned, MachineBasicBlock*>& instrs,
+ MachineRegisterInfo& MRI);
+ void processPHIUnion(MachineInstr* Inst,
+ std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+ std::vector<StrongPHIElimination::DomForestNode*>& DF,
+ std::vector<std::pair<unsigned, unsigned> >& locals);
+ void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
+ void InsertCopies(MachineDomTreeNode* MBB,
+ SmallPtrSet<MachineBasicBlock*, 16>& v);
+ bool mergeLiveIntervals(unsigned primary, unsigned secondary);
+ };
+}
+
+char StrongPHIElimination::ID = 0;
+static RegisterPass<StrongPHIElimination>
+X("strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently");
+
+const PassInfo *const llvm::StrongPHIEliminationID = &X;
+
+/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
+/// of the given MachineFunction. These numbers are then used in other parts
+/// of the PHI elimination process.
+void StrongPHIElimination::computeDFS(MachineFunction& MF) {
+ SmallPtrSet<MachineDomTreeNode*, 8> frontier;
+ SmallPtrSet<MachineDomTreeNode*, 8> visited;
+
+ unsigned time = 0;
+
+ MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
+
+ MachineDomTreeNode* node = DT.getRootNode();
+
+ std::vector<MachineDomTreeNode*> worklist;
+ worklist.push_back(node);
+
+ while (!worklist.empty()) {
+ MachineDomTreeNode* currNode = worklist.back();
+
+ if (!frontier.count(currNode)) {
+ frontier.insert(currNode);
+ ++time;
+ preorder.insert(std::make_pair(currNode->getBlock(), time));
+ }
+
+ bool inserted = false;
+ for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
+ I != E; ++I)
+ if (!frontier.count(*I) && !visited.count(*I)) {
+ worklist.push_back(*I);
+ inserted = true;
+ break;
+ }
+
+ if (!inserted) {
+ frontier.erase(currNode);
+ visited.insert(currNode);
+ maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
+
+ worklist.pop_back();
+ }
+ }
+}
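+
+// [Editorial sketch -- not part of this patch] The DFS-in/DFS-out numbers
+// computed above answer ancestry queries on the dominator tree in O(1):
+// A dominates B exactly when B's DFS-in number falls inside A's
+// [DFS-in, DFS-out] window. computeDomForest below relies on the same
+// property when it pops parents while "pre > maxpreorder[parentBlock]".
+// dominatesByDFS is a hypothetical helper, not used by the pass.
+#if 0
+static bool dominatesByDFS(unsigned preA, unsigned maxpreA, unsigned preB) {
+  return preA <= preB && preB <= maxpreA;
+}
+#endif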
+
+namespace {
+
+/// PreorderSorter - a helper class that is used to sort registers
+/// according to the preorder number of their defining blocks
+class PreorderSorter {
+private:
+ DenseMap<MachineBasicBlock*, unsigned>& preorder;
+ MachineRegisterInfo& MRI;
+
+public:
+ PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
+ MachineRegisterInfo& M) : preorder(p), MRI(M) { }
+
+ bool operator()(unsigned A, unsigned B) {
+ if (A == B)
+ return false;
+
+ MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
+ MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
+
+ if (preorder[ABlock] < preorder[BBlock])
+ return true;
+ else if (preorder[ABlock] > preorder[BBlock])
+ return false;
+
+ return false;
+ }
+};
+
+}
+
+/// computeDomForest - compute the subforest of the DomTree corresponding
+/// to the defining blocks of the registers in question
+std::vector<StrongPHIElimination::DomForestNode*>
+StrongPHIElimination::computeDomForest(
+ std::map<unsigned, MachineBasicBlock*>& regs,
+ MachineRegisterInfo& MRI) {
+ // Begin by creating a virtual root node, since the actual results
+ // may well be a forest. Assume this node has maximum DFS-out number.
+ DomForestNode* VirtualRoot = new DomForestNode(0, 0);
+ maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
+
+ // Populate a worklist with the registers
+ std::vector<unsigned> worklist;
+ worklist.reserve(regs.size());
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
+ E = regs.end(); I != E; ++I)
+ worklist.push_back(I->first);
+
+ // Sort the registers by the DFS-in number of their defining block
+ PreorderSorter PS(preorder, MRI);
+ std::sort(worklist.begin(), worklist.end(), PS);
+
+ // Create a "current parent" stack, and put the virtual root on top of it
+ DomForestNode* CurrentParent = VirtualRoot;
+ std::vector<DomForestNode*> stack;
+ stack.push_back(VirtualRoot);
+
+ // Iterate over all the registers in the previously computed order
+ for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end();
+ I != E; ++I) {
+ unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
+ MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
+ MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+ 0;
+
+ // If the DFS-in number of the register is greater than the DFS-out number
+ // of the current parent, repeatedly pop the parent stack until it isn't.
+ while (pre > maxpreorder[parentBlock]) {
+ stack.pop_back();
+ CurrentParent = stack.back();
+
+ parentBlock = CurrentParent->getReg() ?
+ MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+ 0;
+ }
+
+ // Now that we've found the appropriate parent, create a DomForestNode for
+ // this register and attach it to the forest
+ DomForestNode* child = new DomForestNode(*I, CurrentParent);
+
+ // Push this new node on the "current parent" stack
+ stack.push_back(child);
+ CurrentParent = child;
+ }
+
+ // Return a vector containing the children of the virtual root node
+ std::vector<DomForestNode*> ret;
+ ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
+ return ret;
+}
+
+/// isLiveIn - helper method that determines, from a regno, if a register
+/// is live into a block
+static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
+ LiveIntervals& LI) {
+ LiveInterval& I = LI.getOrCreateInterval(r);
+ unsigned idx = LI.getMBBStartIdx(MBB);
+ return I.liveAt(idx);
+}
+
+/// isLiveOut - helper method that determines, from a regno, if a register is
+/// live out of a block.
+static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
+ LiveIntervals& LI) {
+ for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
+ E = MBB->succ_end(); PI != E; ++PI)
+ if (isLiveIn(r, *PI, LI))
+ return true;
+
+ return false;
+}
+
+/// interferes - checks for local interferences by scanning a block. The only
+/// tricky parameter is 'mode', which tells it the relationship of the two
+/// registers: 0 - defined in the same block, 1 - first properly dominates
+/// second, 2 - second properly dominates first
+static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
+ LiveIntervals& LV, unsigned mode) {
+ MachineInstr* def = 0;
+ MachineInstr* kill = 0;
+
+ // The code is still in SSA form at this point, so there is only one
+ // definition per VReg. Thus we can safely use MRI->getVRegDef().
+ const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
+
+ bool interference = false;
+
+  // Walk the block, checking for interferences
+ for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
+ MBI != MBE; ++MBI) {
+ MachineInstr* curr = MBI;
+
+ // Same defining block...
+ if (mode == 0) {
+ if (curr == MRI->getVRegDef(a)) {
+ // If we find our first definition, save it
+ if (!def) {
+ def = curr;
+ // If there's already an unkilled definition, then
+ // this is an interference
+ } else if (!kill) {
+ interference = true;
+ break;
+ // If there's a definition followed by a KillInst, then
+ // they can't interfere
+ } else {
+ interference = false;
+ break;
+ }
+ // Symmetric with the above
+ } else if (curr == MRI->getVRegDef(b)) {
+ if (!def) {
+ def = curr;
+ } else if (!kill) {
+ interference = true;
+ break;
+ } else {
+ interference = false;
+ break;
+ }
+ // Store KillInsts if they match up with the definition
+      } else if (curr->killsRegister(a)) {
+        if (def == MRI->getVRegDef(a)) {
+          kill = curr;
+        }
+      } else if (curr->killsRegister(b)) {
+        if (def == MRI->getVRegDef(b)) {
+          kill = curr;
+        }
+      }
+ // First properly dominates second...
+ } else if (mode == 1) {
+ if (curr == MRI->getVRegDef(b)) {
+ // Definition of second without kill of first is an interference
+ if (!kill) {
+ interference = true;
+ break;
+ // Definition after a kill is a non-interference
+ } else {
+ interference = false;
+ break;
+ }
+ // Save KillInsts of First
+ } else if (curr->killsRegister(a)) {
+ kill = curr;
+ }
+ // Symmetric with the above
+ } else if (mode == 2) {
+ if (curr == MRI->getVRegDef(a)) {
+ if (!kill) {
+ interference = true;
+ break;
+ } else {
+ interference = false;
+ break;
+ }
+ } else if (curr->killsRegister(b)) {
+ kill = curr;
+ }
+ }
+ }
+
+ return interference;
+}
+
+/// processBlock - Determine how to break up PHIs in the current block. Each
+/// PHI is broken up by some combination of renaming its operands and inserting
+/// copies. This method is responsible for determining which operands receive
+/// which treatment.
+void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+
+ // Holds names that have been added to a set in any PHI within this block
+ // before the current one.
+ std::set<unsigned> ProcessedNames;
+
+ // Iterate over all the PHI nodes in this block
+ MachineBasicBlock::iterator P = MBB->begin();
+ while (P != MBB->end() && P->getOpcode() == TargetInstrInfo::PHI) {
+ unsigned DestReg = P->getOperand(0).getReg();
+
+    // Don't bother doing PHI elimination for dead PHIs.
+ if (P->registerDefIsDead(DestReg)) {
+ ++P;
+ continue;
+ }
+
+ LiveInterval& PI = LI.getOrCreateInterval(DestReg);
+ unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P));
+ VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
+ PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
+
+    // PHIUnion is the set of incoming registers to the PHI node that
+    // are going to be renamed rather than having copies inserted. This set
+    // is refined over the course of this function. UnionedBlocks is the set
+    // of corresponding MBBs.
+ std::map<unsigned, MachineBasicBlock*> PHIUnion;
+ SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
+
+ // Iterate over the operands of the PHI node
+ for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+ unsigned SrcReg = P->getOperand(i-1).getReg();
+
+ // Don't need to try to coalesce a register with itself.
+ if (SrcReg == DestReg) {
+ ProcessedNames.insert(SrcReg);
+ continue;
+ }
+
+ // We don't need to insert copies for implicit_defs.
+ MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
+ if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ ProcessedNames.insert(SrcReg);
+
+ // Check for trivial interferences via liveness information, allowing us
+ // to avoid extra work later. Any registers that interfere cannot both
+ // be in the renaming set, so choose one and add copies for it instead.
+ // The conditions are:
+ // 1) if the operand is live into the PHI node's block OR
+ // 2) if the PHI node is live out of the operand's defining block OR
+ // 3) if the operand is itself a PHI node and the original PHI is
+ // live into the operand's defining block OR
+ // 4) if the operand is already being renamed for another PHI node
+ // in this block OR
+ // 5) if any two operands are defined in the same block, insert copies
+ // for one of them
+ if (isLiveIn(SrcReg, P->getParent(), LI) ||
+ isLiveOut(P->getOperand(0).getReg(),
+ MRI.getVRegDef(SrcReg)->getParent(), LI) ||
+ ( MRI.getVRegDef(SrcReg)->getOpcode() == TargetInstrInfo::PHI &&
+ isLiveIn(P->getOperand(0).getReg(),
+ MRI.getVRegDef(SrcReg)->getParent(), LI) ) ||
+ ProcessedNames.count(SrcReg) ||
+ UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) {
+
+ // Add a copy for the selected register
+ MachineBasicBlock* From = P->getOperand(i).getMBB();
+ Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+ UsedByAnother.insert(SrcReg);
+ } else {
+ // Otherwise, add it to the renaming set
+ PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB()));
+ UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent());
+ }
+ }
+
+ // Compute the dominator forest for the renaming set. This is a forest
+ // where the nodes are the registers and the edges represent dominance
+ // relations between the defining blocks of the registers
+ std::vector<StrongPHIElimination::DomForestNode*> DF =
+ computeDomForest(PHIUnion, MRI);
+
+ // Walk DomForest to resolve interferences at an inter-block level. This
+ // will remove registers from the renaming set (and insert copies for them)
+ // if interferences are found.
+ std::vector<std::pair<unsigned, unsigned> > localInterferences;
+ processPHIUnion(P, PHIUnion, DF, localInterferences);
+
+ // If one of the inputs is defined in the same block as the current PHI
+ // then we need to check for a local interference between that input and
+ // the PHI.
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
+ E = PHIUnion.end(); I != E; ++I)
+ if (MRI.getVRegDef(I->first)->getParent() == P->getParent())
+ localInterferences.push_back(std::make_pair(I->first,
+ P->getOperand(0).getReg()));
+
+ // The dominator forest walk may have returned some register pairs whose
+ // interference cannot be determined from dominator analysis. We now
+ // examine these pairs for local interferences.
+ for (std::vector<std::pair<unsigned, unsigned> >::iterator I =
+ localInterferences.begin(), E = localInterferences.end(); I != E; ++I) {
+ std::pair<unsigned, unsigned> p = *I;
+
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+
+ // Determine the block we need to scan and the relationship between
+ // the two registers
+ MachineBasicBlock* scan = 0;
+ unsigned mode = 0;
+ if (MRI.getVRegDef(p.first)->getParent() ==
+ MRI.getVRegDef(p.second)->getParent()) {
+ scan = MRI.getVRegDef(p.first)->getParent();
+ mode = 0; // Same block
+ } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(),
+ MRI.getVRegDef(p.second)->getParent())) {
+ scan = MRI.getVRegDef(p.second)->getParent();
+ mode = 1; // First dominates second
+ } else {
+ scan = MRI.getVRegDef(p.first)->getParent();
+ mode = 2; // Second dominates first
+ }
+
+ // If there's an interference, we need to insert copies
+ if (interferes(p.first, p.second, scan, LI, mode)) {
+ // Insert copies for First
+ for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+ if (P->getOperand(i-1).getReg() == p.first) {
+ unsigned SrcReg = p.first;
+ MachineBasicBlock* From = P->getOperand(i).getMBB();
+
+ Waiting[From].insert(std::make_pair(SrcReg,
+ P->getOperand(0).getReg()));
+ UsedByAnother.insert(SrcReg);
+
+ PHIUnion.erase(SrcReg);
+ }
+ }
+ }
+ }
+
+ // Add the renaming set for this PHI node to our overall renaming information
+ for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
+ QE = PHIUnion.end(); QI != QE; ++QI) {
+ DOUT << "Adding Renaming: " << QI->first << " -> "
+ << P->getOperand(0).getReg() << "\n";
+ }
+
+ RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
+
+ // Remember which registers are already renamed, so that we don't try to
+ // rename them for another PHI node in this block
+ for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
+ E = PHIUnion.end(); I != E; ++I)
+ ProcessedNames.insert(I->first);
+
+ ++P;
+ }
+}
+
+/// processPHIUnion - Take a set of candidate registers to be coalesced when
+/// decomposing the PHI instruction. Use the DominanceForest to remove the ones
+/// that are known to interfere, and flag others that need to be checked for
+/// local interferences.
+void StrongPHIElimination::processPHIUnion(MachineInstr* Inst,
+ std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+ std::vector<StrongPHIElimination::DomForestNode*>& DF,
+ std::vector<std::pair<unsigned, unsigned> >& locals) {
+
+ std::vector<DomForestNode*> worklist(DF.begin(), DF.end());
+ SmallPtrSet<DomForestNode*, 4> visited;
+
+ // Code is still in SSA form, so we can use MRI::getVRegDef()
+ MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo();
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ unsigned DestReg = Inst->getOperand(0).getReg();
+
+  // Depth-first walk of the DomForest
+ while (!worklist.empty()) {
+ DomForestNode* DFNode = worklist.back();
+
+ visited.insert(DFNode);
+
+ bool inserted = false;
+ for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end();
+ CI != CE; ++CI) {
+ DomForestNode* child = *CI;
+
+ // If the current node is live-out of the defining block of one of its
+ // children, insert a copy for it. NOTE: The paper actually calls for
+ // a more elaborate heuristic for determining whether to insert copies
+ // for the child or the parent. In the interest of simplicity, we're
+ // just always choosing the parent.
+ if (isLiveOut(DFNode->getReg(),
+ MRI.getVRegDef(child->getReg())->getParent(), LI)) {
+ // Insert copies for parent
+ for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
+ if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
+ unsigned SrcReg = DFNode->getReg();
+ MachineBasicBlock* From = Inst->getOperand(i).getMBB();
+
+ Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+ UsedByAnother.insert(SrcReg);
+
+ PHIUnion.erase(SrcReg);
+ }
+ }
+
+ // If a node is live-in to the defining block of one of its children, but
+ // not live-out, then we need to scan that block for local interferences.
+ } else if (isLiveIn(DFNode->getReg(),
+ MRI.getVRegDef(child->getReg())->getParent(), LI) ||
+ MRI.getVRegDef(DFNode->getReg())->getParent() ==
+ MRI.getVRegDef(child->getReg())->getParent()) {
+ // Add (p, c) to possible local interferences
+ locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
+ }
+
+ if (!visited.count(child)) {
+ worklist.push_back(child);
+ inserted = true;
+ }
+ }
+
+ if (!inserted) worklist.pop_back();
+ }
+}
+
+/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
+/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
+/// problems.
+///
+/// Based on "Practical Improvements to the Construction and Destruction
+/// of Static Single Assignment Form" by Briggs, et al.
+void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
+ std::set<unsigned>& pushed) {
+ // FIXME: This function needs to update LiveIntervals
+ std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB];
+
+ std::multimap<unsigned, unsigned> worklist;
+ std::map<unsigned, unsigned> map;
+
+  // Set up the worklist of initial copies
+ for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+ E = copy_set.end(); I != E; ) {
+ map.insert(std::make_pair(I->first, I->first));
+ map.insert(std::make_pair(I->second, I->second));
+
+ if (!UsedByAnother.count(I->second)) {
+ worklist.insert(*I);
+
+ // Avoid iterator invalidation
+ std::multimap<unsigned, unsigned>::iterator OI = I;
+ ++I;
+ copy_set.erase(OI);
+ } else {
+ ++I;
+ }
+ }
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ MachineFunction* MF = MBB->getParent();
+ MachineRegisterInfo& MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
+
+ // Iterate over the worklist, inserting copies
+ while (!worklist.empty() || !copy_set.empty()) {
+ while (!worklist.empty()) {
+ std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
+ std::pair<unsigned, unsigned> curr = *WI;
+ worklist.erase(WI);
+
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
+
+ if (isLiveOut(curr.second, MBB, LI)) {
+ // Create a temporary
+ unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+
+ // Insert copy from curr.second to a temporary at
+ // the Phi defining curr.second
+ MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
+ TII->copyRegToReg(*PI->getParent(), PI, t,
+ curr.second, RC, RC);
+
+ DOUT << "Inserted copy from " << curr.second << " to " << t << "\n";
+
+ // Push temporary on Stacks
+ Stacks[curr.second].push_back(t);
+
+ // Insert curr.second in pushed
+ pushed.insert(curr.second);
+
+ // Create a live interval for this temporary
+ InsertedPHIDests.push_back(std::make_pair(t, --PI));
+ }
+
+ // Insert copy from map[curr.first] to curr.second
+ TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
+ map[curr.first], RC, RC);
+ map[curr.first] = curr.second;
+ DOUT << "Inserted copy from " << curr.first << " to "
+ << curr.second << "\n";
+
+ // Push this copy onto InsertedPHICopies so we can
+ // update LiveIntervals with it.
+ MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
+ InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
+
+ // If curr.first is a destination in copy_set...
+ for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+ E = copy_set.end(); I != E; )
+ if (curr.first == I->second) {
+ std::pair<unsigned, unsigned> temp = *I;
+ worklist.insert(temp);
+
+ // Avoid iterator invalidation
+ std::multimap<unsigned, unsigned>::iterator OI = I;
+ ++I;
+ copy_set.erase(OI);
+
+ break;
+ } else {
+ ++I;
+ }
+ }
+
+ if (!copy_set.empty()) {
+ std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
+ std::pair<unsigned, unsigned> curr = *CI;
+ worklist.insert(curr);
+ copy_set.erase(CI);
+
+ LiveInterval& I = LI.getInterval(curr.second);
+ MachineBasicBlock::iterator term = MBB->getFirstTerminator();
+ unsigned endIdx = 0;
+ if (term != MBB->end())
+ endIdx = LI.getInstructionIndex(term);
+ else
+ endIdx = LI.getMBBEndIdx(MBB);
+
+ if (I.liveAt(endIdx)) {
+ const TargetRegisterClass *RC =
+ MF->getRegInfo().getRegClass(curr.first);
+
+ // Insert a copy from dest to a new temporary t at the end of b
+ unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+ TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
+ curr.second, RC, RC);
+ map[curr.second] = t;
+
+ MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
+ InsertedPHIDests.push_back(std::make_pair(t, --TI));
+ }
+ }
+ }
+
+ // Renumber the instructions so that we can perform the index computations
+ // needed to create new live intervals.
+ LI.computeNumbering();
+
+ // For copies that we inserted at the ends of predecessors, we construct
+ // live intervals. This is pretty easy, since we know that the destination
+  // register cannot have been live at that point previously. We just have
+ // to make sure that, for registers that serve as inputs to more than one
+ // PHI, we don't create multiple overlapping live intervals.
+ std::set<unsigned> RegHandled;
+ for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I =
+ InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
+ if (RegHandled.insert(I->first).second) {
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ unsigned instrIdx = LI.getInstructionIndex(I->second);
+ if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx)))
+ Int.removeRange(LiveIntervals::getDefIndex(instrIdx),
+ LI.getMBBEndIdx(I->second->getParent())+1,
+ true);
+
+ LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
+ R.valno->copy = I->second;
+ R.valno->def =
+ LiveIntervals::getDefIndex(LI.getInstructionIndex(I->second));
+ }
+ }
+}
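+
+// [Editorial sketch -- not part of this patch] The "virtual swap" hazard that
+// ScheduleCopies guards against: the parallel copy set {B <- A, A <- B}
+// emitted naively as "B = A; A = B;" loses the original value of B. As with
+// the temporaries introduced above, breaking the cycle with a scratch
+// location preserves both values. The helper below is a hypothetical scalar
+// analogue of that fix.
+#if 0
+static void toySequentializeSwap(int &A, int &B) {
+  int T = B;   // save the value that the first copy would clobber
+  B = A;
+  A = T;
+}
+#endif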
+
+/// InsertCopies - insert copies into MBB and all of the blocks it dominates
+void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
+ SmallPtrSet<MachineBasicBlock*, 16>& visited) {
+ MachineBasicBlock* MBB = MDTN->getBlock();
+ visited.insert(MBB);
+
+ std::set<unsigned> pushed;
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ // Rewrite register uses from Stacks
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->getOpcode() == TargetInstrInfo::PHI)
+ continue;
+
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ if (I->getOperand(i).isReg() &&
+ Stacks[I->getOperand(i).getReg()].size()) {
+ // Remove the live range for the old vreg.
+ LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
+ LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining(
+ LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
+ if (OldLR != OldInt.end())
+ OldInt.removeRange(*OldLR, true);
+
+ // Change the register
+ I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back());
+
+ // Add a live range for the new vreg
+ LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
+ VNInfo* FirstVN = *Int.vni_begin();
+ FirstVN->hasPHIKill = false;
+ if (I->getOperand(i).isKill())
+ FirstVN->kills.push_back(
+ LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
+
+ LiveRange LR (LI.getMBBStartIdx(I->getParent()),
+ LiveIntervals::getUseIndex(LI.getInstructionIndex(I))+1,
+ FirstVN);
+
+ Int.addRange(LR);
+ }
+ }
+
+ // Schedule the copies for this block
+ ScheduleCopies(MBB, pushed);
+
+ // Recur down the dominator tree.
+ for (MachineDomTreeNode::iterator I = MDTN->begin(),
+ E = MDTN->end(); I != E; ++I)
+ if (!visited.count((*I)->getBlock()))
+ InsertCopies(*I, visited);
+
+ // As we exit this block, pop the names we pushed while processing it
+ for (std::set<unsigned>::iterator I = pushed.begin(),
+ E = pushed.end(); I != E; ++I)
+ Stacks[*I].pop_back();
+}
+
+bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
+ unsigned secondary) {
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ LiveInterval& LHS = LI.getOrCreateInterval(primary);
+ LiveInterval& RHS = LI.getOrCreateInterval(secondary);
+
+ LI.computeNumbering();
+
+ DenseMap<VNInfo*, VNInfo*> VNMap;
+ for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ LiveRange R = *I;
+
+ unsigned Start = R.start;
+ unsigned End = R.end;
+ if (LHS.getLiveRangeContaining(Start))
+ return false;
+
+ if (LHS.getLiveRangeContaining(End))
+ return false;
+
+ LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R);
+ if (RI != LHS.end() && RI->start < End)
+ return false;
+ }
+
+ for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ LiveRange R = *I;
+ VNInfo* OldVN = R.valno;
+ VNInfo*& NewVN = VNMap[OldVN];
+ if (!NewVN) {
+ NewVN = LHS.getNextValue(OldVN->def,
+ OldVN->copy,
+ LI.getVNInfoAllocator());
+ NewVN->kills = OldVN->kills;
+ }
+
+ LiveRange LR (R.start, R.end, NewVN);
+ LHS.addRange(LR);
+ }
+
+ LI.removeInterval(RHS.reg);
+
+ return true;
+}
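+
+// [Editorial sketch -- not part of this patch] The safety check performed by
+// mergeLiveIntervals above amounts to verifying that two sorted, disjoint
+// range lists do not intersect before folding one into the other. ToyRange
+// and the helper are hypothetical, using half-open [Start, End) ranges.
+#if 0
+struct ToyRange { unsigned Start, End; };
+
+static bool toyRangesCanMerge(const ToyRange *A, unsigned NA,
+                              const ToyRange *B, unsigned NB) {
+  unsigned i = 0, j = 0;
+  while (i != NA && j != NB) {
+    if (A[i].End <= B[j].Start)
+      ++i;                       // A[i] ends before B[j] begins
+    else if (B[j].End <= A[i].Start)
+      ++j;                       // B[j] ends before A[i] begins
+    else
+      return false;              // the ranges overlap
+  }
+  return true;
+}
+#endif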
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+
+ // Compute DFS numbers of each block
+ computeDFS(Fn);
+
+ // Determine which phi node operands need copies
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ if (!I->empty() &&
+ I->begin()->getOpcode() == TargetInstrInfo::PHI)
+ processBlock(I);
+
+ // Break interferences where two different phis want to coalesce
+ // in the same register.
+ std::set<unsigned> seen;
+ typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
+ RenameSetType;
+ for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
+ I != E; ++I) {
+ for (std::map<unsigned, MachineBasicBlock*>::iterator
+ OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
+ if (!seen.count(OI->first)) {
+ seen.insert(OI->first);
+ ++OI;
+ } else {
+ Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
+ unsigned reg = OI->first;
+ ++OI;
+ I->second.erase(reg);
+ DOUT << "Removing Renaming: " << reg << " -> " << I->first << "\n";
+ }
+ }
+ }
+
+ // Insert copies
+ // FIXME: This process should probably preserve LiveIntervals
+ SmallPtrSet<MachineBasicBlock*, 16> visited;
+ MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+ InsertCopies(MDT.getRootNode(), visited);
+
+ // Perform renaming
+ for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
+ I != E; ++I)
+ while (I->second.size()) {
+ std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
+
+ DOUT << "Renaming: " << SI->first << " -> " << I->first << "\n";
+
+ if (SI->first != I->first) {
+ if (mergeLiveIntervals(I->first, SI->first)) {
+ Fn.getRegInfo().replaceRegWith(SI->first, I->first);
+
+ if (RenameSets.count(SI->first)) {
+ I->second.insert(RenameSets[SI->first].begin(),
+ RenameSets[SI->first].end());
+ RenameSets.erase(SI->first);
+ }
+ } else {
+ // Insert a last-minute copy if a conflict was detected.
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
+ TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
+ I->first, SI->first, RC, RC);
+
+ LI.computeNumbering();
+
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ unsigned instrIdx =
+ LI.getInstructionIndex(--SI->second->getFirstTerminator());
+ if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx)))
+ Int.removeRange(LiveIntervals::getDefIndex(instrIdx),
+ LI.getMBBEndIdx(SI->second)+1, true);
+
+ LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
+ --SI->second->getFirstTerminator());
+ R.valno->copy = --SI->second->getFirstTerminator();
+ R.valno->def = LiveIntervals::getDefIndex(instrIdx);
+
+ DOUT << "Renaming failed: " << SI->first << " -> "
+ << I->first << "\n";
+ }
+ }
+
+ LiveInterval& Int = LI.getOrCreateInterval(I->first);
+ const LiveRange* LR =
+ Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
+ LR->valno->hasPHIKill = true;
+
+ I->second.erase(SI->first);
+ }
+
+ // Remove PHIs
+ std::vector<MachineInstr*> phis;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
+ BI != BE; ++BI)
+ if (BI->getOpcode() == TargetInstrInfo::PHI)
+ phis.push_back(BI);
+ }
+
+ for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
+ I != E; ) {
+ MachineInstr* PInstr = *(I++);
+
+ // If this is a dead PHI node, then remove it from LiveIntervals.
+ unsigned DestReg = PInstr->getOperand(0).getReg();
+ LiveInterval& PI = LI.getInterval(DestReg);
+ if (PInstr->registerDefIsDead(DestReg)) {
+ if (PI.containsOneValue()) {
+ LI.removeInterval(DestReg);
+ } else {
+ unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+ PI.removeRange(*PI.getLiveRangeContaining(idx), true);
+ }
+ } else {
+ // Trim live intervals of input registers. They are no longer live into
+ // this block if they died after the PHI. If they lived after it, don't
+ // trim them because they might have other legitimate uses.
+ for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
+ unsigned reg = PInstr->getOperand(i).getReg();
+
+ MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
+ LiveInterval& InputI = LI.getInterval(reg);
+ if (MBB != PInstr->getParent() &&
+ InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
+ InputI.expiredAt(LI.getInstructionIndex(PInstr) +
+ LiveInterval::InstrSlots::NUM))
+ InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
+ LI.getInstructionIndex(PInstr),
+ true);
+ }
+
+ // If the PHI is not dead, then the valno defined by the PHI
+ // now has an unknown def.
+ unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
+ const LiveRange* PLR = PI.getLiveRangeContaining(idx);
+ PLR->valno->def = ~0U;
+ LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
+ PLR->start, PLR->valno);
+ PI.addRange(R);
+ }
+
+ LI.RemoveMachineInstrFromMaps(PInstr);
+ PInstr->eraseFromParent();
+ }
+
+ LI.computeNumbering();
+
+ return true;
+}
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
new file mode 100644
index 0000000..a5e1ee4
--- /dev/null
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -0,0 +1,194 @@
+//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class, which provides default
+// implementations of various TargetInstrInfo methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+// commuteInstruction - The default implementation of this method just exchanges
+// operands 1 and 2.
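+// For example (an illustrative sketch with a hypothetical ADDrr opcode):
+//   %reg1026 = ADDrr %reg1024, %reg1025<kill>
+// commutes to
+//   %reg1026 = ADDrr %reg1025<kill>, %reg1024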
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ if (MI->getOperand(0).getReg() == Reg1) {
+ // Must be a two-address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ MachineFunction &MF = *MI->getParent()->getParent();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill));
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+ return MI;
+}
+
+/// CommuteChangesDestination - Return true if commuting the specified
+/// instruction will also change the destination operand. Also return the
+/// current operand index of the would-be new destination register by
+/// reference. This can happen when the commutable instruction is also a
+/// two-address instruction.
+bool TargetInstrInfoImpl::CommuteChangesDestination(MachineInstr *MI,
+ unsigned &OpIdx) const{
+ assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+ "This only knows how to commute register operands so far");
+ if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ // Must be a two-address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ OpIdx = 2;
+ return true;
+ }
+ return false;
+}
+
+
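+// PredicateInstruction - The default implementation rewrites each predicate
+// operand of MI (register, immediate, or basic block) with the corresponding
+// operand from Pred and returns true if any operand was changed.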
+bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (TID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
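+// reMaterialize - The default implementation simply clones Orig, retargets its
+// destination register to DestReg, and inserts the clone before I.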
+void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+unsigned
+TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
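+/// For example (an illustrative sketch; target-specific opcodes vary), folding
+/// a reload from stack slot FI#2 into "%reg1026 = ADD32rr %reg1026, %reg1024"
+/// might yield "%reg1026 = ADD32rm %reg1026, FI#2" on an x86-like target.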
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ unsigned Flags = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI->getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ if (!NewMI) return 0;
+
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->getDesc().mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->getDesc().mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FrameIndex) != -1);
+ MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FrameIndex),
+ Flags,
+ MFI.getObjectOffset(FrameIndex),
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
+ NewMI->addMemOperand(MF, MMO);
+
+ return NewMI;
+}
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ // Copy the memoperands from the load to the folded instruction.
+ for (std::list<MachineMemOperand>::iterator I = LoadMI->memoperands_begin(),
+ E = LoadMI->memoperands_end(); I != E; ++I)
+ NewMI->addMemOperand(MF, *I);
+
+ return NewMI;
+}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 0000000..3c40404
--- /dev/null
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,997 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to be
+// capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class VISIBILITY_HIDDEN TwoAddressInstructionPass
+ : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+
+ // DistanceMap - Keep track of the distance of each MI from the start of
+ // the current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // SrcRegMap - A map from a virtual register to the physical register it is
+ // likely to be coalesced with, due to a copy from a physical register to a
+ // virtual register. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+ // DstRegMap - A map from a virtual register to the physical register it is
+ // likely to be coalesced with, due to a copy from a virtual register to a
+ // physical register. e.g. r1 = move v1024.
+
+ bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc);
+
+ bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef);
+
+ MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA);
+
+ bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned Dist);
+
+ void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+ };
+}
+
+char TwoAddressInstructionPass::ID = 0;
+static RegisterPass<TwoAddressInstructionPass>
+X("twoaddressinstruction", "Two-Address instruction pass");
+
+const PassInfo *const llvm::TwoAddressInstructionPassID = &X;
+
+/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above-mentioned register to reduce
+/// register pressure.
+bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
+ MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(TII, SeenStore))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = NULL;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = NULL;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill()) {
+ if (OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// isTwoAddrUse - Return true if the specified MI is using the specified
+/// register as a two-address operand.
+static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg &&
+ (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
+ // Earlier use is a two-address one.
+ return true;
+ }
+ return false;
+}
+
+/// isProfitableToReMat - Return true if the heuristic determines it is likely
+/// to be profitable to re-materialize the definition of Reg rather than copy
+/// the register.
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+ const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc) {
+ bool OtherUse = false;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = UseMO.getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end() && DI->second == Loc)
+ continue; // Current use.
+ OtherUse = true;
+ // There is at least one other use in the MBB that will clobber the
+ // register.
+ if (isTwoAddrUse(UseMI, Reg))
+ return true;
+ }
+ }
+
+ // If other uses in MBB are not two-address uses, then don't remat.
+ if (OtherUse)
+ return false;
+
+ // No other uses in the same block, remat if it's defined in the same
+ // block so it does not unnecessarily extend the live range.
+ return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed. It also returns the last
+/// def location by reference.
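+/// For example (illustrative distances): with a def of Reg at distance 6, a use
+/// at distance 8, and Dist == 10, this returns false; if the only use were at
+/// distance 4 instead, it would return true with LastDef set to 6.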
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+ MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+ MachineBasicBlock *MBB,
+ unsigned Dist) {
+ unsigned LastUseDist = 0;
+ MachineInstr *LastUse = 0;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (DI->second >= Dist)
+ continue;
+
+ if (MO.isUse() && DI->second > LastUseDist) {
+ LastUse = DI->first;
+ LastUseDist = DI->second;
+ }
+ }
+ return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or
+/// an extract_subreg instruction. It also returns the source and destination
+/// registers and whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ unsigned SrcSubIdx, DstSubIdx;
+ if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ if (MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else if (MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ }
+ }
+
+ if (DstReg) {
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+ }
+ return false;
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ if (!DefMI->killsRegister(Reg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (next(Begin) != MRI->def_end())
+ return true;
+ DefMI = &*Begin;
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned NumOps = (MI.getOpcode() == TargetInstrInfo::INLINEASM)
+ ? MI.getNumOperands() : TID.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single in-basic-block
+/// use, return the use instruction if it's a copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg);
+ if (UI == MRI->use_end())
+ return 0;
+ MachineInstr &UseMI = *UI;
+ if (++UI != MRI->use_end())
+ // More than one use.
+ return 0;
+ if (UseMI.getParent() != MBB)
+ return 0;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
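+/// For example (illustrative), with RegMap = { v1026 -> v1025, v1025 -> r1 },
+/// getMappedReg(v1026, RegMap) follows the chain and returns r1; a chain that
+/// ends without reaching a physical register returns 0.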
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to commute
+/// the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist) {
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!MI->killsRegister(regC))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ unsigned ToRegB = getMappedReg(regB, DstRegMap);
+ unsigned ToRegC = getMappedReg(regC, DstRegMap);
+ if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
+ (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ regsAreCompatible(FromRegC, ToRegB, TRI)))
+ return true;
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+ return true;
+
+ // Since there are no intervening uses for both registers, then commute
+ // if the def of regC is closer. Its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool
+TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
+ MachineInstr *MI = mi;
+ DOUT << "2addr: COMMUTING : " << *MI;
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+ if (NewMI == 0) {
+ DOUT << "2addr: COMMUTING FAILED!\n";
+ return false;
+ }
+
+ DOUT << "2addr: COMMUTED TO: " << *NewMI;
+ // If commuting produced a new instruction, update live variables.
+ if (NewMI != MI) {
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(RegC, MI, NewMI);
+
+ mbbi->insert(mi, NewMI); // Insert the new inst
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = NewMI;
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ }
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned Dist) {
+ MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+ if (NewMI) {
+ DOUT << "2addr: CONVERTING 2-ADDR: " << *mi;
+ DOUT << "2addr: TO 3-ADDR: " << *NewMI;
+ bool Sunk = false;
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+ // FIXME: Temporary workaround. If the new instruction doesn't
+ // use RegB, convertToThreeAddress must have created more
+ // than one instruction.
+ Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+ mbbi->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = next(mi);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys)
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII,
+ IsCopy, NewReg, IsDstPhys)) {
+ if (IsCopy) {
+ if (!Processed.insert(UseMI))
+ break;
+ }
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+ // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == DstReg &&
+ "Can't map to two src physical registers!");
+ VirtRegPairs.push_back(NewReg);
+ DstReg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&
+ "Can't map to two dst physical registers!");
+ ToReg = FromReg;
+ }
+ }
+ }
+
+ Processed.insert(MI);
+}
+
+/// isSafeToDelete - Return true if the specified instruction does not produce
+/// any side effects and all of its defs are dead, i.e. it is safe to delete.
+static bool isSafeToDelete(MachineInstr *MI, unsigned Reg,
+ const TargetInstrInfo *TII,
+ SmallVector<unsigned, 4> &Kills) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayStore() || TID.isCall())
+ return false;
+ if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && !MO.isDead())
+ return false;
+ if (MO.isUse() && MO.getReg() != Reg && MO.isKill())
+ Kills.push_back(MO.getReg());
+ }
+
+ return true;
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "Machine Function\n";
+ const TargetMachine &TM = MF.getTarget();
+ MRI = &MF.getRegInfo();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+
+ bool MadeChange = false;
+
+ DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+
+ // ReMatRegs - Keep track of the registers whose defs are remat'ed.
+ BitVector ReMatRegs;
+ ReMatRegs.resize(MRI->getLastVirtReg()+1);
+
+ SmallPtrSet<MachineInstr*, 8> Processed;
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = next(mi);
+ const TargetInstrDesc &TID = mi->getDesc();
+ bool FirstTied = true;
+
+ DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+ ProcessCopy(&*mi, &*mbbi, Processed);
+
+ unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
+ ? mi->getNumOperands() : TID.getNumOperands();
+ for (unsigned si = 0; si < NumOps; ++si) {
+ unsigned ti = 0;
+ if (!mi->isRegTiedToDefOperand(si, &ti))
+ continue;
+
+ if (FirstTied) {
+ ++NumTwoAddressInstrs;
+ DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM));
+ }
+
+ FirstTied = false;
+
+ assert(mi->getOperand(si).isReg() && mi->getOperand(si).getReg() &&
+ mi->getOperand(si).isUse() && "two address instruction invalid");
+
+ // If the two operands are the same we just remove the use
+ // and mark the def as def&use, otherwise we have to insert a copy.
+ if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) {
+ // Rewrite:
+ // a = b op c
+ // to:
+ // a = b
+ // a = a op c
+ unsigned regA = mi->getOperand(ti).getReg();
+ unsigned regB = mi->getOperand(si).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot update physical register live information");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction (a =
+ // b + a, for example) because our transformation will not work. This
+ // should never occur because we are in SSA form.
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+ assert(i == ti ||
+ !mi->getOperand(i).isReg() ||
+ mi->getOperand(i).getReg() != regA);
+#endif
+
+ // If this instruction is not the killing user of B, see if we can
+ // rearrange the code to make it so. Making it the killing user will
+ // allow us to coalesce A and B together, eliminating the copy we are
+ // about to insert.
+ if (!isKilled(*mi, regB, MRI, TII)) {
+ // If regA is dead and the instruction can be deleted, just delete
+ // it so it doesn't clobber regB.
+ SmallVector<unsigned, 4> Kills;
+ if (mi->getOperand(ti).isDead() &&
+ isSafeToDelete(mi, regB, TII, Kills)) {
+ SmallVector<std::pair<std::pair<unsigned, bool>
+ ,MachineInstr*>, 4> NewKills;
+ bool ReallySafe = true;
+ // If this instruction kills some virtual registers, we need to
+ // update the kill information. If it's not possible to do so,
+ // then bail out.
+ while (!Kills.empty()) {
+ unsigned Kill = Kills.back();
+ Kills.pop_back();
+ if (TargetRegisterInfo::isPhysicalRegister(Kill)) {
+ ReallySafe = false;
+ break;
+ }
+ MachineInstr *LastKill = FindLastUseInMBB(Kill, &*mbbi, Dist);
+ if (LastKill) {
+ bool isModRef = LastKill->modifiesRegister(Kill);
+ NewKills.push_back(std::make_pair(std::make_pair(Kill,isModRef),
+ LastKill));
+ } else {
+ ReallySafe = false;
+ break;
+ }
+ }
+
+ if (ReallySafe) {
+ if (LV) {
+ while (!NewKills.empty()) {
+ MachineInstr *NewKill = NewKills.back().second;
+ unsigned Kill = NewKills.back().first.first;
+ bool isDead = NewKills.back().first.second;
+ NewKills.pop_back();
+ if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+ if (isDead)
+ LV->addVirtualRegisterDead(Kill, NewKill);
+ else
+ LV->addVirtualRegisterKilled(Kill, NewKill);
+ }
+ }
+ }
+
+ // We're really going to nuke the old inst. If regB was marked
+ // as a kill we need to update its Kills list.
+ if (mi->getOperand(si).isKill())
+ LV->removeVirtualRegisterKilled(regB, mi);
+
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = nmi;
+ ++NumDeletes;
+ break; // Done with this instruction.
+ }
+ }
+
+ // If this instruction is commutative, check to see if C dies. If
+ // so, swap the B and C operands. This makes the live ranges of A
+ // and C joinable.
+ // FIXME: This code also works for A := B op C instructions.
+ if (TID.isCommutable() && mi->getNumOperands() >= 3) {
+ assert(mi->getOperand(3-si).isReg() &&
+ "Not a proper commutative instruction!");
+ unsigned regC = mi->getOperand(3-si).getReg();
+ if (isKilled(*mi, regC, MRI, TII)) {
+ if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ ++NumCommuted;
+ regB = regC;
+ goto InstructionRearranged;
+ }
+ }
+ }
+
+ // If this instruction is potentially convertible to a true
+ // three-address instruction,
+ if (TID.isConvertibleTo3Addr()) {
+ // FIXME: This assumes there are no more operands which are tied
+ // to another register.
+#ifndef NDEBUG
+ for (unsigned i = si + 1, e = TID.getNumOperands(); i < e; ++i)
+ assert(TID.getOperandConstraint(i, TOI::TIED_TO) == -1);
+#endif
+
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ break; // Done with this instruction.
+ }
+ }
+ }
+
+ // If it's profitable to commute the instruction, do so.
+ if (TID.isCommutable() && mi->getNumOperands() >= 3) {
+ unsigned regC = mi->getOperand(3-si).getReg();
+ if (isProfitableToCommute(regB, regC, mi, mbbi, Dist))
+ if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ ++NumAggrCommuted;
+ ++NumCommuted;
+ regB = regC;
+ goto InstructionRearranged;
+ }
+ }
+
+ // If it's profitable to convert the 2-address instruction to a
+ // 3-address one, do so.
+ if (TID.isConvertibleTo3Addr() && isProfitableToConv3Addr(regA)) {
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ break; // Done with this instruction.
+ }
+ }
+
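+ // The instruction stays in two-address form: make regA available just
+ // before it, either by rematerializing regB's definition or by inserting
+ // a regA = regB copy.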
+ InstructionRearranged:
+ const TargetRegisterClass* rc = MRI->getRegClass(regB);
+ MachineInstr *DefMI = MRI->getVRegDef(regB);
+ // If it's safe and profitable, remat the definition instead of
+ // copying it.
+ if (DefMI &&
+ DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isSafeToReMat(TII, regB) &&
+ isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
+ DEBUG(cerr << "2addr: REMATTING : " << *DefMI << "\n");
+ TII->reMaterialize(*mbbi, mi, regA, DefMI);
+ ReMatRegs.set(regB);
+ ++NumReMats;
+ } else {
+ bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
+ (void)Emitted;
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ }
+
+ MachineBasicBlock::iterator prevMI = prior(mi);
+ // Update DistanceMap.
+ DistanceMap.insert(std::make_pair(prevMI, Dist));
+ DistanceMap[mi] = ++Dist;
+
+ // Update live variables for regB.
+ if (LV) {
+ if (LV->removeVirtualRegisterKilled(regB, mi))
+ LV->addVirtualRegisterKilled(regB, prevMI);
+
+ if (LV->removeVirtualRegisterDead(regB, mi))
+ LV->addVirtualRegisterDead(regB, prevMI);
+ }
+
+ DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM));
+
+ // Replace all occurrences of regB with regA.
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ if (mi->getOperand(i).isReg() &&
+ mi->getOperand(i).getReg() == regB)
+ mi->getOperand(i).setReg(regA);
+ }
+ }
+
+ assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse());
+ mi->getOperand(ti).setReg(mi->getOperand(si).getReg());
+ MadeChange = true;
+
+ DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM));
+ }
+
+ mi = nmi;
+ }
+ }
+
+ // Some remat'ed instructions are dead.
+ int VReg = ReMatRegs.find_first();
+ while (VReg != -1) {
+ if (MRI->use_empty(VReg)) {
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ DefMI->eraseFromParent();
+ }
+ VReg = ReMatRegs.find_next(VReg);
+ }
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..c3b213c
--- /dev/null
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,199 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN UnreachableBlockElim : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElim() : FunctionPass(&ID) {}
+ };
+}
+char UnreachableBlockElim::ID = 0;
+static RegisterPass<UnreachableBlockElim>
+X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Reachable;
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
+ df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(I)) {
+ BasicBlock *BB = I;
+ DeadBlocks.push_back(BB);
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ DeadBlocks[i]->eraseFromParent();
+
+ return DeadBlocks.size();
+}
+
+
+namespace {
+ class VISIBILITY_HIDDEN UnreachableMachineBlockElim :
+ public MachineFunctionPass {
+ virtual bool runOnMachineFunction(MachineFunction &F);
+ MachineModuleInfo *MMI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+static RegisterPass<UnreachableMachineBlockElim>
+Y("unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks");
+
+const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y;
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+ I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them, removing any PHI entries
+ // referencing them from their successors, and disconnecting them from
+ // those successors.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+
+ // Test for deadness.
+ if (!Reachable.count(BB)) {
+ DeadBlocks.push_back(BB);
+
+ while (BB->succ_begin() != BB->succ_end()) {
+ MachineBasicBlock* succ = *BB->succ_begin();
+
+ MachineBasicBlock::iterator start = succ->begin();
+ while (start != succ->end() &&
+ start->getOpcode() == TargetInstrInfo::PHI) {
+ for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+ if (start->getOperand(i).isMBB() &&
+ start->getOperand(i).getMBB() == BB) {
+ start->RemoveOperand(i);
+ start->RemoveOperand(i-1);
+ }
+
+ start++;
+ }
+
+ BB->removeSuccessor(BB->succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = DeadBlocks[i];
+ // If there are any labels in the basic block, unregister them from
+ // MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->end(); I != E; ++I) {
+ if (I->isLabel())
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+ MBB->eraseFromParent();
+ }
+
+ // Cleanup PHI nodes.
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = I;
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+ BB->pred_end());
+ MachineBasicBlock::iterator phi = BB->begin();
+ while (phi != BB->end() &&
+ phi->getOpcode() == TargetInstrInfo::PHI) {
+ for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+ if (!preds.count(phi->getOperand(i).getMBB())) {
+ phi->RemoveOperand(i);
+ phi->RemoveOperand(i-1);
+ }
+
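+ // A PHI left with a single (value, predecessor) pair is just a copy;
+ // forward its input register to its output and erase it.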
+ if (phi->getNumOperands() == 3) {
+ unsigned Input = phi->getOperand(1).getReg();
+ unsigned Output = phi->getOperand(0).getReg();
+
+ MachineInstr* temp = phi;
+ ++phi;
+ temp->eraseFromParent();
+
+ if (Input != Output)
+ F.getRegInfo().replaceRegWith(Output, Input);
+
+ continue;
+ }
+
+ ++phi;
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return DeadBlocks.size();
+}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 0000000..29637b9
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,269 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregmap"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpills , "Number of register spills");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+static RegisterPass<VirtRegMap>
+X("virtregmap", "Virtual Register Map");
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ TII = mf.getTarget().getInstrInfo();
+ TRI = mf.getTarget().getRegisterInfo();
+ MF = &mf;
+
+ ReMatId = MAX_STACK_SLOT+1;
+ LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2ReMatIdMap.clear();
+ Virt2SplitMap.clear();
+ Virt2SplitKillMap.clear();
+ ReMatMap.clear();
+ ImplicitDefed.clear();
+ SpillSlotToUsesMap.clear();
+ MI2VirtMap.clear();
+ SpillPt2VirtMap.clear();
+ RestorePt2VirtMap.clear();
+ EmergencySpillMap.clear();
+ EmergencySpillSlots.clear();
+
+ SpillSlotToUsesMap.resize(8);
+ ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1-
+ TargetRegisterInfo::FirstVirtualRegister);
+
+ allocatableRCRegs.clear();
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ allocatableRCRegs.insert(std::make_pair(*I,
+ TRI->getAllocatableSet(mf, *I)));
+
+ grow();
+
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ Virt2PhysMap.grow(LastVirtReg);
+ Virt2StackSlotMap.grow(LastVirtReg);
+ Virt2ReMatIdMap.grow(LastVirtReg);
+ Virt2SplitMap.grow(LastVirtReg);
+ Virt2SplitKillMap.grow(LastVirtReg);
+ ReMatMap.grow(LastVirtReg);
+ ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ unsigned Idx = SS-LowSpillSlot;
+ while (Idx >= SpillSlotToUsesMap.size())
+ SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ Virt2StackSlotMap[virtReg] = SS;
+ ++NumSpills;
+ return SS;
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = SS;
+}
+
+int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = ReMatId;
+ return ReMatId++;
+}
+
+void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ Virt2ReMatIdMap[virtReg] = id;
+}
+
+int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
+ std::map<const TargetRegisterClass*, int>::iterator I =
+ EmergencySpillSlots.find(RC);
+ if (I != EmergencySpillSlots.end())
+ return I->second;
+ int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ EmergencySpillSlots[RC] = SS;
+ return SS;
+}
+
+void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
+ if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
+ // If FI < LowSpillSlot, this stack reference was produced by
+ // instruction selection and is not a spill
+ if (FI >= LowSpillSlot) {
+ assert(FI >= 0 && "Spill slot index should not be negative!");
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
+ }
+ }
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
+ MachineInstr *NewMI, ModRef MRInfo) {
+ // Move previous memory references folded to new instruction.
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
+ for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
+ E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
+ MI2VirtMap.erase(I++);
+ }
+
+ // add new memory reference
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
+ MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (MF->getFrameInfo()->isFixedObjectIndex(FI))
+ continue;
+ // This stack reference was produced by instruction selection and
+ // is not a spill
+ if (FI < LowSpillSlot)
+ continue;
+ assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
+ && "Invalid spill slot");
+ SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
+ }
+ MI2VirtMap.erase(MI);
+ SpillPt2VirtMap.erase(MI);
+ RestorePt2VirtMap.erase(MI);
+ EmergencySpillMap.erase(MI);
+}
+
+/// FindUnusedRegisters - Gather a list of allocatable registers that
+/// have not been allocated to any virtual register.
+bool VirtRegMap::FindUnusedRegisters(const TargetRegisterInfo *TRI,
+ LiveIntervals* LIs) {
+ unsigned NumRegs = TRI->getNumRegs();
+ UnusedRegs.reset();
+ UnusedRegs.resize(NumRegs);
+
+ BitVector Used(NumRegs);
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ Used.set(Virt2PhysMap[i]);
+
+ BitVector Allocatable = TRI->getAllocatableSet(*MF);
+ bool AnyUnused = false;
+ for (unsigned Reg = 1; Reg < NumRegs; ++Reg) {
+ if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) {
+ bool ReallyUnused = true;
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ if (Used[*AS] || LIs->hasInterval(*AS)) {
+ ReallyUnused = false;
+ break;
+ }
+ }
+ if (ReallyUnused) {
+ AnyUnused = true;
+ UnusedRegs.set(Reg);
+ }
+ }
+ }
+
+ return AnyUnused;
+}
+
+void VirtRegMap::print(std::ostream &OS, const Module* M) const {
+ const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
+ if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
+ << "]\n";
+ }
+
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
+ if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
+ OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n";
+ OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+ print(cerr);
+}
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
new file mode 100644
index 0000000..507557d
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.h
@@ -0,0 +1,495 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map. This maps virtual registers to
+// physical registers and virtual registers to stack slots. It is created and
+// updated by a register allocator and then used by a machine code rewriter that
+// adds spill code and rewrites virtual into physical register references.
+//
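+// A minimal usage sketch (illustrative only, not taken verbatim from this
+// patch): a register allocator records its decisions roughly like
+//
+//   VirtRegMap &VRM = getAnalysis<VirtRegMap>();
+//   VRM.assignVirt2Phys(VirtReg, PhysReg);       // VirtReg got a physreg
+//   int SS = VRM.assignVirt2StackSlot(VirtReg);  // or it was spilled instead
+//
+// and the rewriter later consults getPhys() / getStackSlot() while replacing
+// virtual register operands.
+//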
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Streams.h"
+#include <map>
+
+namespace llvm {
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineFunction;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+
+ class VirtRegMap : public MachineFunctionPass {
+ public:
+ enum {
+ NO_PHYS_REG = 0,
+ NO_STACK_SLOT = (1L << 30)-1,
+ MAX_STACK_SLOT = (1L << 18)-1
+ };
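+    // Note on the sentinel values above: frame indices in the range
+    // [0, MAX_STACK_SLOT] denote real spill slots, ids above MAX_STACK_SLOT
+    // denote rematerialization ids (see ReMatId below), and NO_STACK_SLOT
+    // means no slot has been assigned.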
+
+ enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
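+    // ModRef values are bit flags (isModRef == (isRef | isMod)); callers test
+    // individual bits, e.g. (MR & VirtRegMap::isModRef) in the rewriter.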
+ typedef std::multimap<MachineInstr*,
+ std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+ private:
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MF;
+
+ DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
+
+ /// Virt2PhysMap - This is a virtual to physical register
+ /// mapping. Each virtual register is required to have an entry in
+ /// it; even spilled virtual registers (the register mapped to a
+ /// spilled register is the temporary used to load it from the
+ /// stack).
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+
+ /// Virt2StackSlotMap - This is virtual register to stack slot
+ /// mapping. Each spilled virtual register has an entry in it
+ /// which corresponds to the stack slot this register is spilled
+ /// at.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+
+ /// Virt2ReMatIdMap - This is virtual register to rematerialization id
+ /// mapping. Each spilled virtual register that should be remat'd has an
+ /// entry in it which corresponds to the remat id.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
+
+    /// Virt2SplitMap - This is a virtual register to split virtual register
+    /// mapping.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+
+    /// Virt2SplitKillMap - This maps a split virtual register to its last
+    /// use (kill) index.
+ IndexedMap<unsigned> Virt2SplitKillMap;
+
+ /// ReMatMap - This is virtual register to re-materialized instruction
+ /// mapping. Each virtual register whose definition is going to be
+ /// re-materialized has an entry in it.
+ IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
+
+ /// MI2VirtMap - This is MachineInstr to virtual register
+ /// mapping. In the case of memory spill code being folded into
+ /// instructions, we need to know which virtual register was
+ /// read/written by this instruction.
+ MI2VirtMapTy MI2VirtMap;
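+    // (Entries are added by virtFolded() and queried via getFoldedVirts();
+    // the rewriter iterates them when deciding whether folded spill code can
+    // be unfolded.)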
+
+ /// SpillPt2VirtMap - This records the virtual registers which should
+ /// be spilled right after the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
+ SpillPt2VirtMap;
+
+ /// RestorePt2VirtMap - This records the virtual registers which should
+ /// be restored right before the MachineInstr due to live interval
+ /// splitting.
+ std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
+
+ /// EmergencySpillMap - This records the physical registers that should
+ /// be spilled / restored around the MachineInstr since the register
+ /// allocator has run out of registers.
+ std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
+
+ /// EmergencySpillSlots - This records emergency spill slots used to
+ /// spill physical registers when the register allocator runs out of
+ /// registers. Ideally only one stack slot is used per function per
+ /// register class.
+ std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
+
+    /// ReMatId - Instead of assigning a stack slot to a virtual register
+    /// that will be rematerialized, a unique id is assigned. This keeps
+    /// track of the highest id used so far. Note that ids start at (1<<18)
+    /// to avoid conflicts with stack slot numbers.
+ int ReMatId;
+
+ /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
+ int LowSpillSlot, HighSpillSlot;
+
+ /// SpillSlotToUsesMap - Records uses for each register spill slot.
+ SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
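+    // Indexed by (frame index - LowSpillSlot); maintained by
+    // addSpillSlotUse() and RemoveMachineInstrFromMaps().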
+
+ /// ImplicitDefed - One bit for each virtual register. If set it indicates
+ /// the register is implicitly defined.
+ BitVector ImplicitDefed;
+
+ /// UnusedRegs - A list of physical registers that have not been used.
+ BitVector UnusedRegs;
+
+ VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
+ void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+ public:
+ static char ID;
+ VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
+ Virt2StackSlotMap(NO_STACK_SLOT),
+ Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
+ Virt2SplitKillMap(0), ReMatMap(NULL),
+ ReMatId(MAX_STACK_SLOT+1),
+ LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void grow();
+
+ /// @brief returns true if the specified virtual register is
+ /// mapped to a physical register
+ bool hasPhys(unsigned virtReg) const {
+ return getPhys(virtReg) != NO_PHYS_REG;
+ }
+
+ /// @brief returns the physical register mapped to the specified
+ /// virtual register
+ unsigned getPhys(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2PhysMap[virtReg];
+ }
+
+ /// @brief creates a mapping for the specified virtual register to
+ /// the specified physical register
+ void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ Virt2PhysMap[virtReg] = physReg;
+ }
+
+    /// @brief clears the specified virtual register's physical
+    /// register mapping
+ void clearVirt(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+             "attempt to clear an unassigned virtual register");
+ Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ }
+
+ /// @brief clears all virtual to physical register mappings
+ void clearAllVirt() {
+ Virt2PhysMap.clear();
+ grow();
+ }
+
+ /// @brief records virtReg is a split live interval from SReg.
+ void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
+ Virt2SplitMap[virtReg] = SReg;
+ }
+
+ /// @brief returns the live interval virtReg is split from.
+ unsigned getPreSplitReg(unsigned virtReg) {
+ return Virt2SplitMap[virtReg];
+ }
+
+ /// @brief returns true if the specified virtual register is not
+ /// mapped to a stack slot or rematerialized.
+ bool isAssignedReg(unsigned virtReg) const {
+ if (getStackSlot(virtReg) == NO_STACK_SLOT &&
+ getReMatId(virtReg) == NO_STACK_SLOT)
+ return true;
+ // Split register can be assigned a physical register as well as a
+ // stack slot or remat id.
+ return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+ }
+
+ /// @brief returns the stack slot mapped to the specified virtual
+ /// register
+ int getStackSlot(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2StackSlotMap[virtReg];
+ }
+
+ /// @brief returns the rematerialization id mapped to the specified virtual
+ /// register
+ int getReMatId(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2ReMatIdMap[virtReg];
+ }
+
+    /// @brief create a mapping for the specified virtual register to
+ /// the next available stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+ /// @brief create a mapping for the specified virtual register to
+ /// the specified stack slot
+ void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+    /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ int assignVirtReMatId(unsigned virtReg);
+    /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ void assignVirtReMatId(unsigned virtReg, int id);
+
+ /// @brief returns true if the specified virtual register is being
+ /// re-materialized.
+ bool isReMaterialized(unsigned virtReg) const {
+ return ReMatMap[virtReg] != NULL;
+ }
+
+ /// @brief returns the original machine instruction being re-issued
+ /// to re-materialize the specified virtual register.
+ MachineInstr *getReMaterializedMI(unsigned virtReg) const {
+ return ReMatMap[virtReg];
+ }
+
+    /// @brief records that the specified virtual register will be
+    /// re-materialized and the original instruction which will be re-issued
+    /// for this purpose.
+ void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+ ReMatMap[virtReg] = def;
+ }
+
+ /// @brief record the last use (kill) of a split virtual register.
+ void addKillPoint(unsigned virtReg, unsigned index) {
+ Virt2SplitKillMap[virtReg] = index;
+ }
+
+ unsigned getKillPoint(unsigned virtReg) const {
+ return Virt2SplitKillMap[virtReg];
+ }
+
+ /// @brief remove the last use (kill) of a split virtual register.
+ void removeKillPoint(unsigned virtReg) {
+ Virt2SplitKillMap[virtReg] = 0;
+ }
+
+ /// @brief returns true if the specified MachineInstr is a spill point.
+ bool isSpillPt(MachineInstr *Pt) const {
+ return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
+ }
+
+ /// @brief returns the virtual registers that should be spilled due to
+ /// splitting right after the specified MachineInstr.
+ std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
+ return SpillPt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a spill point for virtReg.
+ void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Pt);
+ if (I != SpillPt2VirtMap.end())
+ I->second.push_back(std::make_pair(virtReg, isKill));
+ else {
+ std::vector<std::pair<unsigned,bool> > Virts;
+ Virts.push_back(std::make_pair(virtReg, isKill));
+ SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief - transfer spill point information from one instruction to
+ /// another.
+ void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+ I = SpillPt2VirtMap.find(Old);
+ if (I == SpillPt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back().first;
+ bool isKill = I->second.back().second;
+ I->second.pop_back();
+ addSpillPoint(virtReg, isKill, New);
+ }
+ SpillPt2VirtMap.erase(I);
+ }
+
+ /// @brief returns true if the specified MachineInstr is a restore point.
+ bool isRestorePt(MachineInstr *Pt) const {
+ return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
+ }
+
+    /// @brief returns the virtual registers that should be restored due to
+    /// splitting right before the specified MachineInstr.
+ std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
+ return RestorePt2VirtMap[Pt];
+ }
+
+ /// @brief records the specified MachineInstr as a restore point for virtReg.
+ void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Pt);
+ if (I != RestorePt2VirtMap.end())
+ I->second.push_back(virtReg);
+ else {
+ std::vector<unsigned> Virts;
+ Virts.push_back(virtReg);
+ RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
+ }
+ }
+
+ /// @brief - transfer restore point information from one instruction to
+ /// another.
+ void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+ RestorePt2VirtMap.find(Old);
+ if (I == RestorePt2VirtMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addRestorePoint(virtReg, New);
+ }
+ RestorePt2VirtMap.erase(I);
+ }
+
+ /// @brief records that the specified physical register must be spilled
+ /// around the specified machine instr.
+ void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
+ if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
+ EmergencySpillMap[MI].push_back(PhysReg);
+ else {
+ std::vector<unsigned> PhysRegs;
+ PhysRegs.push_back(PhysReg);
+ EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
+ }
+ }
+
+ /// @brief returns true if one or more physical registers must be spilled
+ /// around the specified instruction.
+ bool hasEmergencySpills(MachineInstr *MI) const {
+ return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
+ }
+
+ /// @brief returns the physical registers to be spilled and restored around
+ /// the instruction.
+ std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
+ return EmergencySpillMap[MI];
+ }
+
+ /// @brief - transfer emergency spill information from one instruction to
+ /// another.
+ void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
+ std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
+ EmergencySpillMap.find(Old);
+ if (I == EmergencySpillMap.end())
+ return;
+ while (!I->second.empty()) {
+ unsigned virtReg = I->second.back();
+ I->second.pop_back();
+ addEmergencySpill(virtReg, New);
+ }
+ EmergencySpillMap.erase(I);
+ }
+
+    /// @brief return an existing emergency spill slot for the register
+    /// class, creating one if necessary.
+ int getEmergencySpillSlot(const TargetRegisterClass *RC);
+
+ /// @brief Return lowest spill slot index.
+ int getLowSpillSlot() const {
+ return LowSpillSlot;
+ }
+
+ /// @brief Return highest spill slot index.
+ int getHighSpillSlot() const {
+ return HighSpillSlot;
+ }
+
+ /// @brief Records a spill slot use.
+ void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
+
+ /// @brief Returns true if spill slot has been used.
+ bool isSpillSlotUsed(int FrameIndex) const {
+ assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
+ return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
+ }
+
+ /// @brief Mark the specified register as being implicitly defined.
+ void setIsImplicitlyDefined(unsigned VirtReg) {
+ ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+ }
+
+ /// @brief Returns true if the virtual register is implicitly defined.
+ bool isImplicitlyDefined(unsigned VirtReg) const {
+ return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+ }
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into newMI machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
+ ModRef MRInfo);
+
+ /// @brief Updates information about the specified virtual register's value
+ /// folded into the specified machine instruction.
+ void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
+
+ /// @brief returns the virtual registers' values folded in memory
+ /// operands of this instruction
+ std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+ getFoldedVirts(MachineInstr* MI) const {
+ return MI2VirtMap.equal_range(MI);
+ }
+
+    /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
+    /// folded instruction map and spill point map.
+ void RemoveMachineInstrFromMaps(MachineInstr *MI);
+
+ /// FindUnusedRegisters - Gather a list of allocatable registers that
+ /// have not been allocated to any virtual register.
+ bool FindUnusedRegisters(const TargetRegisterInfo *TRI,
+ LiveIntervals* LIs);
+
+ /// HasUnusedRegisters - Return true if there are any allocatable registers
+ /// that have not been allocated to any virtual register.
+ bool HasUnusedRegisters() const {
+ return !UnusedRegs.none();
+ }
+
+ /// setRegisterUsed - Remember the physical register is now used.
+ void setRegisterUsed(unsigned Reg) {
+ UnusedRegs.reset(Reg);
+ }
+
+ /// isRegisterUnused - Return true if the physical register has not been
+ /// used.
+ bool isRegisterUnused(unsigned Reg) const {
+ return UnusedRegs[Reg];
+ }
+
+ /// getFirstUnusedRegister - Return the first physical register that has not
+ /// been used.
+ unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
+ int Reg = UnusedRegs.find_first();
+ while (Reg != -1) {
+ if (allocatableRCRegs[RC][Reg])
+ return (unsigned)Reg;
+ Reg = UnusedRegs.find_next(Reg);
+ }
+ return 0;
+ }
+
+ void print(std::ostream &OS, const Module* M = 0) const;
+ void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void dump() const;
+ };
+
+ inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+ inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 0000000..b4c8bc1
--- /dev/null
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2225 @@
+//===-- VirtRegRewriter.cpp - Virtual Register Rewriter ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDSE , "Number of dead stores elided");
+STATISTIC(NumDSS , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM , "Number of re-materializable defs elided");
+STATISTIC(NumStores , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats  , "Number of re-materializations");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumReused , "Number of values reused");
+STATISTIC(NumDCE , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+ enum RewriterName { simple, local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+ cl::desc("Rewriter to use: (default: local)"),
+ cl::Prefix,
+ cl::values(clEnumVal(simple, "simple rewriter"),
+ clEnumVal(local, "local rewriter"),
+ clEnumVal(trivial, "trivial rewriter"),
+ clEnumValEnd),
+ cl::init(local));
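+// The rewriter is selected on the command line through the option declared
+// above, e.g. "-rewriter=trivial"; this is ordinary cl::opt usage, noted here
+// only as an illustration, and the default remains the local rewriter.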
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+
+// ****************************** //
+// Simple Spiller Implementation //
+// ****************************** //
+
+struct VISIBILITY_HIDDEN SimpleRewriter : public VirtRegRewriter {
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ DOUT << "********** REWRITE MACHINE CODE **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+
+ // LoadedRegs - Keep track of which vregs are loaded, so that we only load
+ // each vreg once (in the case where a spilled vreg is used by multiple
+ // operands). This is always smaller than the number of operands to the
+ // current machine instr, so it should be small.
+ std::vector<unsigned> LoadedRegs;
+
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ DOUT << MBBI->getBasicBlock()->getName() << ":\n";
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MII != E; ++MII) {
+ MachineInstr &MI = *MII;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.getReg()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned VirtReg = MO.getReg();
+ unsigned SubIdx = MO.getSubReg();
+ unsigned PhysReg = VRM.getPhys(VirtReg);
+ unsigned RReg = SubIdx ? TRI.getSubReg(PhysReg, SubIdx) : PhysReg;
+ if (!VRM.isAssignedReg(VirtReg)) {
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass* RC =
+ MF.getRegInfo().getRegClass(VirtReg);
+
+ if (MO.isUse() &&
+ std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg)
+ == LoadedRegs.end()) {
+ TII.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(StackSlot, LoadMI);
+ LoadedRegs.push_back(VirtReg);
+ ++NumLoads;
+ DOUT << '\t' << *LoadMI;
+ }
+
+ if (MO.isDef()) {
+ TII.storeRegToStackSlot(MBB, next(MII), PhysReg, true,
+ StackSlot, RC);
+ MachineInstr *StoreMI = next(MII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ ++NumStores;
+ }
+ }
+ MF.getRegInfo().setPhysRegUsed(RReg);
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ } else {
+ MF.getRegInfo().setPhysRegUsed(MO.getReg());
+ }
+ }
+ }
+
+ DOUT << '\t' << MI;
+ LoadedRegs.clear();
+ }
+ }
+ return true;
+ }
+
+};
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter {
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ DOUT << "********** REWRITE MACHINE CODE **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+ MachineRegisterInfo *mri = &MF.getRegInfo();
+
+ bool changed = false;
+
+ for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+ liItr != liEnd; ++liItr) {
+
+ if (TargetRegisterInfo::isVirtualRegister(liItr->first)) {
+ if (VRM.hasPhys(liItr->first)) {
+ unsigned preg = VRM.getPhys(liItr->first);
+ mri->replaceRegWith(liItr->first, preg);
+ mri->setPhysRegUsed(preg);
+ changed = true;
+ }
+ }
+ else {
+ if (!liItr->second->empty()) {
+ mri->setPhysRegUsed(liItr->first);
+ }
+ }
+ }
+
+ return changed;
+ }
+
+};
+
+// ************************************************************************ //
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slot or remat values are
+/// available in each physical register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs are values that the register allocated program is using
+/// that we cannot CHANGE, but we can read if we like. We keep track of this
+/// on a per-stack-slot / remat id basis as the low bit in the value of the
+/// SpillSlotsOrReMatsAvailable entries. The predicate 'canClobberPhysReg()'
+/// checks this bit and addAvailable sets it when its CanClobber argument is
+/// true.
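+///
+/// For example (illustrative numbers only): if the value of stack slot #3 is
+/// available in physreg 17 and may be clobbered, the entry is conceptually
+/// SpillSlotsOrReMatsAvailable[3] == (17 << 1) | 1; the low bit is the
+/// CanClobber flag and the remaining bits hold the physreg number.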
+class VISIBILITY_HIDDEN AvailableSpills {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+ // or remat'ed virtual register values that are still available, due to
+ // being loaded or stored to, but not invalidated yet.
+ std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+ // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+ // indicating which stack slot values are currently held by a physreg. This
+ // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+ // physreg is modified.
+ std::multimap<unsigned, int> PhysRegsAvailable;
+
+ void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+ void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+ AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+ : TRI(tri), TII(tii) {
+ }
+
+ /// clear - Reset the state.
+ void clear() {
+ SpillSlotsOrReMatsAvailable.clear();
+ PhysRegsAvailable.clear();
+ }
+
+ const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+ /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+ /// available in a physical register, return that PhysReg, otherwise
+ /// return 0.
+ unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+ std::map<int, unsigned>::const_iterator I =
+ SpillSlotsOrReMatsAvailable.find(Slot);
+ if (I != SpillSlotsOrReMatsAvailable.end()) {
+ return I->second >> 1; // Remove the CanClobber bit.
+ }
+ return 0;
+ }
+
+ /// addAvailable - Mark that the specified stack slot / remat is available
+ /// in the specified physreg. If CanClobber is true, the physreg can be
+ /// modified at any time without changing the semantics of the program.
+ void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+ // If this stack slot is thought to be available in some other physreg,
+ // remove its record.
+ ModifyStackSlotOrReMat(SlotOrReMat);
+
+ PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+ SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
+ (unsigned)CanClobber;
+
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Remembering SS#" << SlotOrReMat;
+ DOUT << " in physreg " << TRI->getName(Reg) << "\n";
+ }
+
+ /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+ /// the value of the specified stackslot register if it desires. The
+ /// specified stack slot must be available in a physreg for this query to
+ /// make sense.
+ bool canClobberPhysRegForSS(int SlotOrReMat) const {
+ assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+ "Value not available!");
+ return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+ }
+
+ /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+ /// physical register where values for some stack slot(s) might be
+ /// available.
+ bool canClobberPhysReg(unsigned PhysReg) const {
+ std::multimap<unsigned, int>::const_iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ if (!canClobberPhysRegForSS(SlotOrReMat))
+ return false;
+ }
+ return true;
+ }
+
+ /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+ /// stackslot register. The register is still available but is no longer
+  /// allowed to be modified.
+ void disallowClobberPhysReg(unsigned PhysReg);
+
+ /// ClobberPhysReg - This is called when the specified physreg changes
+ /// value. We use this to invalidate any info about stuff that lives in
+ /// it and any of its aliases.
+ void ClobberPhysReg(unsigned PhysReg);
+
+ /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+ /// slot changes. This removes information about which register the
+ /// previous value for this slot lives in (as the previous value is dead
+ /// now).
+ void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+  /// AddAvailableRegsToLiveIn - Availability information is being kept
+  /// coming into the specified MBB. Add available physical registers as
+  /// potential live-ins. If they are reused in the MBB, they will be added
+  /// to the live-in set so that the register scavenger and post-allocation
+  /// scheduler see them.
+ void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps);
+};
+
+// ************************************************************************ //
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to rollback upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+ // The MachineInstr operand that reused an available value.
+ unsigned Operand;
+
+ // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+ unsigned StackSlotOrReMat;
+
+ // PhysRegReused - The physical register the value was available in.
+ unsigned PhysRegReused;
+
+ // AssignedPhysReg - The physreg that was assigned for use by the reload.
+ unsigned AssignedPhysReg;
+
+ // VirtReg - The virtual register itself.
+ unsigned VirtReg;
+
+ ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+ unsigned vreg)
+ : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
+ AssignedPhysReg(apr), VirtReg(vreg) {}
+};
+
+/// ReuseInfo - This maintains a collection of ReusedOp's for each operand
+/// that is reused instead of reloaded.
+class VISIBILITY_HIDDEN ReuseInfo {
+ MachineInstr &MI;
+ std::vector<ReusedOp> Reuses;
+ BitVector PhysRegsClobbered;
+public:
+ ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
+ PhysRegsClobbered.resize(tri->getNumRegs());
+ }
+
+ bool hasReuses() const {
+ return !Reuses.empty();
+ }
+
+ /// addReuse - If we choose to reuse a virtual register that is already
+ /// available instead of reloading it, remember that we did so.
+ void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
+ unsigned PhysRegReused, unsigned AssignedPhysReg,
+ unsigned VirtReg) {
+ // If the reload is to the assigned register anyway, no undo will be
+ // required.
+ if (PhysRegReused == AssignedPhysReg) return;
+
+ // Otherwise, remember this.
+ Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
+ AssignedPhysReg, VirtReg));
+ }
+
+ void markClobbered(unsigned PhysReg) {
+ PhysRegsClobbered.set(PhysReg);
+ }
+
+ bool isClobbered(unsigned PhysReg) const {
+ return PhysRegsClobbered.test(PhysReg);
+ }
+
+ /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+ /// is some other operand that is using the specified register, either pick
+ /// a new register to use, or evict the previous reload and use this reg.
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
+
+ /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+ /// 'Rejected' set to remember which registers have been considered and
+  /// rejected for the reload. This avoids infinite looping in cases like
+ /// this:
+ /// t1 := op t2, t3
+ /// t2 <- assigned r0 for use by the reload but ended up reuse r1
+ /// t3 <- assigned r1 for use by the reload but ended up reuse r0
+ /// t1 <- desires r1
+ /// sees r1 is taken by t2, tries t2's reload register r0
+ /// sees r0 is taken by t3, tries t3's reload register r1
+ /// sees r1 is taken by t2, tries t2's reload register r0 ...
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallSet<unsigned, 8> Rejected;
+ return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps, VRM);
+ }
+};
+
+
+// ****************** //
+// Utility Functions //
+// ****************** //
+
+/// findSinglePredSuccessor - Return via reference a vector of machine basic
+/// blocks each of which is a successor of the specified BB and has no other
+/// predecessor.
+static void findSinglePredSuccessor(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &Succs) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->pred_size() == 1)
+ Succs.push_back(SuccMBB);
+ }
+}
+
+/// InvalidateKill - Invalidate register kill information for a specific
+/// register. This also unsets the kill marker on the last kill operand.
+static void InvalidateKill(unsigned Reg,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg]) {
+ KillOps[Reg]->setIsKill(false);
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ KillOps[*SR]->setIsKill(false);
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+}
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ SmallVector<unsigned, 2> *KillRegs = NULL) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (KillRegs)
+ KillRegs->push_back(Reg);
+ assert(Reg < KillOps.size());
+ if (KillOps[Reg] == &MO) {
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+ }
+}
+
+/// InvalidateRegDef - If the def operand of the specified def MI is now dead
+/// (since its spill instruction has been removed), mark it isDead. Also check
+/// whether the def MI has other definition operands that are not dead; that
+/// result is returned by reference in HasLiveDef.
+static bool InvalidateRegDef(MachineBasicBlock::iterator I,
+ MachineInstr &NewDef, unsigned Reg,
+ bool &HasLiveDef) {
+ // Due to remat, it's possible this reg isn't being reused. That is,
+ // the def of this reg (by prev MI) is now dead.
+ MachineInstr *DefMI = I;
+ MachineOperand *DefOp = NULL;
+ for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = DefMI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ if (MO.getReg() == Reg)
+ DefOp = &MO;
+ else if (!MO.isDead())
+ HasLiveDef = true;
+ }
+ }
+ if (!DefOp)
+ return false;
+
+ bool FoundUse = false, Done = false;
+ MachineBasicBlock::iterator E = &NewDef;
+ ++I; ++E;
+ for (; !Done && I != E; ++I) {
+ MachineInstr *NMI = I;
+ for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = NMI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (MO.isUse())
+ FoundUse = true;
+ Done = true; // Stop after scanning all the operands of this MI.
+ }
+ }
+ if (!FoundUse) {
+ // Def is dead!
+ DefOp->setIsDead();
+ return true;
+ }
+ return false;
+}
+
+/// UpdateKills - Track and update kill info. If a MI reads a register that is
+/// marked kill, then it must be due to register reuse. Transfer the kill info
+/// over.
+static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ // That can't be right. Register is killed but not re-defined and it's
+ // being reused. Let's fix that.
+ KillOps[Reg]->setIsKill(false);
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ if (!MI.isRegTiedToDefOperand(i))
+ // Unless it's a two-address operand, this is the new kill.
+ MO.setIsKill();
+ }
+ if (MO.isKill()) {
+ RegKills.set(Reg);
+ KillOps[Reg] = &MO;
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.set(*SR);
+ KillOps[*SR] = &MO;
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ RegKills.reset(Reg);
+ KillOps[Reg] = NULL;
+    // It also defines (or partially defines) aliases.
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ }
+}
+
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
+///
+static void ReMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned DestReg, unsigned Reg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ TII->reMaterialize(MBB, MII, DestReg, VRM.getReMaterializedMI(Reg));
+ MachineInstr *NewMI = prior(MII);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
+ continue;
+ assert(MO.isUse());
+ unsigned SubIdx = MO.getSubReg();
+ unsigned Phys = VRM.getPhys(VirtReg);
+ assert(Phys);
+ unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
+ MO.setReg(RReg);
+ MO.setSubReg(0);
+ }
+ ++NumReMats;
+}
+
+/// findSuperReg - Find the register in the given register class whose SubIdx
+/// sub-register is SubReg.
+static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
+ unsigned SubIdx, const TargetRegisterInfo *TRI) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ unsigned Reg = *I;
+ if (TRI->getSubReg(Reg, SubIdx) == SubReg)
+ return Reg;
+ }
+ return 0;
+}
+
+// ******************************** //
+// Available Spills Implementation //
+// ******************************** //
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+/// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+ DOUT << "PhysReg " << TRI->getName(PhysReg)
+ << " copied, it is available for use but can no longer be modified\n";
+ }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ disallowClobberPhysRegOnly(*AS);
+ disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ PhysRegsAvailable.erase(I++);
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+ DOUT << "PhysReg " << TRI->getName(PhysReg)
+ << " clobbered, invalidating ";
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 << "\n";
+ else
+ DOUT << "SS#" << SlotOrReMat << "\n";
+ }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ ClobberPhysRegOnly(*AS);
+ ClobberPhysRegOnly(PhysReg);
+}
+
+/// AddAvailableRegsToLiveIn - Availability information is being kept coming
+/// into the specified MBB. Add available physical registers as potential
+/// live-ins. If they are reused in the MBB, they will be added to the
+/// live-in set so that the register scavenger and post-allocation scheduler
+/// see them.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ std::set<unsigned> NotAvailable;
+ for (std::multimap<unsigned, int>::iterator
+ I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+ I != E; ++I) {
+ unsigned Reg = I->first;
+ const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg);
+    // FIXME: A temporary workaround. We can't reuse an available value if
+    // it's not safe to move defs of the virtual register's class, e.g. the
+    // X86::RFP* register classes. Do not add it as a live-in.
+ if (!TII->isSafeToMoveRegClassDefs(RC))
+ // This is no longer available.
+ NotAvailable.insert(Reg);
+ else {
+ MBB.addLiveIn(Reg);
+ InvalidateKill(Reg, TRI, RegKills, KillOps);
+ }
+
+ // Skip over the same register.
+ std::multimap<unsigned, int>::iterator NI = next(I);
+ while (NI != E && NI->first == Reg) {
+ ++I;
+ ++NI;
+ }
+ }
+
+ for (std::set<unsigned>::iterator I = NotAvailable.begin(),
+ E = NotAvailable.end(); I != E; ++I) {
+ ClobberPhysReg(*I);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
+ *SubRegs; ++SubRegs)
+ ClobberPhysReg(*SubRegs);
+ }
+}
+
+/// ModifyStackSlotOrReMat - This method is called when the value in a stack
+/// slot changes. This removes information about which register the previous
+/// value for this slot lives in (as the previous value is dead now).
+void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
+ std::map<int, unsigned>::iterator It =
+ SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
+ if (It == SpillSlotsOrReMatsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+ SpillSlotsOrReMatsAvailable.erase(It);
+
+  // This register may hold the value of multiple stack slots; remove only
+  // this stack slot from the set of values the register contains.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ for (; ; ++I) {
+ assert(I != PhysRegsAvailable.end() && I->first == Reg &&
+ "Map inverse broken!");
+ if (I->second == SlotOrReMat) break;
+ }
+ PhysRegsAvailable.erase(I);
+}
+
+// ************************** //
+// Reuse Info Implementation //
+// ************************** //
+
+/// GetRegForReload - We are about to emit a reload into PhysReg. If there
+/// is some other operand that is using the specified register, either pick
+/// a new register to use, or evict the previous reload and use this reg.
+unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ const TargetInstrInfo* TII = MI->getParent()->getParent()->getTarget()
+ .getInstrInfo();
+
+ if (Reuses.empty()) return PhysReg; // This is most often empty.
+
+ for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+ ReusedOp &Op = Reuses[ro];
+ // If we find some other reuse that was supposed to use this register
+ // exactly for its reload, we can change this reload to use ITS reload
+ // register. That is, unless its reload register has already been
+ // considered and subsequently rejected because it has also been reused
+ // by another operand.
+ if (Op.PhysRegReused == PhysReg &&
+ Rejected.count(Op.AssignedPhysReg) == 0) {
+ // Yup, use the reload register that we didn't use before.
+ unsigned NewReg = Op.AssignedPhysReg;
+ Rejected.insert(PhysReg);
+ return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps, VRM);
+ } else {
+ // Otherwise, we might also have a problem if a previously reused
+ // value aliases the new register. If so, codegen the previous reload
+ // and use this one.
+ unsigned PRRU = Op.PhysRegReused;
+ const TargetRegisterInfo *TRI = Spills.getRegInfo();
+ if (TRI->areAliases(PRRU, PhysReg)) {
+ // Okay, we found out that an alias of a reused register
+ // was used. This isn't good because it means we have
+ // to undo a previous reuse.
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *AliasRC =
+ MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
+
+ // Copy Op out of the vector and remove it, we're going to insert an
+ // explicit load for it.
+ ReusedOp NewOp = Op;
+ Reuses.erase(Reuses.begin()+ro);
+
+ // Ok, we're going to try to reload the assigned physreg into the
+ // slot that we were supposed to in the first place. However, that
+ // register could hold a reuse. Check to see if it conflicts or
+ // would prefer us to use a different register.
+ unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg,
+ MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+
+ MachineBasicBlock::iterator MII = MI;
+ if (NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT) {
+ ReMaterialize(*MBB, MII, NewPhysReg, NewOp.VirtReg, TII, TRI,VRM);
+ } else {
+ TII->loadRegFromStackSlot(*MBB, MII, NewPhysReg,
+ NewOp.StackSlotOrReMat, AliasRC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
+ // Any stores to this stack slot are not dead anymore.
+ MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
+ ++NumLoads;
+ }
+ Spills.ClobberPhysReg(NewPhysReg);
+ Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+ unsigned SubIdx = MI->getOperand(NewOp.Operand).getSubReg();
+ unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg;
+ MI->getOperand(NewOp.Operand).setReg(RReg);
+ MI->getOperand(NewOp.Operand).setSubReg(0);
+
+ Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
+ --MII;
+ UpdateKills(*MII, TRI, RegKills, KillOps);
+ DOUT << '\t' << *MII;
+
+ DOUT << "Reuse undone!\n";
+ --NumReused;
+
+ // Finally, PhysReg is now available, go ahead and use it.
+ return PhysReg;
+ }
+ }
+ }
+ return PhysReg;
+}
+
+// ************************************************************************ //
+
+/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
+/// stack slot mod/ref. It also checks if it's possible to unfold the
+/// instruction by having it define a specified physical register instead.
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+ return false;
+
+ bool Found = false;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ if (MR & VirtRegMap::isModRef)
+ if (VRM.getStackSlot(VirtReg) == SS) {
+ Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+ break;
+ }
+ }
+ if (!Found)
+ return false;
+
+  // Does the instruction use a register that overlaps the scratch register?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!VRM.hasPhys(Reg))
+ continue;
+ Reg = VRM.getPhys(Reg);
+ }
+ if (TRI->regsOverlap(PhysReg, Reg))
+ return false;
+ }
+ return true;
+}
+
+/// FindFreeRegister - Find a free register of a given register class by looking
+/// at (at most) the last two machine instructions.
+static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
+ MachineBasicBlock &MBB,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ BitVector &AllocatableRegs) {
+ BitVector Defs(TRI->getNumRegs());
+ BitVector Uses(TRI->getNumRegs());
+ SmallVector<unsigned, 4> LocalUses;
+ SmallVector<unsigned, 4> Kills;
+
+ // Take a look at 2 instructions at most.
+ for (unsigned Count = 0; Count < 2; ++Count) {
+ if (MII == MBB.begin())
+ break;
+ MachineInstr *PrevMI = prior(MII);
+ for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = PrevMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef()) {
+ Defs.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Defs.set(*AS);
+ } else {
+ LocalUses.push_back(Reg);
+ if (MO.isKill() && AllocatableRegs[Reg])
+ Kills.push_back(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned Kill = Kills[i];
+ if (!Defs[Kill] && !Uses[Kill] &&
+ TRI->getPhysicalRegisterRegClass(Kill) == RC)
+ return Kill;
+ }
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ Uses.set(*AS);
+ }
+
+ MII = PrevMI;
+ }
+
+ return 0;
+}
+
+static
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == VirtReg)
+ MO.setReg(PhysReg);
+ }
+}
+
+namespace {
+ struct RefSorter {
+ bool operator()(const std::pair<MachineInstr*, int> &A,
+ const std::pair<MachineInstr*, int> &B) {
+ return A.second < B.second;
+ }
+ };
+}
+
+// ***************************** //
+// Local Spiller Implementation //
+// ***************************** //
+
+class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter {
+ MachineRegisterInfo *RegInfo;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ BitVector AllocatableRegs;
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+public:
+
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) {
+ RegInfo = &MF.getRegInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ DOUT << "\n**** Local spiller rewriting function '"
+ << MF.getFunction()->getName() << "':\n";
+ DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)"
+ " ****\n";
+ DEBUG(MF.dump());
+
+ // Spills - Keep track of which spilled values are available in physregs
+ // so that we can choose to reuse the physregs instead of emitting
+ // reloads. This is usually refreshed per basic block.
+ AvailableSpills Spills(TRI, TII);
+
+ // Keep track of kill information.
+ BitVector RegKills(TRI->getNumRegs());
+ std::vector<MachineOperand*> KillOps;
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ // SingleEntrySuccs - Successor blocks which have a single predecessor.
+ SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+ SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+ // Traverse the basic blocks depth first.
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ if (!EarlyVisited.count(MBB))
+ RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+
+      // If this MBB is the only predecessor of a successor, keep the
+      // availability information and visit that successor next.
+ do {
+ // Keep visiting single predecessor successor as long as possible.
+ SinglePredSuccs.clear();
+ findSinglePredSuccessor(MBB, SinglePredSuccs);
+ if (SinglePredSuccs.empty())
+ MBB = 0;
+ else {
+          // FIXME: There may be more than one successor, each of which has
+          // MBB as its only predecessor.
+ MBB = SinglePredSuccs[0];
+ if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+ Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+ RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+ }
+ }
+ } while (MBB);
+
+ // Clear the availability info.
+ Spills.clear();
+ }
+
+ DOUT << "**** Post Machine Instrs ****\n";
+ DEBUG(MF.dump());
+
+ // Mark unused spill slots.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int SS = VRM.getLowSpillSlot();
+ if (SS != VirtRegMap::NO_STACK_SLOT)
+ for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS)
+ if (!VRM.isSpillSlotUsed(SS)) {
+ MFI->RemoveStackObject(SS);
+ ++NumDSS;
+ }
+
+ return true;
+ }
+
+private:
+
+ /// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+ /// a scratch register is available.
+ /// xorq %r12<kill>, %r13
+ /// addq %rax, -184(%rbp)
+ /// addq %r13, -184(%rbp)
+ /// ==>
+ /// xorq %r12<kill>, %r13
+ /// movq -184(%rbp), %r12
+ /// addq %rax, %r12
+ /// addq %r13, %r12
+ /// movq %r12, -184(%rbp)
+ bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+
+ MachineBasicBlock::iterator NextMII = next(MII);
+ if (NextMII == MBB.end())
+ return false;
+
+ if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+ return false;
+
+    // Now let's see if the last couple of instructions happen to have freed
+    // up a register.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs);
+ if (!PhysReg)
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ TRI = MF.getTarget().getRegisterInfo();
+ MachineInstr &MI = *MII;
+ if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM))
+ return false;
+
+    // If the next instruction also folds the same SS modref and can be
+    // unfolded, then it's worthwhile to issue a load from SS into the free
+    // register and then unfold these instructions.
+ if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
+ return false;
+
+ // Load from SS to the spare physical register.
+ TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(PhysReg);
+ // Remember it's available.
+ Spills.addAvailable(SS, PhysReg);
+ MaybeDeadStores[SS] = NULL;
+
+ // Unfold current MI.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+      assert(0 && "Unable to unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+ VRM.transferRestorePts(&MI, NewMIs[0]);
+ MII = MBB.insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ ++NumModRefUnfold;
+
+ // Unfold next instructions that fold the same SS.
+ do {
+ MachineInstr &NextMI = *NextMII;
+ NextMII = next(NextMII);
+ NewMIs.clear();
+ if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+        assert(0 && "Unable to unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+ VRM.transferRestorePts(&NextMI, NewMIs[0]);
+ MBB.insert(NextMII, NewMIs[0]);
+ InvalidateKills(NextMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&NextMI);
+ MBB.erase(&NextMI);
+ ++NumModRefUnfold;
+ } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM));
+
+ // Store the value back into SS.
+ TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC);
+ MachineInstr *StoreMI = prior(NextMII);
+ VRM.addSpillSlotUse(SS, StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+ return true;
+ }
+
+ /// OptimizeByUnfold - Turn a store folding instruction into a load folding
+ /// instruction. e.g.
+ /// xorl %edi, %eax
+ /// movl %eax, -32(%ebp)
+ /// movl -36(%ebp), %eax
+ /// orl %eax, -32(%ebp)
+ /// ==>
+ /// xorl %edi, %eax
+ /// orl -36(%ebp), %eax
+ /// mov %eax, -32(%ebp)
+ /// This enables unfolding optimization for a subsequent instruction which will
+ /// also eliminate the newly introduced store instruction.
+ bool OptimizeByUnfold(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr &MI = *MII;
+ unsigned UnfoldedOpc = 0;
+ unsigned UnfoldPR = 0;
+ unsigned UnfoldVR = 0;
+ int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
+ // Only transform a MI that folds a single register.
+ if (UnfoldedOpc)
+ return false;
+ UnfoldVR = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ // MI2VirtMap can be updated, which invalidates the iterator.
+ // Increment the iterator first.
+ ++I;
+ if (VRM.isAssignedReg(UnfoldVR))
+ continue;
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ FoldedSS = VRM.getStackSlot(UnfoldVR);
+ MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+ if (DeadStore && (MR & VirtRegMap::isModRef)) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+ if (!PhysReg || !DeadStore->readsRegister(PhysReg))
+ continue;
+ UnfoldPR = PhysReg;
+ UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ false, true);
+ }
+ }
+
+ if (!UnfoldedOpc) {
+ if (!UnfoldVR)
+ return false;
+
+ // Look for other unfolding opportunities.
+ return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII,
+ MaybeDeadStores, Spills, RegKills, KillOps, VRM);
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
+ continue;
+ if (VRM.isAssignedReg(VirtReg)) {
+ unsigned PhysReg = VRM.getPhys(VirtReg);
+ if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ } else if (VRM.isReMaterialized(VirtReg))
+ continue;
+ int SS = VRM.getStackSlot(VirtReg);
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ if (PhysReg) {
+ if (TRI->regsOverlap(PhysReg, UnfoldPR))
+ return false;
+ continue;
+ }
+ if (VRM.hasPhys(VirtReg)) {
+ PhysReg = VRM.getPhys(VirtReg);
+ if (!TRI->regsOverlap(PhysReg, UnfoldPR))
+ continue;
+ }
+
+ // Ok, we'll need to reload the value into a register which makes
+ // it impossible to perform the store unfolding optimization later.
+ // Let's see if it is possible to fold the load if the store is
+ // unfolded. This allows us to perform the store unfolding
+ // optimization.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+ assert(NewMIs.size() == 1);
+ MachineInstr *NewMI = NewMIs.back();
+ NewMIs.clear();
+ int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
+ assert(Idx != -1);
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(Idx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
+ if (FoldedMI) {
+ VRM.addSpillSlotUse(SS, FoldedMI);
+ if (!VRM.hasPhys(UnfoldVR))
+ VRM.assignVirt2Phys(UnfoldVR, UnfoldPR);
+ VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ MII = MBB.insert(MII, FoldedMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ MF.DeleteMachineInstr(NewMI);
+ return true;
+ }
+ MF.DeleteMachineInstr(NewMI);
+ }
+ }
+
+ return false;
+ }
+
+ /// CommuteToFoldReload -
+ /// Look for
+ /// r1 = load fi#1
+ /// r1 = op r1, r2<kill>
+ /// store r1, fi#1
+ ///
+ /// If op is commutable and r2 is killed, then we can xform these to
+ /// r2 = op r2, fi#1
+ /// store r2, fi#1
+ bool CommuteToFoldReload(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned VirtReg, unsigned SrcReg, int SS,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (MII == MBB.begin() || !MII->killsRegister(SrcReg))
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr &MI = *MII;
+ MachineBasicBlock::iterator DefMII = prior(MII);
+ MachineInstr *DefMI = DefMII;
+ const TargetInstrDesc &TID = DefMI->getDesc();
+ unsigned NewDstIdx;
+ if (DefMII != MBB.begin() &&
+ TID.isCommutable() &&
+ TII->CommuteChangesDestination(DefMI, NewDstIdx)) {
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+ return false;
+ MachineInstr *ReloadMI = prior(DefMII);
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+ if (DestReg != SrcReg || FrameIdx != SS)
+ return false;
+ int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+ if (UseIdx == -1)
+ return false;
+ unsigned DefIdx;
+ if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
+ return false;
+ assert(DefMI->getOperand(DefIdx).isReg() &&
+ DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+ // Now commute def instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+ if (!CommutedMI)
+ return false;
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(NewDstIdx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+ // CommutedMI is not needed since foldMemoryOperand returns a new MI.
+ MF.DeleteMachineInstr(CommutedMI);
+ if (!FoldedMI)
+ return false;
+
+ VRM.addSpillSlotUse(SS, FoldedMI);
+ VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ // Insert new def MI and spill MI.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC);
+ MII = prior(MII);
+ MachineInstr *StoreMI = MII;
+ VRM.addSpillSlotUse(SS, StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ MII = MBB.insert(MII, FoldedMI); // Update MII to backtrack.
+
+ // Delete all 3 old instructions.
+ InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(ReloadMI);
+ MBB.erase(ReloadMI);
+ InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DefMI);
+ MBB.erase(DefMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+
+ // If NewReg was previously holding the value of some SS, it's now clobbered.
+ // This has to be done now because it's a physical register. When this
+ // instruction is re-visited, it's ignored.
+ Spills.ClobberPhysReg(NewReg);
+
+ ++NumCommutes;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+ /// the last store to the same slot is now dead. If so, remove the last store.
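+ /// Illustrative example (not part of the original comment; register and
+ /// stack slot names are hypothetical):
+ ///   store %eax, fi#2     ; earlier spill, never read back
+ ///   ...                  ; fi#2 not read in between
+ ///   store %ecx, fi#2     ; this new spill makes the earlier store dead
+ /// The first store is deleted, and defs it killed may be marked dead too.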
+ void SpillRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+
+ TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC);
+ MachineInstr *StoreMI = next(MII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ DOUT << "Store:\t" << *StoreMI;
+
+ // If there is a dead store to this stack slot, nuke it now.
+ if (LastStore) {
+ DOUT << "Removed dead store:\t" << *LastStore;
+ ++NumDSE;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+ MachineBasicBlock::iterator PrevMII = LastStore;
+ bool CheckDef = PrevMII != MBB.begin();
+ if (CheckDef)
+ --PrevMII;
+ VRM.RemoveMachineInstrFromMaps(LastStore);
+ MBB.erase(LastStore);
+ if (CheckDef) {
+ // Look at defs of killed registers on the store. Mark the defs
+ // as dead since the store has been deleted and they aren't
+ // being reused.
+ for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+ bool HasOtherDef = false;
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) {
+ MachineInstr *DeadDef = PrevMII;
+ if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+ // FIXME: This assumes a remat def does not have side
+ // effects.
+ VRM.RemoveMachineInstrFromMaps(DeadDef);
+ MBB.erase(DeadDef);
+ ++NumDRM;
+ }
+ }
+ }
+ }
+ }
+
+ LastStore = next(MII);
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the value available
+ // in PhysReg.
+ Spills.ModifyStackSlotOrReMat(StackSlot);
+ Spills.ClobberPhysReg(PhysReg);
+ Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+ ++NumStores;
+ }
+
+ /// TransferDeadness - An identity copy definition is dead and it's being
+ /// removed. Find the last def or use and mark it as dead / kill.
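+ /// Illustrative example (hypothetical registers, not from the original
+ /// comment):
+ ///   %r1 = op ...          ; last def of %r1 before the copy
+ ///   ...                   ; no intervening use of %r1
+ ///   %r1 = %r1             ; dead identity copy being removed
+ /// Once the copy is gone, the earlier def is marked dead (or deleted if it
+ /// has no side effects), or the last use is marked kill, so the kill/dead
+ /// flags stay consistent.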
+ void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
+ unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallPtrSet<MachineInstr*, 4> Seens;
+ SmallVector<std::pair<MachineInstr*, int>,8> Refs;
+ for (MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(Reg),
+ RE = RegInfo->reg_end(); RI != RE; ++RI) {
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ if (DI == DistanceMap.end() || DI->second > CurDist)
+ continue;
+ if (Seens.insert(UDMI))
+ Refs.push_back(std::make_pair(UDMI, DI->second));
+ }
+
+ if (Refs.empty())
+ return;
+ std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+ while (!Refs.empty()) {
+ MachineInstr *LastUDMI = Refs.back().first;
+ Refs.pop_back();
+
+ MachineOperand *LastUD = NULL;
+ for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastUDMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (!LastUD || (LastUD->isUse() && MO.isDef()))
+ LastUD = &MO;
+ if (LastUDMI->isRegTiedToDefOperand(i))
+ break;
+ }
+ if (LastUD->isDef()) {
+ // If the instruction has no side effects, delete it and propagate
+ // backward further. Otherwise, mark it dead and we are done.
+ const TargetInstrDesc &TID = LastUDMI->getDesc();
+ if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.hasUnmodeledSideEffects()) {
+ LastUD->setIsDead();
+ break;
+ }
+ VRM.RemoveMachineInstrFromMaps(LastUDMI);
+ MBB->erase(LastUDMI);
+ } else {
+ LastUD->setIsKill();
+ RegKills.set(Reg);
+ KillOps[Reg] = LastUD;
+ break;
+ }
+ }
+ }
+
+ /// RewriteMBB - Keep track of which spills are available even after the
+ /// register allocator is done with them. If possible, avoid reloading vregs.
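+ /// Illustrative example of the reuse this enables (hypothetical registers
+ /// and stack slots):
+ ///   store %eax, fi#1     ; spill of a vreg assigned to fi#1
+ ///   ...                  ; %eax not clobbered
+ ///   %ecx = load fi#1     ; can be turned into a copy from %eax, or removed
+ ///                        ; entirely if %eax can be reused directly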
+ void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+ LiveIntervals *LIs,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ DOUT << "\n**** Local spiller rewriting MBB '"
+ << MBB.getBasicBlock()->getName() << "':\n";
+
+ MachineFunction &MF = *MBB.getParent();
+
+ // MaybeDeadStores - When we need to write a value back into a stack slot,
+ // keep track of the inserted store. If the stack slot value is never read
+ // (because the value was used from some available register, for example), and
+ // subsequently stored to, the original store is dead. This map keeps track
+ // of inserted stores that are not used. If we see a subsequent store to the
+ // same stack slot, the original store is deleted.
+ std::vector<MachineInstr*> MaybeDeadStores;
+ MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+ // ReMatDefs - These are rematerializable def MIs which are not deleted.
+ SmallSet<MachineInstr*, 4> ReMatDefs;
+
+ // Clear kill info.
+ SmallSet<unsigned, 2> KilledMIRegs;
+ RegKills.reset();
+ KillOps.clear();
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MII != E; ) {
+ MachineBasicBlock::iterator NextMII = next(MII);
+
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ bool Erased = false;
+ bool BackTracked = false;
+ if (OptimizeByUnfold(MBB, MII,
+ MaybeDeadStores, Spills, RegKills, KillOps, VRM))
+ NextMII = next(MII);
+
+ MachineInstr &MI = *MII;
+
+ if (VRM.hasEmergencySpills(&MI)) {
+ // Spill physical register(s) in the rare case the allocator has run out
+ // of registers to allocate.
+ SmallSet<int, 4> UsedSS;
+ std::vector<unsigned> &EmSpills = VRM.getEmergencySpills(&MI);
+ for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+ unsigned PhysReg = EmSpills[i];
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(PhysReg);
+ assert(RC && "Unable to determine register class!");
+ int SS = VRM.getEmergencySpillSlot(RC);
+ if (UsedSS.count(SS))
+ assert(0 && "Need to spill more than one physical registers!");
+ UsedSS.insert(SS);
+ TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC);
+ MachineInstr *StoreMI = prior(MII);
+ VRM.addSpillSlotUse(SS, StoreMI);
+ TII->loadRegFromStackSlot(MBB, next(MII), PhysReg, SS, RC);
+ MachineInstr *LoadMI = next(MII);
+ VRM.addSpillSlotUse(SS, LoadMI);
+ ++NumPSpills;
+ }
+ NextMII = next(MII);
+ }
+
+ // Insert restores here if asked to.
+ if (VRM.isRestorePt(&MI)) {
+ std::vector<unsigned> &RestoreRegs = VRM.getRestorePtRestores(&MI);
+ for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+ unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
+ if (!VRM.getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ unsigned Phys = VRM.getPhys(VirtReg);
+ RegInfo->setPhysRegUsed(Phys);
+
+ // Check if the value being restored is available. If so, it must be
+ // from a predecessor BB that falls through into this BB. We do not
+ // expect:
+ // BB1:
+ // r1 = load fi#1
+ // ...
+ // = r1<kill>
+ // ... # r1 not clobbered
+ // ...
+ // = load fi#1
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+ if (InReg == Phys) {
+ // If the value is already available in the expected register, save
+ // a reload / remat.
+ if (SSorRMId)
+ DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << SSorRMId;
+ DOUT << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(Phys) << "\n";
+ ++NumOmitted;
+ continue;
+ } else if (InReg && InReg != Phys) {
+ if (SSorRMId)
+ DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << SSorRMId;
+ DOUT << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" by copying it into physreg "
+ << TRI->getName(Phys) << "\n";
+
+ // If the reloaded / remat value is available in another register,
+ // copy it to the desired register.
+ TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC);
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ // Mark it killed.
+ MachineInstr *CopyMI = prior(MII);
+ MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ DOUT << '\t' << *CopyMI;
+ ++NumCopified;
+ continue;
+ }
+
+ if (VRM.isReMaterialized(VirtReg)) {
+ ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM);
+ } else {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ }
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ UpdateKills(*prior(MII), TRI, RegKills, KillOps);
+ DOUT << '\t' << *prior(MII);
+ }
+ }
+
+ // Insert spills here if asked to.
+ if (VRM.isSpillPt(&MI)) {
+ std::vector<std::pair<unsigned,bool> > &SpillRegs =
+ VRM.getSpillPtSpills(&MI);
+ for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+ unsigned VirtReg = SpillRegs[i].first;
+ bool isKill = SpillRegs[i].second;
+ if (!VRM.getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+ unsigned Phys = VRM.getPhys(VirtReg);
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC);
+ MachineInstr *StoreMI = next(MII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ DOUT << "Store:\t" << *StoreMI;
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ }
+ NextMII = next(MII);
+ }
+
+ /// ReusedOperands - Keep track of operand reuse in case we need to undo
+ /// reuse.
+ ReuseInfo ReusedOperands(MI, TRI);
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that the physreg is used
+ // by this function.
+ RegInfo->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+ // If the virtual register is implicitly defined, emit an implicit_def
+ // before it so the scavenger knows it's "defined".
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+ }
+
+ // Process all of the spilled uses and all non spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ MachineOperand &MO = MI.getOperand(i);
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MO.getSubReg();
+ if (VRM.isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM.getPhys(VirtReg);
+ RegInfo->setPhysRegUsed(Phys);
+ if (MO.isDef())
+ ReusedOperands.markClobbered(Phys);
+ unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ if (VRM.isImplicitlyDefined(VirtReg))
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MO.isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = false;
+ if (LIs->hasInterval(VirtReg)) {
+ LiveInterval &LI = LIs->getInterval(VirtReg);
+ if (!LI.liveAt(LIs->getUseIndex(LI.beginNumber())))
+ // Must be defined by an implicit def. It should not be spilled. Note,
+ // this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and both target registers would overlap.
+ AvoidReload = true;
+ }
+
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly so for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because EDI is not
+ // in the right register class.
+ if (PhysReg && !AvoidReload &&
+ (SubIdx || MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)) {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << ReuseSlot;
+ DOUT << " from physreg "
+ << TRI->getName(PhysReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(VRM.getPhys(VirtReg)) << "\n";
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // The only technical detail we have is that we don't know that
+ // PhysReg won't be clobbered by a reloaded stack slot that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM.getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot, we'll
+ // remove it after the other operands are processed if valid.
+
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+ // Mark it isKill if there are no other uses of the same virtual
+ // register and it's not a two-address operand. IsKill will be
+ // unset if the reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
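+ // Illustrative example (hypothetical x86 registers, not from the
+ // original comment): if fi#1 is currently available in %eax but this
+ // two-address instruction would clobber its reused operand, we emit
+ //   movl %eax, %ecx      ; copy into the designated register
+ //   addl ..., %ecx       ; the 2-addr op now modifies %ecx
+ // so %eax still holds the stack slot value for later uses.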
+ unsigned DesignatedReg = VRM.getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+
+ // If the mapped designated register is actually the physreg we have
+ // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << ReuseSlot;
+ DOUT << " from physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg
+ << " instead of reloading into same physreg.\n";
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ RegInfo->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+ TII->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC);
+
+ MachineInstr *CopyMI = prior(MII);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DOUT << '\t' << *prior(MII);
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM.getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+
+ RegInfo->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ if (DoReMat) {
+ ReMaterialize(MBB, MII, PhysReg, VirtReg, TII, TRI, VRM);
+ } else {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(MII), TRI, RegKills, KillOps);
+ DOUT << '\t' << *prior(MII);
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DOUT << "Removed dead store:\t" << *DeadStore;
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DeadStore);
+ MBB.erase(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+
+
+ DOUT << '\t' << MI;
+
+
+ // If we have folded references to memory operands, make sure we clear all
+ // physical registers that may contain the value of the spilled virtual
+ // register.
+ SmallSet<int, 2> FoldedSS;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ DOUT << "Folded vreg: " << VirtReg << " MR: " << MR;
+
+ // MI2VirtMap can be updated, which invalidates the iterator.
+ // Increment the iterator first.
+ ++I;
+ int SS = VRM.getStackSlot(VirtReg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ continue;
+ FoldedSS.insert(SS);
+ DOUT << " - StackSlot: " << SS << "\n";
+
+ // If this folded instruction is just a use, check to see if it's a
+ // straight load from the virt reg slot.
+ if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+ if (DestReg && FrameIdx == SS) {
+ // If this spill slot is available, turn it into a copy (or nothing)
+ // instead of leaving it as a load!
+ if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+ DOUT << "Promoted Load To Copy: " << MI;
+ if (DestReg != InReg) {
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+ TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
+ MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+ unsigned SubIdx = DefMO->getSubReg();
+ // Revisit the copy so we make sure to notice the effects of the
+ // operation on the destreg (either needing to RA it if it's
+ // virtual or needing to clobber any values if it's physical).
+ NextMII = &MI;
+ --NextMII; // backtrack to the copy.
+ // Propagate the sub-register index over.
+ if (SubIdx) {
+ DefMO = NextMII->findRegisterDefOperand(DestReg);
+ DefMO->setSubReg(SubIdx);
+ }
+
+ // Mark it killed.
+ MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+
+ BackTracked = true;
+ } else {
+ DOUT << "Removing now-noop copy: " << MI;
+ // Unset last kill since it's being reused.
+ InvalidateKill(InReg, TRI, RegKills, KillOps);
+ Spills.disallowClobberPhysReg(InReg);
+ }
+
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ goto ProcessNextInst;
+ }
+ } else {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (PhysReg &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+ MBB.insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+ }
+ }
+
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ MachineInstr* DeadStore = MaybeDeadStores[SS];
+ if (DeadStore) {
+ bool isDead = !(MR & VirtRegMap::isRef);
+ MachineInstr *NewStore = NULL;
+ if (MR & VirtRegMap::isModRef) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ // We can reuse this physreg as long as we are allowed to clobber
+ // the value and there isn't an earlier def that has already clobbered
+ // the physreg.
+ if (PhysReg &&
+ !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg) &&
+ !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+ MachineOperand *KillOpnd =
+ DeadStore->findRegisterUseOperand(PhysReg, true);
+ // Note, if the store is storing a sub-register, it's possible the
+ // super-register is needed below.
+ if (KillOpnd && !KillOpnd->getSubReg() &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
+ MBB.insert(MII, NewMIs[0]);
+ NewStore = NewMIs[1];
+ MBB.insert(MII, NewStore);
+ VRM.addSpillSlotUse(SS, NewStore);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ --NextMII;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ isDead = true;
+ ++NumSUnfold;
+ }
+ }
+ }
+
+ if (isDead) { // Previous store is dead.
+ // If we get here, the store is dead, nuke it now.
+ DOUT << "Removed dead store:\t" << *DeadStore;
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DeadStore);
+ MBB.erase(DeadStore);
+ if (!NewStore)
+ ++NumDSE;
+ }
+
+ MaybeDeadStores[SS] = NULL;
+ if (NewStore) {
+ // Treat this store as a spill merged into a copy. That makes the
+ // stack slot value available.
+ VRM.virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
+ goto ProcessNextInst;
+ }
+ }
+
+ // If the spill slot value is available, and this is a new definition of
+ // the value, the value is not available anymore.
+ if (MR & VirtRegMap::isMod) {
+ // Notice that the value in this stack slot has been modified.
+ Spills.ModifyStackSlotOrReMat(SS);
+
+ // If this is *just* a mod of the value, check to see if this is just a
+ // store to the spill slot (i.e. the spill got merged into the copy). If
+ // so, realize that the vreg is available now, and add the store to the
+ // MaybeDeadStore info.
+ int StackSlot;
+ if (!(MR & VirtRegMap::isRef)) {
+ if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Src hasn't been allocated yet?");
+
+ if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
+ Spills, RegKills, KillOps, TRI, VRM)) {
+ NextMII = next(MII);
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+
+ // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
+ // this as a potentially dead store in case there is a subsequent
+ // store into the stack slot without a read from it.
+ MaybeDeadStores[StackSlot] = &MI;
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the value
+ // available in SrcReg.
+ Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
+ }
+ }
+ }
+ }
+
+ // Process all of the spilled defs.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!(MO.isReg() && MO.getReg() && MO.isDef()))
+ continue;
+
+ unsigned VirtReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ unsigned Src, Dst, SrcSR, DstSR;
+ if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+ ++NumDCE;
+ DOUT << "Removing now-noop copy: " << MI;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+ if (MO.isDead() && !KillRegs.empty()) {
+ // Source register or an implicit super/sub-register use is killed.
+ assert(KillRegs[0] == Dst ||
+ TRI->isSubRegister(KillRegs[0], Dst) ||
+ TRI->isSuperRegister(KillRegs[0], Dst));
+ // Last def is now dead.
+ TransferDeadness(&MBB, Dist, Src, RegKills, KillOps, VRM);
+ }
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
+
+ // If it's not a no-op copy, it clobbers the value in the destreg.
+ Spills.ClobberPhysReg(VirtReg);
+ ReusedOperands.markClobbered(VirtReg);
+
+ // Check to see if this instruction is a load from a stack slot into
+ // a register. If so, this provides the stack slot value in the reg.
+ int FrameIdx;
+ if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ assert(DestReg == VirtReg && "Unknown load situation!");
+
+ // If it is a folded reference, then it's not safe to clobber.
+ bool Folded = FoldedSS.count(FrameIdx);
+ // Otherwise, if it wasn't available, remember that it is now!
+ Spills.addAvailable(FrameIdx, DestReg, !Folded);
+ goto ProcessNextInst;
+ }
+
+ continue;
+ }
+
+ unsigned SubIdx = MO.getSubReg();
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ if (DoReMat)
+ ReMatDefs.insert(&MI);
+
+ // The only vregs left are stack slot definitions.
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+
+ // If this def is part of a two-address operand, make sure to execute
+ // the store from the correct physical register.
+ unsigned PhysReg;
+ unsigned TiedOp;
+ if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+ PhysReg = MI.getOperand(TiedOp).getReg();
+ if (SubIdx) {
+ unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+ assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+ "Can't find corresponding super-register!");
+ PhysReg = SuperReg;
+ }
+ } else {
+ PhysReg = VRM.getPhys(VirtReg);
+ if (ReusedOperands.isClobbered(PhysReg)) {
+ // Another def has taken the assigned physreg. It must have been a
+ // use&def which got it due to reuse. Undo the reuse!
+ PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+ }
+ }
+
+ assert(PhysReg && "VR not assigned a physical register?");
+ RegInfo->setPhysRegUsed(PhysReg);
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ ReusedOperands.markClobbered(RReg);
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ if (!MO.isDead()) {
+ MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+ SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true,
+ LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM);
+ NextMII = next(MII);
+
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ {
+ unsigned Src, Dst, SrcSR, DstSR;
+ if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+ ++NumDCE;
+ DOUT << "Removing now-noop copy: " << MI;
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ UpdateKills(*LastStore, TRI, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
+ }
+ }
+ }
+ ProcessNextInst:
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+ if (!Erased && !BackTracked) {
+ for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+ UpdateKills(*II, TRI, RegKills, KillOps);
+ }
+ MII = NextMII;
+ }
+
+ }
+
+};
+
+llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
+ switch (RewriterOpt) {
+ default: assert(0 && "Unreachable!");
+ case local:
+ return new LocalRewriter();
+ case simple:
+ return new SimpleRewriter();
+ case trivial:
+ return new TrivialRewriter();
+ }
+}
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
new file mode 100644
index 0000000..bc830f7
--- /dev/null
+++ b/lib/CodeGen/VirtRegRewriter.h
@@ -0,0 +1,56 @@
+//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
+#define LLVM_CODEGEN_VIRTREGREWRITER_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "VirtRegMap.h"
+#include <map>
+
+// TODO:
+// - Finish renaming Spiller -> Rewriter
+// - SimpleSpiller
+// - LocalSpiller
+
+namespace llvm {
+
+ /// VirtRegRewriter interface: Implementations of this interface assign
+ /// spilled virtual registers to stack slots, rewriting the code.
+ struct VirtRegRewriter {
+ virtual ~VirtRegRewriter();
+ virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+ LiveIntervals* LIs) = 0;
+ };
+
+ /// createVirtRegRewriter - Create and return a rewriter object, as specified
+ /// on the command line.
+ VirtRegRewriter* createVirtRegRewriter();
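+ // Typical usage from a rewriting pass (illustrative sketch only; MF,
+ // VRM and LIs are assumed to be the pass's MachineFunction, VirtRegMap
+ // and LiveIntervals*, and are not defined by this header):
+ //   VirtRegRewriter *RW = createVirtRegRewriter();
+ //   RW->runOnMachineFunction(MF, VRM, LIs);
+ //   delete RW;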
+
+}
+
+#endif