Diffstat (limited to 'lib/CodeGen')
112 files changed, 86727 insertions, 0 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp new file mode 100644 index 0000000..45462da --- /dev/null +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -0,0 +1,1724 @@ +//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include <cerrno> +using namespace llvm; + +static cl::opt<cl::boolOrDefault> +AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), + cl::init(cl::BOU_UNSET)); + +char AsmPrinter::ID = 0; +AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm, + const TargetAsmInfo *T, CodeGenOpt::Level OL, bool VDef) + : MachineFunctionPass(&ID), FunctionNumber(0), OptLevel(OL), O(o), + TM(tm), TAI(T), TRI(tm.getRegisterInfo()), + IsInTextSection(false) +{ + switch (AsmVerbose) { + case cl::BOU_UNSET: VerboseAsm = VDef; break; + case cl::BOU_TRUE: VerboseAsm = true; break; + case cl::BOU_FALSE: VerboseAsm = false; break; + } +} + +AsmPrinter::~AsmPrinter() { + for (gcp_iterator I = GCMetadataPrinters.begin(), + E = GCMetadataPrinters.end(); I != E; ++I) + delete I->second; +} + +/// SwitchToTextSection - Switch to the specified text section of the executable +/// if we are not already in it! +/// +void AsmPrinter::SwitchToTextSection(const char *NewSection, + const GlobalValue *GV) { + std::string NS; + if (GV && GV->hasSection()) + NS = TAI->getSwitchToSectionDirective() + GV->getSection(); + else + NS = NewSection; + + // If we're already in this section, we're done. + if (CurrentSection == NS) return; + + // Close the current section, if applicable. + if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) + O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; + + CurrentSection = NS; + + if (!CurrentSection.empty()) + O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n'; + + IsInTextSection = true; +} + +/// SwitchToDataSection - Switch to the specified data section of the executable +/// if we are not already in it! +/// +void AsmPrinter::SwitchToDataSection(const char *NewSection, + const GlobalValue *GV) { + std::string NS; + if (GV && GV->hasSection()) + NS = TAI->getSwitchToSectionDirective() + GV->getSection(); + else + NS = NewSection; + + // If we're already in this section, we're done. 
+ if (CurrentSection == NS) return; + + // Close the current section, if applicable. + if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) + O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; + + CurrentSection = NS; + + if (!CurrentSection.empty()) + O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n'; + + IsInTextSection = false; +} + +/// SwitchToSection - Switch to the specified section of the executable if we +/// are not already in it! +void AsmPrinter::SwitchToSection(const Section* NS) { + const std::string& NewSection = NS->getName(); + + // If we're already in this section, we're done. + if (CurrentSection == NewSection) return; + + // Close the current section, if applicable. + if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) + O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; + + // FIXME: Make CurrentSection a Section* in the future + CurrentSection = NewSection; + CurrentSection_ = NS; + + if (!CurrentSection.empty()) { + // If section is named we need to switch into it via special '.section' + // directive and also append funky flags. Otherwise - section name is just + // some magic assembler directive. + if (NS->isNamed()) + O << TAI->getSwitchToSectionDirective() + << CurrentSection + << TAI->getSectionFlags(NS->getFlags()); + else + O << CurrentSection; + O << TAI->getDataSectionStartSuffix() << '\n'; + } + + IsInTextSection = (NS->getFlags() & SectionFlags::Code); +} + +void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<GCModuleInfo>(); +} + +bool AsmPrinter::doInitialization(Module &M) { + Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix()); + + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + + if (TAI->hasSingleParameterDotFile()) { + /* Very minimal debug info. It is ignored if we emit actual + debug info. If we don't, this at helps the user find where + a function came from. */ + O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n"; + } + + for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + MP->beginAssembly(O, *this, *TAI); + + if (!M.getModuleInlineAsm().empty()) + O << TAI->getCommentString() << " Start of file scope inline assembly\n" + << M.getModuleInlineAsm() + << '\n' << TAI->getCommentString() + << " End of file scope inline assembly\n"; + + SwitchToDataSection(""); // Reset back to no section. 
+ + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + if (MMI) MMI->AnalyzeModule(M); + DW = getAnalysisIfAvailable<DwarfWriter>(); + return false; +} + +bool AsmPrinter::doFinalization(Module &M) { + if (TAI->getWeakRefDirective()) { + if (!ExtWeakSymbols.empty()) + SwitchToDataSection(""); + + for (std::set<const GlobalValue*>::iterator i = ExtWeakSymbols.begin(), + e = ExtWeakSymbols.end(); i != e; ++i) + O << TAI->getWeakRefDirective() << Mang->getValueName(*i) << '\n'; + } + + if (TAI->getSetDirective()) { + if (!M.alias_empty()) + SwitchToSection(TAI->getTextSection()); + + O << '\n'; + for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); + I!=E; ++I) { + std::string Name = Mang->getValueName(I); + std::string Target; + + const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal()); + Target = Mang->getValueName(GV); + + if (I->hasExternalLinkage() || !TAI->getWeakRefDirective()) + O << "\t.globl\t" << Name << '\n'; + else if (I->hasWeakLinkage()) + O << TAI->getWeakRefDirective() << Name << '\n'; + else if (!I->hasLocalLinkage()) + assert(0 && "Invalid alias linkage"); + + printVisibility(Name, I->getVisibility()); + + O << TAI->getSetDirective() << ' ' << Name << ", " << Target << '\n'; + } + } + + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) + MP->finishAssembly(O, *this, *TAI); + + // If we don't have any trampolines, then we don't require stack memory + // to be executable. Some targets have a directive to declare this. + Function* InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); + if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) + if (TAI->getNonexecutableStackDirective()) + O << TAI->getNonexecutableStackDirective() << '\n'; + + delete Mang; Mang = 0; + return false; +} + +const std::string & +AsmPrinter::getCurrentFunctionEHName(const MachineFunction *MF, + std::string &Name) const { + assert(MF && "No machine function?"); + Name = MF->getFunction()->getName(); + if (Name.empty()) + Name = Mang->getValueName(MF->getFunction()); + Name = Mang->makeNameProper(TAI->getEHGlobalPrefix() + + Name + ".eh", TAI->getGlobalPrefix()); + return Name; +} + +void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { + // What's my mangled name? + CurrentFnName = Mang->getValueName(MF.getFunction()); + IncrementFunctionNumber(); +} + +namespace { + // SectionCPs - Keep track the alignment, constpool entries per Section. + struct SectionCPs { + const Section *S; + unsigned Alignment; + SmallVector<unsigned, 4> CPEs; + SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {}; + }; +} + +/// EmitConstantPool - Print to the current output stream assembly +/// representations of the constants in the constant pool MCP. This is +/// used to print out constants which have been "spilled to memory" by +/// the code generator. +/// +void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { + const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); + if (CP.empty()) return; + + // Calculate sections for constant pool entries. We collect entries to go into + // the same section together to reduce amount of section switch statements. 
+ SmallVector<SectionCPs, 4> CPSections; + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + MachineConstantPoolEntry CPE = CP[i]; + unsigned Align = CPE.getAlignment(); + const Section* S = TAI->SelectSectionForMachineConst(CPE.getType()); + // The number of sections are small, just do a linear search from the + // last section to the first. + bool Found = false; + unsigned SecIdx = CPSections.size(); + while (SecIdx != 0) { + if (CPSections[--SecIdx].S == S) { + Found = true; + break; + } + } + if (!Found) { + SecIdx = CPSections.size(); + CPSections.push_back(SectionCPs(S, Align)); + } + + if (Align > CPSections[SecIdx].Alignment) + CPSections[SecIdx].Alignment = Align; + CPSections[SecIdx].CPEs.push_back(i); + } + + // Now print stuff into the calculated sections. + for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { + SwitchToSection(CPSections[i].S); + EmitAlignment(Log2_32(CPSections[i].Alignment)); + + unsigned Offset = 0; + for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) { + unsigned CPI = CPSections[i].CPEs[j]; + MachineConstantPoolEntry CPE = CP[CPI]; + + // Emit inter-object padding for alignment. + unsigned AlignMask = CPE.getAlignment() - 1; + unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; + EmitZeros(NewOffset - Offset); + + const Type *Ty = CPE.getType(); + Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); + + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << CPI << ":\t\t\t\t\t"; + if (VerboseAsm) { + O << TAI->getCommentString() << ' '; + WriteTypeSymbolic(O, CPE.getType(), 0); + } + O << '\n'; + if (CPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); + else + EmitGlobalConstant(CPE.Val.ConstVal); + } + } +} + +/// EmitJumpTableInfo - Print assembly representations of the jump tables used +/// by the current function to the current output stream. +/// +void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, + MachineFunction &MF) { + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + + // Pick the directive to use to print the jump table entries, and switch to + // the appropriate section. + TargetLowering *LoweringInfo = TM.getTargetLowering(); + + const char* JumpTableDataSection = TAI->getJumpTableDataSection(); + const Function *F = MF.getFunction(); + unsigned SectionFlags = TAI->SectionFlagsForGlobal(F); + if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) || + !JumpTableDataSection || + SectionFlags & SectionFlags::Linkonce) { + // In PIC mode, we need to emit the jump table to the same section as the + // function body itself, otherwise the label differences won't make sense. + // We should also do if the section name is NULL or function is declared in + // discardable section. + SwitchToSection(TAI->SectionForGlobal(F)); + } else { + SwitchToDataSection(JumpTableDataSection); + } + + EmitAlignment(Log2_32(MJTI->getAlignment())); + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + const std::vector<MachineBasicBlock*> &JTBBs = JT[i].MBBs; + + // If this jump table was deleted, ignore it. + if (JTBBs.empty()) continue; + + // For PIC codegen, if possible we want to use the SetDirective to reduce + // the number of relocations the assembler will generate for the jump table. + // Set directives are all printed before the jump table itself. 
+ SmallPtrSet<MachineBasicBlock*, 16> EmittedSets; + if (TAI->getSetDirective() && IsPic) + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) + if (EmittedSets.insert(JTBBs[ii])) + printPICJumpTableSetLabel(i, JTBBs[ii]); + + // On some targets (e.g. darwin) we want to emit two consequtive labels + // before each jump table. The first label is never referenced, but tells + // the assembler and linker the extents of the jump table object. The + // second label is actually referenced by the code. + if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) + O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << i << ":\n"; + + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { + printPICJumpTableEntry(MJTI, JTBBs[ii], i); + O << '\n'; + } + } +} + +void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid) const { + bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + + // Use JumpTableDirective otherwise honor the entry size from the jump table + // info. + const char *JTEntryDirective = TAI->getJumpTableDirective(); + bool HadJTEntryDirective = JTEntryDirective != NULL; + if (!HadJTEntryDirective) { + JTEntryDirective = MJTI->getEntrySize() == 4 ? + TAI->getData32bitsDirective() : TAI->getData64bitsDirective(); + } + + O << JTEntryDirective << ' '; + + // If we have emitted set directives for the jump table entries, print + // them rather than the entries themselves. If we're emitting PIC, then + // emit the table entries as differences between two text section labels. + // If we're emitting non-PIC code, then emit the entries as direct + // references to the target basic blocks. + if (IsPic) { + if (TAI->getSetDirective()) { + O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << uid << "_set_" << MBB->getNumber(); + } else { + printBasicBlockLabel(MBB, false, false, false); + // If the arch uses custom Jump Table directives, don't calc relative to + // JT + if (!HadJTEntryDirective) + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << uid; + } + } else { + printBasicBlockLabel(MBB, false, false, false); + } +} + + +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. If so, emit it and return true, otherwise +/// do nothing and return false. +bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") { + if (TAI->getUsedDirective() != 0) // No need to emit this at all. + EmitLLVMUsedList(GV->getInitializer()); + return true; + } + + // Ignore debug and non-emitted data. 
+ if (GV->getSection() == "llvm.metadata" || + GV->hasAvailableExternallyLinkage()) + return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + const TargetData *TD = TM.getTargetData(); + unsigned Align = Log2_32(TD->getPointerPrefAlignment()); + if (GV->getName() == "llvm.global_ctors") { + SwitchToDataSection(TAI->getStaticCtorsSection()); + EmitAlignment(Align, 0); + EmitXXStructorList(GV->getInitializer()); + return true; + } + + if (GV->getName() == "llvm.global_dtors") { + SwitchToDataSection(TAI->getStaticDtorsSection()); + EmitAlignment(Align, 0); + EmitXXStructorList(GV->getInitializer()); + return true; + } + + return false; +} + +/// findGlobalValue - if CV is an expression equivalent to a single +/// global value, return that value. +const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) + return GV; + else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + const TargetData *TD = TM.getTargetData(); + unsigned Opcode = CE->getOpcode(); + switch (Opcode) { + case Instruction::GetElementPtr: { + const Constant *ptrVal = CE->getOperand(0); + SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); + if (TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], idxVec.size())) + return 0; + return findGlobalValue(ptrVal); + } + case Instruction::BitCast: + return findGlobalValue(CE->getOperand(0)); + default: + return 0; + } + } + return 0; +} + +/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each +/// global in the specified llvm.used list for which emitUsedDirectiveFor +/// is true, as being used with this directive. + +void AsmPrinter::EmitLLVMUsedList(Constant *List) { + const char *Directive = TAI->getUsedDirective(); + + // Should be an array of 'sbyte*'. + ConstantArray *InitList = dyn_cast<ConstantArray>(List); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + const GlobalValue *GV = findGlobalValue(InitList->getOperand(i)); + if (TAI->emitUsedDirectiveFor(GV, Mang)) { + O << Directive; + EmitConstantValueOnly(InitList->getOperand(i)); + O << '\n'; + } + } +} + +/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the +/// function pointers, ignoring the init priority. +void AsmPrinter::EmitXXStructorList(Constant *List) { + // Should be an array of '{ int, void ()* }' structs. The first value is the + // init priority, which we ignore. + if (!isa<ConstantArray>(List)) return; + ConstantArray *InitList = cast<ConstantArray>(List); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + if (CS->getOperand(1)->isNullValue()) + return; // Found a null terminator, exit printing. + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1)); + } +} + +/// getGlobalLinkName - Returns the asm/link name of of the specified +/// global variable. Should be overridden by each target asm printer to +/// generate the appropriate value. 
+const std::string &AsmPrinter::getGlobalLinkName(const GlobalVariable *GV, + std::string &LinkName) const { + if (isa<Function>(GV)) { + LinkName += TAI->getFunctionAddrPrefix(); + LinkName += Mang->getValueName(GV); + LinkName += TAI->getFunctionAddrSuffix(); + } else { + LinkName += TAI->getGlobalVarAddrPrefix(); + LinkName += Mang->getValueName(GV); + LinkName += TAI->getGlobalVarAddrSuffix(); + } + + return LinkName; +} + +/// EmitExternalGlobal - Emit the external reference to a global variable. +/// Should be overridden if an indirect reference should be used. +void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) { + std::string GLN; + O << getGlobalLinkName(GV, GLN); +} + + + +//===----------------------------------------------------------------------===// +/// LEB 128 number encoding. + +/// PrintULEB128 - Print a series of hexidecimal values (separated by commas) +/// representing an unsigned leb128 value. +void AsmPrinter::PrintULEB128(unsigned Value) const { + char Buffer[20]; + do { + unsigned char Byte = static_cast<unsigned char>(Value & 0x7f); + Value >>= 7; + if (Value) Byte |= 0x80; + O << "0x" << utohex_buffer(Byte, Buffer+20); + if (Value) O << ", "; + } while (Value); +} + +/// PrintSLEB128 - Print a series of hexidecimal values (separated by commas) +/// representing a signed leb128 value. +void AsmPrinter::PrintSLEB128(int Value) const { + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + char Buffer[20]; + + do { + unsigned char Byte = static_cast<unsigned char>(Value & 0x7f); + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + if (IsMore) Byte |= 0x80; + O << "0x" << utohex_buffer(Byte, Buffer+20); + if (IsMore) O << ", "; + } while (IsMore); +} + +//===--------------------------------------------------------------------===// +// Emission and print routines +// + +/// PrintHex - Print a value as a hexidecimal value. +/// +void AsmPrinter::PrintHex(int Value) const { + char Buffer[20]; + O << "0x" << utohex_buffer(static_cast<unsigned>(Value), Buffer+20); +} + +/// EOL - Print a newline character to asm stream. If a comment is present +/// then it will be printed first. Comments should not contain '\n'. +void AsmPrinter::EOL() const { + O << '\n'; +} + +void AsmPrinter::EOL(const std::string &Comment) const { + if (VerboseAsm && !Comment.empty()) { + O << '\t' + << TAI->getCommentString() + << ' ' + << Comment; + } + O << '\n'; +} + +void AsmPrinter::EOL(const char* Comment) const { + if (VerboseAsm && *Comment) { + O << '\t' + << TAI->getCommentString() + << ' ' + << Comment; + } + O << '\n'; +} + +/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an +/// unsigned leb128 value. +void AsmPrinter::EmitULEB128Bytes(unsigned Value) const { + if (TAI->hasLEB128()) { + O << "\t.uleb128\t" + << Value; + } else { + O << TAI->getData8bitsDirective(); + PrintULEB128(Value); + } +} + +/// EmitSLEB128Bytes - print an assembler byte data directive to compose a +/// signed leb128 value. +void AsmPrinter::EmitSLEB128Bytes(int Value) const { + if (TAI->hasLEB128()) { + O << "\t.sleb128\t" + << Value; + } else { + O << TAI->getData8bitsDirective(); + PrintSLEB128(Value); + } +} + +/// EmitInt8 - Emit a byte directive and value. +/// +void AsmPrinter::EmitInt8(int Value) const { + O << TAI->getData8bitsDirective(); + PrintHex(Value & 0xFF); +} + +/// EmitInt16 - Emit a short directive and value. 
+/// +void AsmPrinter::EmitInt16(int Value) const { + O << TAI->getData16bitsDirective(); + PrintHex(Value & 0xFFFF); +} + +/// EmitInt32 - Emit a long directive and value. +/// +void AsmPrinter::EmitInt32(int Value) const { + O << TAI->getData32bitsDirective(); + PrintHex(Value); +} + +/// EmitInt64 - Emit a long long directive and value. +/// +void AsmPrinter::EmitInt64(uint64_t Value) const { + if (TAI->getData64bitsDirective()) { + O << TAI->getData64bitsDirective(); + PrintHex(Value); + } else { + if (TM.getTargetData()->isBigEndian()) { + EmitInt32(unsigned(Value >> 32)); O << '\n'; + EmitInt32(unsigned(Value)); + } else { + EmitInt32(unsigned(Value)); O << '\n'; + EmitInt32(unsigned(Value >> 32)); + } + } +} + +/// toOctal - Convert the low order bits of X into an octal digit. +/// +static inline char toOctal(int X) { + return (X&7)+'0'; +} + +/// printStringChar - Print a char, escaped if necessary. +/// +static void printStringChar(raw_ostream &O, unsigned char C) { + if (C == '"') { + O << "\\\""; + } else if (C == '\\') { + O << "\\\\"; + } else if (isprint((unsigned char)C)) { + O << C; + } else { + switch(C) { + case '\b': O << "\\b"; break; + case '\f': O << "\\f"; break; + case '\n': O << "\\n"; break; + case '\r': O << "\\r"; break; + case '\t': O << "\\t"; break; + default: + O << '\\'; + O << toOctal(C >> 6); + O << toOctal(C >> 3); + O << toOctal(C >> 0); + break; + } + } +} + +/// EmitString - Emit a string with quotes and a null terminator. +/// Special characters are emitted properly. +/// \literal (Eg. '\t') \endliteral +void AsmPrinter::EmitString(const std::string &String) const { + EmitString(String.c_str(), String.size()); +} + +void AsmPrinter::EmitString(const char *String, unsigned Size) const { + const char* AscizDirective = TAI->getAscizDirective(); + if (AscizDirective) + O << AscizDirective; + else + O << TAI->getAsciiDirective(); + O << '\"'; + for (unsigned i = 0; i < Size; ++i) + printStringChar(O, String[i]); + if (AscizDirective) + O << '\"'; + else + O << "\\0\""; +} + + +/// EmitFile - Emit a .file directive. +void AsmPrinter::EmitFile(unsigned Number, const std::string &Name) const { + O << "\t.file\t" << Number << " \""; + for (unsigned i = 0, N = Name.size(); i < N; ++i) + printStringChar(O, Name[i]); + O << '\"'; +} + + +//===----------------------------------------------------------------------===// + +// EmitAlignment - Emit an alignment directive to the specified power of +// two boundary. For example, if you pass in 3 here, you will get an 8 +// byte alignment. If a global value is specified, and if that global has +// an explicit alignment requested, it will unconditionally override the +// alignment request. However, if ForcedAlignBits is specified, this value +// has final say: the ultimate alignment will be the max of ForcedAlignBits +// and the alignment computed with NumBits and the global. +// +// The algorithm is: +// Align = NumBits; +// if (GV && GV->hasalignment) Align = GV->getalignment(); +// Align = std::max(Align, ForcedAlignBits); +// +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, + unsigned ForcedAlignBits, + bool UseFillExpr) const { + if (GV && GV->getAlignment()) + NumBits = Log2_32(GV->getAlignment()); + NumBits = std::max(NumBits, ForcedAlignBits); + + if (NumBits == 0) return; // No need to emit alignment. 
+ if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits; + O << TAI->getAlignDirective() << NumBits; + + unsigned FillValue = TAI->getTextAlignFillValue(); + UseFillExpr &= IsInTextSection && FillValue; + if (UseFillExpr) { + O << ','; + PrintHex(FillValue); + } + O << '\n'; +} + + +/// EmitZeros - Emit a block of zeros. +/// +void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const { + if (NumZeros) { + if (TAI->getZeroDirective()) { + O << TAI->getZeroDirective() << NumZeros; + if (TAI->getZeroDirectiveSuffix()) + O << TAI->getZeroDirectiveSuffix(); + O << '\n'; + } else { + for (; NumZeros; --NumZeros) + O << TAI->getData8bitsDirective(AddrSpace) << "0\n"; + } + } +} + +// Print out the specified constant, without a storage class. Only the +// constants valid in constant expressions can occur here. +void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { + if (CV->isNullValue() || isa<UndefValue>(CV)) + O << '0'; + else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + O << CI->getZExtValue(); + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + // This is a constant address for a global variable or function. Use the + // name of the variable or function as the address value, possibly + // decorating it with GlobalVarAddrPrefix/Suffix or + // FunctionAddrPrefix/Suffix (these all default to "" ) + if (isa<Function>(GV)) { + O << TAI->getFunctionAddrPrefix() + << Mang->getValueName(GV) + << TAI->getFunctionAddrSuffix(); + } else { + O << TAI->getGlobalVarAddrPrefix() + << Mang->getValueName(GV) + << TAI->getGlobalVarAddrSuffix(); + } + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + const TargetData *TD = TM.getTargetData(); + unsigned Opcode = CE->getOpcode(); + switch (Opcode) { + case Instruction::GetElementPtr: { + // generate a symbolic expression for the byte address + const Constant *ptrVal = CE->getOperand(0); + SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); + if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], + idxVec.size())) { + // Truncate/sext the offset to the pointer size. + if (TD->getPointerSizeInBits() != 64) { + int SExtAmount = 64-TD->getPointerSizeInBits(); + Offset = (Offset << SExtAmount) >> SExtAmount; + } + + if (Offset) + O << '('; + EmitConstantValueOnly(ptrVal); + if (Offset > 0) + O << ") + " << Offset; + else if (Offset < 0) + O << ") - " << -Offset; + } else { + EmitConstantValueOnly(ptrVal); + } + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + assert(0 && "FIXME: Don't yet support this kind of constant cast expr"); + break; + case Instruction::BitCast: + return EmitConstantValueOnly(CE->getOperand(0)); + + case Instruction::IntToPtr: { + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/); + return EmitConstantValueOnly(Op); + } + + + case Instruction::PtrToInt: { + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. 
+ Constant *Op = CE->getOperand(0); + const Type *Ty = CE->getType(); + + // We can emit the pointer value into this slot if the slot is an + // integer slot greater or equal to the size of the pointer. + if (TD->getTypeAllocSize(Ty) >= TD->getTypeAllocSize(Op->getType())) + return EmitConstantValueOnly(Op); + + O << "(("; + EmitConstantValueOnly(Op); + APInt ptrMask = APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Ty)); + + SmallString<40> S; + ptrMask.toStringUnsigned(S); + O << ") & " << S.c_str() << ')'; + break; + } + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + O << '('; + EmitConstantValueOnly(CE->getOperand(0)); + O << ')'; + switch (Opcode) { + case Instruction::Add: + O << " + "; + break; + case Instruction::Sub: + O << " - "; + break; + case Instruction::And: + O << " & "; + break; + case Instruction::Or: + O << " | "; + break; + case Instruction::Xor: + O << " ^ "; + break; + default: + break; + } + O << '('; + EmitConstantValueOnly(CE->getOperand(1)); + O << ')'; + break; + default: + assert(0 && "Unsupported operator!"); + } + } else { + assert(0 && "Unknown constant value!"); + } +} + +/// printAsCString - Print the specified array as a C compatible string, only if +/// the predicate isString is true. +/// +static void printAsCString(raw_ostream &O, const ConstantArray *CVA, + unsigned LastElt) { + assert(CVA->isString() && "Array is not string compatible!"); + + O << '\"'; + for (unsigned i = 0; i != LastElt; ++i) { + unsigned char C = + (unsigned char)cast<ConstantInt>(CVA->getOperand(i))->getZExtValue(); + printStringChar(O, C); + } + O << '\"'; +} + +/// EmitString - Emit a zero-byte-terminated string constant. +/// +void AsmPrinter::EmitString(const ConstantArray *CVA) const { + unsigned NumElts = CVA->getNumOperands(); + if (TAI->getAscizDirective() && NumElts && + cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) { + O << TAI->getAscizDirective(); + printAsCString(O, CVA, NumElts-1); + } else { + O << TAI->getAsciiDirective(); + printAsCString(O, CVA, NumElts); + } + O << '\n'; +} + +void AsmPrinter::EmitGlobalConstantArray(const ConstantArray *CVA, + unsigned AddrSpace) { + if (CVA->isString()) { + EmitString(CVA); + } else { // Not a string. Print the values in successive locations + for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) + EmitGlobalConstant(CVA->getOperand(i), AddrSpace); + } +} + +void AsmPrinter::EmitGlobalConstantVector(const ConstantVector *CP) { + const VectorType *PTy = CP->getType(); + + for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) + EmitGlobalConstant(CP->getOperand(I)); +} + +void AsmPrinter::EmitGlobalConstantStruct(const ConstantStruct *CVS, + unsigned AddrSpace) { + // Print the fields in successive locations. Pad to align if needed! + const TargetData *TD = TM.getTargetData(); + unsigned Size = TD->getTypeAllocSize(CVS->getType()); + const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType()); + uint64_t sizeSoFar = 0; + for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) { + const Constant* field = CVS->getOperand(i); + + // Check if padding is needed and insert one or more 0s. + uint64_t fieldSize = TD->getTypeAllocSize(field->getType()); + uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1)) + - cvsLayout->getElementOffset(i)) - fieldSize; + sizeSoFar += fieldSize + padSize; + + // Now print the actual field value. 
+ EmitGlobalConstant(field, AddrSpace); + + // Insert padding - this may include padding to increase the size of the + // current field up to the ABI size (if the struct is not packed) as well + // as padding to ensure that the next field starts at the right offset. + EmitZeros(padSize, AddrSpace); + } + assert(sizeSoFar == cvsLayout->getSizeInBytes() && + "Layout of constant struct may be incorrect!"); +} + +void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP, + unsigned AddrSpace) { + // FP Constants are printed as integer constants to avoid losing + // precision... + const TargetData *TD = TM.getTargetData(); + if (CFP->getType() == Type::DoubleTy) { + double Val = CFP->getValueAPF().convertToDouble(); // for comment only + uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + if (TAI->getData64bitsDirective(AddrSpace)) { + O << TAI->getData64bitsDirective(AddrSpace) << i; + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " double value: " << Val; + O << '\n'; + } else if (TD->isBigEndian()) { + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " double most significant word " << Val; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " double least significant word " << Val; + O << '\n'; + } else { + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " double least significant word " << Val; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " double most significant word " << Val; + O << '\n'; + } + return; + } else if (CFP->getType() == Type::FloatTy) { + float Val = CFP->getValueAPF().convertToFloat(); // for comment only + O << TAI->getData32bitsDirective(AddrSpace) + << CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " float " << Val; + O << '\n'; + return; + } else if (CFP->getType() == Type::X86_FP80Ty) { + // all long double variants are printed as hex + // api needed to prevent premature destruction + APInt api = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t *p = api.getRawData(); + // Convert to double so we can print the approximate val as a comment. 
+ APFloat DoubleVal = CFP->getValueAPF(); + bool ignored; + DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + if (TD->isBigEndian()) { + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double most significant halfword of ~" + << DoubleVal.convertToDouble(); + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double least significant halfword"; + O << '\n'; + } else { + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double least significant halfword of ~" + << DoubleVal.convertToDouble(); + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next halfword"; + O << '\n'; + O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double most significant halfword"; + O << '\n'; + } + EmitZeros(TD->getTypeAllocSize(Type::X86_FP80Ty) - + TD->getTypeStoreSize(Type::X86_FP80Ty), AddrSpace); + return; + } else if (CFP->getType() == Type::PPC_FP128Ty) { + // all long double variants are printed as hex + // api needed to prevent premature destruction + APInt api = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t *p = api.getRawData(); + if (TD->isBigEndian()) { + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double most significant word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double least significant word"; + O << '\n'; + } else { + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double least significant word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + 
if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double next word"; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " long double most significant word"; + O << '\n'; + } + return; + } else assert(0 && "Floating point constant type not handled"); +} + +void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, + unsigned AddrSpace) { + const TargetData *TD = TM.getTargetData(); + unsigned BitWidth = CI->getBitWidth(); + assert(isPowerOf2_32(BitWidth) && + "Non-power-of-2-sized integers not handled!"); + + // We don't expect assemblers to support integer data directives + // for more than 64 bits, so we emit the data in at most 64-bit + // quantities at a time. + const uint64_t *RawData = CI->getValue().getRawData(); + for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { + uint64_t Val; + if (TD->isBigEndian()) + Val = RawData[e - i - 1]; + else + Val = RawData[i]; + + if (TAI->getData64bitsDirective(AddrSpace)) + O << TAI->getData64bitsDirective(AddrSpace) << Val << '\n'; + else if (TD->isBigEndian()) { + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " Double-word most significant word " << Val; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " Double-word least significant word " << Val; + O << '\n'; + } else { + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " Double-word least significant word " << Val; + O << '\n'; + O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); + if (VerboseAsm) + O << '\t' << TAI->getCommentString() + << " Double-word most significant word " << Val; + O << '\n'; + } + } +} + +/// EmitGlobalConstant - Print a general LLVM constant to the .s file. +void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { + const TargetData *TD = TM.getTargetData(); + const Type *type = CV->getType(); + unsigned Size = TD->getTypeAllocSize(type); + + if (CV->isNullValue() || isa<UndefValue>(CV)) { + EmitZeros(Size, AddrSpace); + return; + } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { + EmitGlobalConstantArray(CVA , AddrSpace); + return; + } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { + EmitGlobalConstantStruct(CVS, AddrSpace); + return; + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + EmitGlobalConstantFP(CFP, AddrSpace); + return; + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + // Small integers are handled below; large integers are handled here. + if (Size > 4) { + EmitGlobalConstantLargeInt(CI, AddrSpace); + return; + } + } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + EmitGlobalConstantVector(CP); + return; + } + + printDataDirective(type, AddrSpace); + EmitConstantValueOnly(CV); + if (VerboseAsm) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + SmallString<40> S; + CI->getValue().toStringUnsigned(S, 16); + O << "\t\t\t" << TAI->getCommentString() << " 0x" << S.c_str(); + } + } + O << '\n'; +} + +void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + // Target doesn't support this yet! 
+ abort(); +} + +/// PrintSpecial - Print information related to the specified machine instr +/// that is independent of the operand, and may be independent of the instr +/// itself. This can be useful for portably encoding the comment character +/// or other bits of target-specific knowledge into the asmstrings. The +/// syntax used is ${:comment}. Targets can override this to add support +/// for their own strange codes. +void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { + if (!strcmp(Code, "private")) { + O << TAI->getPrivateGlobalPrefix(); + } else if (!strcmp(Code, "comment")) { + if (VerboseAsm) + O << TAI->getCommentString(); + } else if (!strcmp(Code, "uid")) { + // Assign a unique ID to this machine instruction. + static const MachineInstr *LastMI = 0; + static const Function *F = 0; + static unsigned Counter = 0U-1; + + // Comparing the address of MI isn't sufficient, because machineinstrs may + // be allocated to the same address across functions. + const Function *ThisF = MI->getParent()->getParent()->getFunction(); + + // If this is a new machine instruction, bump the counter. + if (LastMI != MI || F != ThisF) { + ++Counter; + LastMI = MI; + F = ThisF; + } + O << Counter; + } else { + cerr << "Unknown special formatter '" << Code + << "' for machine instr: " << *MI; + exit(1); + } +} + +/// processDebugLoc - Processes the debug information of each machine +/// instruction's DebugLoc. +void AsmPrinter::processDebugLoc(DebugLoc DL) { + if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { + if (!DL.isUnknown()) { + static DebugLocTuple PrevDLT(0, ~0U, ~0U); + DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); + + if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) + printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + DICompileUnit(CurDLT.CompileUnit))); + + PrevDLT = CurDLT; + } + } +} + +/// printInlineAsm - This method formats and prints the specified machine +/// instruction that is an inline asm. +void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { + unsigned NumOperands = MI->getNumOperands(); + + // Count the number of register definitions. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != NumOperands-1 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. + // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + O << TAI->getInlineAsmStart() << "\n\t" << TAI->getInlineAsmEnd() << '\n'; + return; + } + + O << TAI->getInlineAsmStart() << "\n\t"; + + // The variant of the current asmprinter. + int AsmPrinterVariant = TAI->getAssemblerDialect(); + + int CurVariant = -1; // The number of the {.|.|.} region we are in. + const char *LastEmitted = AsmStr; // One past the last character emitted. + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. 
+ const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + O.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + O << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': // $$ -> $ + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + O << '$'; + ++LastEmitted; // Consume second '$' character. + break; + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. + if (CurVariant != -1) { + cerr << "Nested variants found in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + CurVariant = 0; // We're in the first variant now. + break; + case '|': + ++LastEmitted; // consume '|' character. + if (CurVariant == -1) + O << '|'; // this is gcc's behavior for | outside a variant + else + ++CurVariant; // We're in the next variant. + break; + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // consume ')' character. + if (CurVariant == -1) + O << '}'; // this is gcc's behavior for } outside a variant + else + CurVariant = -1; + break; + } + if (Done) break; + + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (HasCurlyBraces && *LastEmitted == ':') { + ++LastEmitted; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (StrEnd == 0) { + cerr << "Unterminated ${:foo} operand in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + std::string Val(StrStart, StrEnd); + PrintSpecial(MI, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + + const char *IDStart = LastEmitted; + char *IDEnd; + errno = 0; + long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs. + if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) { + cerr << "Bad $ operand number in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + LastEmitted = IDEnd; + + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. + if (*LastEmitted == 0) { + cerr << "Bad ${:} expression in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') { + cerr << "Bad ${} expression in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + ++LastEmitted; // Consume '}' character. + } + + if ((unsigned)Val >= NumOperands-1) { + cerr << "Invalid $ operand number in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + // Okay, we finally have a value number. Ask the target to print this + // operand! + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { + unsigned OpNo = 1; + + bool Error = false; + + // Scan to find the machine operand number for the operand. 
+ for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + if (OpNo >= MI->getNumOperands()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (Modifier[0]=='l') // labels are target independent + printBasicBlockLabel(MI->getOperand(OpNo).getMBB(), + false, false, false); + else { + AsmPrinter *AP = const_cast<AsmPrinter*>(this); + if ((OpFlags & 7) == 4) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, + Modifier[0] ? Modifier : 0); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant, + Modifier[0] ? Modifier : 0); + } + } + } + if (Error) { + cerr << "Invalid operand found in inline asm: '" + << AsmStr << "'\n"; + MI->dump(); + exit(1); + } + } + break; + } + } + } + O << "\n\t" << TAI->getInlineAsmEnd() << '\n'; +} + +/// printImplicitDef - This method prints the specified machine instruction +/// that is an implicit def. +void AsmPrinter::printImplicitDef(const MachineInstr *MI) const { + if (VerboseAsm) + O << '\t' << TAI->getCommentString() << " implicit-def: " + << TRI->getAsmName(MI->getOperand(0).getReg()) << '\n'; +} + +/// printLabel - This method prints a local label used by debug and +/// exception handling tables. +void AsmPrinter::printLabel(const MachineInstr *MI) const { + printLabel(MI->getOperand(0).getImm()); +} + +void AsmPrinter::printLabel(unsigned Id) const { + O << TAI->getPrivateGlobalPrefix() << "label" << Id << ":\n"; +} + +/// printDeclare - This method prints a local variable declaration used by +/// debug tables. +/// FIXME: It doesn't really print anything rather it inserts a DebugVariable +/// entry into dwarf table. +void AsmPrinter::printDeclare(const MachineInstr *MI) const { + unsigned FI = MI->getOperand(0).getIndex(); + GlobalValue *GV = MI->getOperand(1).getGlobal(); + DW->RecordVariable(cast<GlobalVariable>(GV), FI, MI); +} + +/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM +/// instruction, using the specified assembler variant. Targets should +/// overried this to format as appropriate. +bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode) { + // Target doesn't support this yet! + return true; +} + +bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Target doesn't support this yet! + return true; +} + +/// printBasicBlockLabel - This method prints the label for the specified +/// MachineBasicBlock +void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB, + bool printAlign, + bool printColon, + bool printComment) const { + if (printAlign) { + unsigned Align = MBB->getAlignment(); + if (Align) + EmitAlignment(Log2_32(Align)); + } + + O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << '_' + << MBB->getNumber(); + if (printColon) + O << ':'; + if (printComment && MBB->getBasicBlock()) + O << '\t' << TAI->getCommentString() << ' ' + << MBB->getBasicBlock()->getNameStart(); +} + +/// printPICJumpTableSetLabel - This method prints a set label for the +/// specified MachineBasicBlock for a jumptable entry. 
+void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, + const MachineBasicBlock *MBB) const { + if (!TAI->getSetDirective()) + return; + + O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; + printBasicBlockLabel(MBB, false, false, false); + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << uid << '\n'; +} + +void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const { + if (!TAI->getSetDirective()) + return; + + O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << '_' << uid2 + << "_set_" << MBB->getNumber() << ','; + printBasicBlockLabel(MBB, false, false, false); + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << uid << '_' << uid2 << '\n'; +} + +/// printDataDirective - This method prints the asm directive for the +/// specified type. +void AsmPrinter::printDataDirective(const Type *type, unsigned AddrSpace) { + const TargetData *TD = TM.getTargetData(); + switch (type->getTypeID()) { + case Type::IntegerTyID: { + unsigned BitWidth = cast<IntegerType>(type)->getBitWidth(); + if (BitWidth <= 8) + O << TAI->getData8bitsDirective(AddrSpace); + else if (BitWidth <= 16) + O << TAI->getData16bitsDirective(AddrSpace); + else if (BitWidth <= 32) + O << TAI->getData32bitsDirective(AddrSpace); + else if (BitWidth <= 64) { + assert(TAI->getData64bitsDirective(AddrSpace) && + "Target cannot handle 64-bit constant exprs!"); + O << TAI->getData64bitsDirective(AddrSpace); + } else { + assert(0 && "Target cannot handle given data directive width!"); + } + break; + } + case Type::PointerTyID: + if (TD->getPointerSize() == 8) { + assert(TAI->getData64bitsDirective(AddrSpace) && + "Target cannot handle 64-bit pointer exprs!"); + O << TAI->getData64bitsDirective(AddrSpace); + } else if (TD->getPointerSize() == 2) { + O << TAI->getData16bitsDirective(AddrSpace); + } else if (TD->getPointerSize() == 1) { + O << TAI->getData8bitsDirective(AddrSpace); + } else { + O << TAI->getData32bitsDirective(AddrSpace); + } + break; + case Type::FloatTyID: case Type::DoubleTyID: + case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: + assert (0 && "Should have already output floating point constant."); + default: + assert (0 && "Can't handle printing this type of thing"); + break; + } +} + +void AsmPrinter::printSuffixedName(const char *Name, const char *Suffix, + const char *Prefix) { + if (Name[0]=='\"') + O << '\"'; + O << TAI->getPrivateGlobalPrefix(); + if (Prefix) O << Prefix; + if (Name[0]=='\"') + O << '\"'; + if (Name[0]=='\"') + O << Name[1]; + else + O << Name; + O << Suffix; + if (Name[0]=='\"') + O << '\"'; +} + +void AsmPrinter::printSuffixedName(const std::string &Name, const char* Suffix) { + printSuffixedName(Name.c_str(), Suffix); +} + +void AsmPrinter::printVisibility(const std::string& Name, + unsigned Visibility) const { + if (Visibility == GlobalValue::HiddenVisibility) { + if (const char *Directive = TAI->getHiddenDirective()) + O << Directive << Name << '\n'; + } else if (Visibility == GlobalValue::ProtectedVisibility) { + if (const char *Directive = TAI->getProtectedDirective()) + O << Directive << Name << '\n'; + } +} + +void AsmPrinter::printOffset(int64_t Offset) const { + if (Offset > 0) + O << '+' << Offset; + else if (Offset < 0) + O << Offset; +} + +GCMetadataPrinter 
*AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { + if (!S->usesMetadata()) + return 0; + + gcp_iterator GCPI = GCMetadataPrinters.find(S); + if (GCPI != GCMetadataPrinters.end()) + return GCPI->second; + + const char *Name = S->getName().c_str(); + + for (GCMetadataPrinterRegistry::iterator + I = GCMetadataPrinterRegistry::begin(), + E = GCMetadataPrinterRegistry::end(); I != E; ++I) + if (strcmp(Name, I->getName()) == 0) { + GCMetadataPrinter *GMP = I->instantiate(); + GMP->S = S; + GCMetadataPrinters.insert(std::make_pair(S, GMP)); + return GMP; + } + + cerr << "no GCMetadataPrinter registered for GC: " << Name << "\n"; + abort(); +} diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000..066aaab --- /dev/null +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_library(LLVMAsmPrinter + AsmPrinter.cpp + DIE.cpp + DwarfDebug.cpp + DwarfException.cpp + DwarfLabel.cpp + DwarfPrinter.cpp + DwarfWriter.cpp + OcamlGCPrinter.cpp + ) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp new file mode 100644 index 0000000..dc149cf --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -0,0 +1,518 @@ +//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Data structures for DWARF info entries. +// +//===----------------------------------------------------------------------===// + +#include "DIE.h" +#include "DwarfPrinter.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include <ostream> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DIEAbbrevData Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Attribute); + ID.AddInteger(Form); +} + +//===----------------------------------------------------------------------===// +// DIEAbbrev Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Tag); + ID.AddInteger(ChildrenFlag); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) + Data[i].Profile(ID); +} + +/// Emit - Print the abbreviation using the specified asm printer. +/// +void DIEAbbrev::Emit(const AsmPrinter *Asm) const { + // Emit its Dwarf tag type. + Asm->EmitULEB128Bytes(Tag); + Asm->EOL(dwarf::TagString(Tag)); + + // Emit whether it has children DIEs. + Asm->EmitULEB128Bytes(ChildrenFlag); + Asm->EOL(dwarf::ChildrenString(ChildrenFlag)); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + const DIEAbbrevData &AttrData = Data[i]; + + // Emit attribute type. + Asm->EmitULEB128Bytes(AttrData.getAttribute()); + Asm->EOL(dwarf::AttributeString(AttrData.getAttribute())); + + // Emit form type. 
+ Asm->EmitULEB128Bytes(AttrData.getForm()); + Asm->EOL(dwarf::FormEncodingString(AttrData.getForm())); + } + + // Mark end of abbreviation. + Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(1)"); + Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(2)"); +} + +#ifndef NDEBUG +void DIEAbbrev::print(std::ostream &O) { + O << "Abbreviation @" + << std::hex << (intptr_t)this << std::dec + << " " + << dwarf::TagString(Tag) + << " " + << dwarf::ChildrenString(ChildrenFlag) + << "\n"; + + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << " " + << dwarf::AttributeString(Data[i].getAttribute()) + << " " + << dwarf::FormEncodingString(Data[i].getForm()) + << "\n"; + } +} +void DIEAbbrev::dump() { print(cerr); } +#endif + +//===----------------------------------------------------------------------===// +// DIE Implementation +//===----------------------------------------------------------------------===// + +DIE::~DIE() { + for (unsigned i = 0, N = Children.size(); i < N; ++i) + delete Children[i]; +} + +/// AddSiblingOffset - Add a sibling offset field to the front of the DIE. +/// +void DIE::AddSiblingOffset() { + DIEInteger *DI = new DIEInteger(0); + Values.insert(Values.begin(), DI); + Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4); +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIE::Profile(FoldingSetNodeID &ID) { + Abbrev.Profile(ID); + + for (unsigned i = 0, N = Children.size(); i < N; ++i) + ID.AddPointer(Children[i]); + + for (unsigned j = 0, M = Values.size(); j < M; ++j) + ID.AddPointer(Values[j]); +} + +#ifndef NDEBUG +void DIE::print(std::ostream &O, unsigned IncIndent) { + static unsigned IndentCount = 0; + IndentCount += IncIndent; + const std::string Indent(IndentCount, ' '); + bool isBlock = Abbrev.getTag() == 0; + + if (!isBlock) { + O << Indent + << "Die: " + << "0x" << std::hex << (intptr_t)this << std::dec + << ", Offset: " << Offset + << ", Size: " << Size + << "\n"; + + O << Indent + << dwarf::TagString(Abbrev.getTag()) + << " " + << dwarf::ChildrenString(Abbrev.getChildrenFlag()); + } else { + O << "Size: " << Size; + } + O << "\n"; + + const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData(); + + IndentCount += 2; + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << Indent; + + if (!isBlock) + O << dwarf::AttributeString(Data[i].getAttribute()); + else + O << "Blk[" << i << "]"; + + O << " " + << dwarf::FormEncodingString(Data[i].getForm()) + << " "; + Values[i]->print(O); + O << "\n"; + } + IndentCount -= 2; + + for (unsigned j = 0, M = Children.size(); j < M; ++j) { + Children[j]->print(O, 4); + } + + if (!isBlock) O << "\n"; + IndentCount -= IncIndent; +} + +void DIE::dump() { + print(cerr); +} +#endif + + +#ifndef NDEBUG +void DIEValue::dump() { + print(cerr); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEInteger Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit integer of appropriate size. 
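+/// The form selects the encoding: DW_FORM_data1/2/4/8 (and the matching
+/// ref forms) emit fixed-width values of 1, 2, 4 or 8 bytes, while
+/// DW_FORM_udata and DW_FORM_sdata emit variable-length LEB128.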
+/// +void DIEInteger::EmitValue(Dwarf *D, unsigned Form) const { + const AsmPrinter *Asm = D->getAsm(); + switch (Form) { + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: Asm->EmitInt8(Integer); break; + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: Asm->EmitInt16(Integer); break; + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: Asm->EmitInt32(Integer); break; + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: Asm->EmitInt64(Integer); break; + case dwarf::DW_FORM_udata: Asm->EmitULEB128Bytes(Integer); break; + case dwarf::DW_FORM_sdata: Asm->EmitSLEB128Bytes(Integer); break; + default: assert(0 && "DIE Value form not supported yet"); break; + } +} + +/// SizeOf - Determine size of integer value in bytes. +/// +unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const { + switch (Form) { + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: return sizeof(int8_t); + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: return sizeof(int16_t); + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: return sizeof(int32_t); + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: return sizeof(int64_t); + case dwarf::DW_FORM_udata: return TargetAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return TargetAsmInfo::getSLEB128Size(Integer); + default: assert(0 && "DIE Value form not supported yet"); break; + } + return 0; +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIEInteger::Profile(FoldingSetNodeID &ID, unsigned Int) { + ID.AddInteger(isInteger); + ID.AddInteger(Int); +} +void DIEInteger::Profile(FoldingSetNodeID &ID) { + Profile(ID, Integer); +} + +#ifndef NDEBUG +void DIEInteger::print(std::ostream &O) { + O << "Int: " << (int64_t)Integer + << " 0x" << std::hex << Integer << std::dec; +} +#endif + +//===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(Dwarf *D, unsigned Form) const { + D->getAsm()->EmitString(Str); +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIEString::Profile(FoldingSetNodeID &ID, const std::string &Str) { + ID.AddInteger(isString); + ID.AddString(Str); +} +void DIEString::Profile(FoldingSetNodeID &ID) { + Profile(ID, Str); +} + +#ifndef NDEBUG +void DIEString::print(std::ostream &O) { + O << "Str: \"" << Str << "\""; +} +#endif + +//===----------------------------------------------------------------------===// +// DIEDwarfLabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIEDwarfLabel::EmitValue(Dwarf *D, unsigned Form) const { + bool IsSmall = Form == dwarf::DW_FORM_data4; + D->EmitReference(Label, false, IsSmall); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return TD->getPointerSize(); +} + +/// Profile - Used to gather unique data for the value folding set. 
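+/// The kind tag (isLabel) is hashed together with the label itself, so
+/// equal labels cannot collide with other value kinds in the folding set.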
+/// +void DIEDwarfLabel::Profile(FoldingSetNodeID &ID, const DWLabel &Label) { + ID.AddInteger(isLabel); + Label.Profile(ID); +} +void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) { + Profile(ID, Label); +} + +#ifndef NDEBUG +void DIEDwarfLabel::print(std::ostream &O) { + O << "Lbl: "; + Label.print(O); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEObjectLabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIEObjectLabel::EmitValue(Dwarf *D, unsigned Form) const { + bool IsSmall = Form == dwarf::DW_FORM_data4; + D->EmitReference(Label, false, IsSmall); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return TD->getPointerSize(); +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIEObjectLabel::Profile(FoldingSetNodeID &ID, const std::string &Label) { + ID.AddInteger(isAsIsLabel); + ID.AddString(Label); +} +void DIEObjectLabel::Profile(FoldingSetNodeID &ID) { + Profile(ID, Label.c_str()); +} + +#ifndef NDEBUG +void DIEObjectLabel::print(std::ostream &O) { + O << "Obj: " << Label; +} +#endif + +//===----------------------------------------------------------------------===// +// DIESectionOffset Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIESectionOffset::EmitValue(Dwarf *D, unsigned Form) const { + bool IsSmall = Form == dwarf::DW_FORM_data4; + D->EmitSectionOffset(Label.getTag(), Section.getTag(), + Label.getNumber(), Section.getNumber(), + IsSmall, IsEH, UseSet); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return TD->getPointerSize(); +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIESectionOffset::Profile(FoldingSetNodeID &ID, const DWLabel &Label, + const DWLabel &Section) { + ID.AddInteger(isSectionOffset); + Label.Profile(ID); + Section.Profile(ID); + // IsEH and UseSet are specific to the Label/Section that we will emit the + // offset for; so Label/Section are enough for uniqueness. +} +void DIESectionOffset::Profile(FoldingSetNodeID &ID) { + Profile(ID, Label, Section); +} + +#ifndef NDEBUG +void DIESectionOffset::print(std::ostream &O) { + O << "Off: "; + Label.print(O); + O << "-"; + Section.print(O); + O << "-" << IsEH << "-" << UseSet; +} +#endif + +//===----------------------------------------------------------------------===// +// DIEDelta Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIEDelta::EmitValue(Dwarf *D, unsigned Form) const { + bool IsSmall = Form == dwarf::DW_FORM_data4; + D->EmitDifference(LabelHi, LabelLo, IsSmall); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return TD->getPointerSize(); +} + +/// Profile - Used to gather unique data for the value folding set. 
+/// +void DIEDelta::Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi, + const DWLabel &LabelLo) { + ID.AddInteger(isDelta); + LabelHi.Profile(ID); + LabelLo.Profile(ID); +} +void DIEDelta::Profile(FoldingSetNodeID &ID) { + Profile(ID, LabelHi, LabelLo); +} + +#ifndef NDEBUG +void DIEDelta::print(std::ostream &O) { + O << "Del: "; + LabelHi.print(O); + O << "-"; + LabelLo.print(O); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEEntry Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit debug information entry offset. +/// +void DIEEntry::EmitValue(Dwarf *D, unsigned Form) const { + D->getAsm()->EmitInt32(Entry->getOffset()); +} + +/// Profile - Used to gather unique data for the value folding set. +/// +void DIEEntry::Profile(FoldingSetNodeID &ID, DIE *Entry) { + ID.AddInteger(isEntry); + ID.AddPointer(Entry); +} +void DIEEntry::Profile(FoldingSetNodeID &ID) { + ID.AddInteger(isEntry); + + if (Entry) + ID.AddPointer(Entry); + else + ID.AddPointer(this); +} + +#ifndef NDEBUG +void DIEEntry::print(std::ostream &O) { + O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec; +} +#endif + +//===----------------------------------------------------------------------===// +// DIEBlock Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the block. +/// +unsigned DIEBlock::ComputeSize(const TargetData *TD) { + if (!Size) { + const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Size += Values[i]->SizeOf(TD, AbbrevData[i].getForm()); + } + + return Size; +} + +/// EmitValue - Emit block data. +/// +void DIEBlock::EmitValue(Dwarf *D, unsigned Form) const { + const AsmPrinter *Asm = D->getAsm(); + switch (Form) { + case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block: Asm->EmitULEB128Bytes(Size); break; + default: assert(0 && "Improper form for block"); break; + } + + const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) { + Asm->EOL(); + Values[i]->EmitValue(D, AbbrevData[i].getForm()); + } +} + +/// SizeOf - Determine size of block data in bytes. +/// +unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: return Size + TargetAsmInfo::getULEB128Size(Size); + default: assert(0 && "Improper form for block"); break; + } + return 0; +} + +void DIEBlock::Profile(FoldingSetNodeID &ID) { + ID.AddInteger(isBlock); + DIE::Profile(ID); +} + +#ifndef NDEBUG +void DIEBlock::print(std::ostream &O) { + O << "Blk: "; + DIE::print(O, 5); +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h new file mode 100644 index 0000000..b14d91c --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -0,0 +1,549 @@ +//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "DwarfLabel.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+#include <iosfwd>
+
+namespace llvm {
+  class AsmPrinter;
+  class Dwarf;
+  class TargetData;
+
+  //===--------------------------------------------------------------------===//
+  /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
+  /// Dwarf abbreviation.
+  class VISIBILITY_HIDDEN DIEAbbrevData {
+    /// Attribute - Dwarf attribute code.
+    ///
+    unsigned Attribute;
+
+    /// Form - Dwarf form code.
+    ///
+    unsigned Form;
+  public:
+    DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+    // Accessors.
+    unsigned getAttribute() const { return Attribute; }
+    unsigned getForm() const { return Form; }
+
+    /// Profile - Used to gather unique data for the abbreviation folding set.
+    ///
+    void Profile(FoldingSetNodeID &ID) const;
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+  /// information object.
+  class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode {
+    /// Tag - Dwarf tag code.
+    ///
+    unsigned Tag;
+
+    /// Unique number for node.
+    ///
+    unsigned Number;
+
+    /// ChildrenFlag - Dwarf children flag.
+    ///
+    unsigned ChildrenFlag;
+
+    /// Data - Raw data bytes for abbreviation.
+    ///
+    SmallVector<DIEAbbrevData, 8> Data;
+  public:
+    DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+    virtual ~DIEAbbrev() {}
+
+    // Accessors.
+    unsigned getTag() const { return Tag; }
+    unsigned getNumber() const { return Number; }
+    unsigned getChildrenFlag() const { return ChildrenFlag; }
+    const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+    void setTag(unsigned T) { Tag = T; }
+    void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+    void setNumber(unsigned N) { Number = N; }
+
+    /// AddAttribute - Adds another set of attribute information to the
+    /// abbreviation.
+    void AddAttribute(unsigned Attribute, unsigned Form) {
+      Data.push_back(DIEAbbrevData(Attribute, Form));
+    }
+
+    /// AddFirstAttribute - Adds a set of attribute information to the front
+    /// of the abbreviation.
+    void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+      Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+    }
+
+    /// Profile - Used to gather unique data for the abbreviation folding set.
+    ///
+    void Profile(FoldingSetNodeID &ID) const;
+
+    /// Emit - Print the abbreviation using the specified asm printer.
+    ///
+    void Emit(const AsmPrinter *Asm) const;
+
+#ifndef NDEBUG
+    void print(std::ostream *O) {
+      if (O) print(*O);
+    }
+    void print(std::ostream &O);
+    void dump();
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIE - A structured debug information entry. Has an abbreviation which
+  /// describes its organization.
+  class CompileUnit;
+  class DIEValue;
+
+  class VISIBILITY_HIDDEN DIE : public FoldingSetNode {
+  protected:
+    /// Abbrev - Buffer for constructing abbreviation.
+    ///
+    DIEAbbrev Abbrev;
+
+    /// Offset - Offset in debug info section.
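+    /// Filled in once the unit is laid out; DIEEntry references to this
+    /// DIE are emitted as this offset (see DIEEntry::EmitValue).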
+ /// + unsigned Offset; + + /// Size - Size of instance + children. + /// + unsigned Size; + + /// Children DIEs. + /// + std::vector<DIE *> Children; + + /// Attributes values. + /// + SmallVector<DIEValue*, 32> Values; + + /// Abstract compile unit. + CompileUnit *AbstractCU; + public: + explicit DIE(unsigned Tag) + : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), Size(0) {} + virtual ~DIE(); + + // Accessors. + DIEAbbrev &getAbbrev() { return Abbrev; } + unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } + unsigned getTag() const { return Abbrev.getTag(); } + unsigned getOffset() const { return Offset; } + unsigned getSize() const { return Size; } + const std::vector<DIE *> &getChildren() const { return Children; } + SmallVector<DIEValue*, 32> &getValues() { return Values; } + CompileUnit *getAbstractCompileUnit() const { return AbstractCU; } + + void setTag(unsigned Tag) { Abbrev.setTag(Tag); } + void setOffset(unsigned O) { Offset = O; } + void setSize(unsigned S) { Size = S; } + void setAbstractCompileUnit(CompileUnit *CU) { AbstractCU = CU; } + + /// AddValue - Add a value and attributes to a DIE. + /// + void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) { + Abbrev.AddAttribute(Attribute, Form); + Values.push_back(Value); + } + + /// SiblingOffset - Return the offset of the debug information entry's + /// sibling. + unsigned SiblingOffset() const { return Offset + Size; } + + /// AddSiblingOffset - Add a sibling offset field to the front of the DIE. + /// + void AddSiblingOffset(); + + /// AddChild - Add a child to the DIE. + /// + void AddChild(DIE *Child) { + Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); + Children.push_back(Child); + } + + /// Detach - Detaches objects connected to it after copying. + /// + void Detach() { + Children.clear(); + } + + /// Profile - Used to gather unique data for the value folding set. + /// + void Profile(FoldingSetNodeID &ID) ; + +#ifndef NDEBUG + void print(std::ostream *O, unsigned IncIndent = 0) { + if (O) print(*O, IncIndent); + } + void print(std::ostream &O, unsigned IncIndent = 0); + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEValue - A debug information entry value. + /// + class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode { + public: + enum { + isInteger, + isString, + isLabel, + isAsIsLabel, + isSectionOffset, + isDelta, + isEntry, + isBlock + }; + protected: + /// Type - Type of data stored in the value. + /// + unsigned Type; + public: + explicit DIEValue(unsigned T) : Type(T) {} + virtual ~DIEValue() {} + + // Accessors + unsigned getType() const { return Type; } + + /// EmitValue - Emit value via the Dwarf writer. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const = 0; + + /// SizeOf - Return the size of a value in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0; + + /// Profile - Used to gather unique data for the value folding set. + /// + virtual void Profile(FoldingSetNodeID &ID) = 0; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *) { return true; } + +#ifndef NDEBUG + void print(std::ostream *O) { + if (O) print(*O); + } + virtual void print(std::ostream &O) = 0; + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEInteger - An integer value DIE. 
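+  /// BestForm picks the narrowest fixed-size DWARF form that still
+  /// round-trips the value, falling back to DW_FORM_data8.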
+ /// + class VISIBILITY_HIDDEN DIEInteger : public DIEValue { + uint64_t Integer; + public: + explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} + + /// BestForm - Choose the best form for integer. + /// + static unsigned BestForm(bool IsSigned, uint64_t Int) { + if (IsSigned) { + if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1; + if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2; + if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4; + } else { + if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; + if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; + if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4; + } + return dwarf::DW_FORM_data8; + } + + /// EmitValue - Emit integer of appropriate size. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of integer value in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, unsigned Int); + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEInteger *) { return true; } + static bool classof(const DIEValue *I) { return I->getType() == isInteger; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEString - A string value DIE. + /// + class VISIBILITY_HIDDEN DIEString : public DIEValue { + const std::string Str; + public: + explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {} + + /// EmitValue - Emit string value. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of string value in bytes. + /// + virtual unsigned SizeOf(const TargetData *, unsigned /*Form*/) const { + return Str.size() + sizeof(char); // sizeof('\0'); + } + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, const std::string &Str); + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEString *) { return true; } + static bool classof(const DIEValue *S) { return S->getType() == isString; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEDwarfLabel - A Dwarf internal label expression DIE. + // + class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue { + const DWLabel Label; + public: + explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {} + + /// EmitValue - Emit label value. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of label value in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, const DWLabel &Label); + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEDwarfLabel *) { return true; } + static bool classof(const DIEValue *L) { return L->getType() == isLabel; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEObjectLabel - A label to an object in code or data. 
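+  /// Unlike DIEDwarfLabel, the label is a plain string emitted as-is
+  /// (hence the isAsIsLabel kind) rather than a prefix/number pair.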
+  //
+  class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue {
+    const std::string Label;
+  public:
+    explicit DIEObjectLabel(const std::string &L)
+      : DIEValue(isAsIsLabel), Label(L) {}
+
+    /// EmitValue - Emit label value.
+    ///
+    virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+    /// SizeOf - Determine size of label value in bytes.
+    ///
+    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+    /// Profile - Used to gather unique data for the value folding set.
+    ///
+    static void Profile(FoldingSetNodeID &ID, const std::string &Label);
+    virtual void Profile(FoldingSetNodeID &ID);
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEObjectLabel *) { return true; }
+    static bool classof(const DIEValue *L) {
+      return L->getType() == isAsIsLabel;
+    }
+
+#ifndef NDEBUG
+    virtual void print(std::ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIESectionOffset - A section offset DIE.
+  ///
+  class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue {
+    const DWLabel Label;
+    const DWLabel Section;
+    bool IsEH : 1;
+    bool UseSet : 1;
+  public:
+    DIESectionOffset(const DWLabel &Lab, const DWLabel &Sec,
+                     bool isEH = false, bool useSet = true)
+      : DIEValue(isSectionOffset), Label(Lab), Section(Sec),
+        IsEH(isEH), UseSet(useSet) {}
+
+    /// EmitValue - Emit section offset.
+    ///
+    virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+    /// SizeOf - Determine size of section offset value in bytes.
+    ///
+    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+    /// Profile - Used to gather unique data for the value folding set.
+    ///
+    static void Profile(FoldingSetNodeID &ID, const DWLabel &Label,
+                        const DWLabel &Section);
+    virtual void Profile(FoldingSetNodeID &ID);
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIESectionOffset *) { return true; }
+    static bool classof(const DIEValue *D) {
+      return D->getType() == isSectionOffset;
+    }
+
+#ifndef NDEBUG
+    virtual void print(std::ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEDelta - A simple label difference DIE.
+  ///
+  class VISIBILITY_HIDDEN DIEDelta : public DIEValue {
+    const DWLabel LabelHi;
+    const DWLabel LabelLo;
+  public:
+    DIEDelta(const DWLabel &Hi, const DWLabel &Lo)
+      : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+    /// EmitValue - Emit delta value.
+    ///
+    virtual void EmitValue(Dwarf *D, unsigned Form) const;
+
+    /// SizeOf - Determine size of delta value in bytes.
+    ///
+    virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
+
+    /// Profile - Used to gather unique data for the value folding set.
+    ///
+    static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+                        const DWLabel &LabelLo);
+    virtual void Profile(FoldingSetNodeID &ID);
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEDelta *) { return true; }
+    static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+    virtual void print(std::ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEEntry - A pointer to another debug information entry. An instance of
+  /// this class can also be used as a proxy for a debug information entry not
+  /// yet defined (i.e. types).
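+  /// When emitted, the value is the referenced entry's offset in the debug
+  /// info section, so it can only be written after unit layout.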
+ class VISIBILITY_HIDDEN DIEEntry : public DIEValue { + DIE *Entry; + public: + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + + DIE *getEntry() const { return Entry; } + void setEntry(DIE *E) { Entry = E; } + + /// EmitValue - Emit debug information entry offset. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of debug information entry in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const { + return sizeof(int32_t); + } + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, DIE *Entry); + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEEntry *) { return true; } + static bool classof(const DIEValue *E) { return E->getType() == isEntry; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEBlock - A block of values. Primarily used for location expressions. + // + class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE { + unsigned Size; // Size in bytes excluding size header. + public: + DIEBlock() + : DIEValue(isBlock), DIE(0), Size(0) {} + virtual ~DIEBlock() {} + + /// ComputeSize - calculate the size of the block. + /// + unsigned ComputeSize(const TargetData *TD); + + /// BestForm - Choose the best form for data. + /// + unsigned BestForm() const { + if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1; + if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2; + if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4; + return dwarf::DW_FORM_block; + } + + /// EmitValue - Emit block data. + /// + virtual void EmitValue(Dwarf *D, unsigned Form) const; + + /// SizeOf - Determine size of block data in bytes. + /// + virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + virtual void Profile(FoldingSetNodeID &ID); + + // Implement isa/cast/dyncast. + static bool classof(const DIEBlock *) { return true; } + static bool classof(const DIEValue *E) { return E->getType() == isBlock; } + +#ifndef NDEBUG + virtual void print(std::ostream &O); +#endif + }; + +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp new file mode 100644 index 0000000..25217b0 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -0,0 +1,2610 @@ +//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfDebug.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/System/Path.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include <ostream>
+using namespace llvm;
+
+static TimerGroup &getDwarfTimerGroup() {
+  static TimerGroup DwarfTimerGroup("Dwarf Debugging");
+  return DwarfTimerGroup;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitDiesSetSize = 9; // log2(512)
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+static const unsigned InitValuesSetSize = 9; // log2(512)
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class VISIBILITY_HIDDEN CompileUnit {
+  /// ID - File identifier for source.
+  ///
+  unsigned ID;
+
+  /// Die - Compile unit debug information entry.
+  ///
+  DIE *Die;
+
+  /// GVToDieMap - Tracks the mapping of unit level debug information
+  /// variables to debug information entries.
+  std::map<GlobalVariable *, DIE *> GVToDieMap;
+
+  /// GVToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEEntry proxy.
+  std::map<GlobalVariable *, DIEEntry *> GVToDIEEntryMap;
+
+  /// Globals - A map of globally visible named entities for this unit.
+  ///
+  StringMap<DIE*> Globals;
+
+  /// DiesSet - Used to uniquely define dies within the compile unit.
+  ///
+  FoldingSet<DIE> DiesSet;
+public:
+  CompileUnit(unsigned I, DIE *D)
+    : ID(I), Die(D), DiesSet(InitDiesSetSize) {}
+  ~CompileUnit() { delete Die; }
+
+  // Accessors.
+  unsigned getID() const { return ID; }
+  DIE* getDie() const { return Die; }
+  StringMap<DIE*> &getGlobals() { return Globals; }
+
+  /// hasContent - Return true if this compile unit has something to write out.
+  ///
+  bool hasContent() const { return !Die->getChildren().empty(); }
+
+  /// AddGlobal - Add a new global entity to the compile unit.
+  ///
+  void AddGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+
+  /// getDieMapSlotFor - Returns the debug information entry map slot for the
+  /// specified debug variable.
+  DIE *&getDieMapSlotFor(GlobalVariable *GV) { return GVToDieMap[GV]; }
+
+  /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for
+  /// the specified debug variable.
+  DIEEntry *&getDIEEntrySlotFor(GlobalVariable *GV) {
+    return GVToDIEEntryMap[GV];
+  }
+
+  /// AddDie - Adds or interns the DIE to the compile unit.
+  ///
+  DIE *AddDie(DIE &Buffer) {
+    FoldingSetNodeID ID;
+    Buffer.Profile(ID);
+    void *Where;
+    DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where);
+
+    if (!Die) {
+      Die = new DIE(Buffer);
+      DiesSet.InsertNode(Die, Where);
+      this->Die->AddChild(Die);
+      Buffer.Detach();
+    }
+
+    return Die;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class VISIBILITY_HIDDEN DbgVariable {
+  DIVariable Var;      // Variable Descriptor.
+  unsigned FrameIndex; // Variable frame index.
+  bool InlinedFnVar;   // Variable for an inlined function.
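+  // (Lowered by CreateDbgScopeVariable into a DW_TAG_formal_parameter or
+  // DW_TAG_variable DIE; variables of abstract inlined-function instances
+  // get no DW_AT_location.)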
+public: + DbgVariable(DIVariable V, unsigned I, bool IFV) + : Var(V), FrameIndex(I), InlinedFnVar(IFV) {} + + // Accessors. + DIVariable getVariable() const { return Var; } + unsigned getFrameIndex() const { return FrameIndex; } + bool isInlinedFnVar() const { return InlinedFnVar; } +}; + +//===----------------------------------------------------------------------===// +/// DbgScope - This class is used to track scope information. +/// +class DbgConcreteScope; +class VISIBILITY_HIDDEN DbgScope { + DbgScope *Parent; // Parent to this scope. + DIDescriptor Desc; // Debug info descriptor for scope. + // Either subprogram or block. + unsigned StartLabelID; // Label ID of the beginning of scope. + unsigned EndLabelID; // Label ID of the end of scope. + SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope. + SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope. + SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs. +public: + DbgScope(DbgScope *P, DIDescriptor D) + : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0) {} + virtual ~DbgScope(); + + // Accessors. + DbgScope *getParent() const { return Parent; } + DIDescriptor getDesc() const { return Desc; } + unsigned getStartLabelID() const { return StartLabelID; } + unsigned getEndLabelID() const { return EndLabelID; } + SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } + SmallVector<DbgVariable *, 8> &getVariables() { return Variables; } + SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; } + void setStartLabelID(unsigned S) { StartLabelID = S; } + void setEndLabelID(unsigned E) { EndLabelID = E; } + + /// AddScope - Add a scope to the scope. + /// + void AddScope(DbgScope *S) { Scopes.push_back(S); } + + /// AddVariable - Add a variable to the scope. + /// + void AddVariable(DbgVariable *V) { Variables.push_back(V); } + + /// AddConcreteInst - Add a concrete instance to the scope. + /// + void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +#ifndef NDEBUG +void DbgScope::dump() const { + static unsigned IndentLevel = 0; + std::string Indent(IndentLevel, ' '); + + cerr << Indent; Desc.dump(); + cerr << " [" << StartLabelID << ", " << EndLabelID << "]\n"; + + IndentLevel += 2; + + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) + if (Scopes[i] != this) + Scopes[i]->dump(); + + IndentLevel -= 2; +} +#endif + +//===----------------------------------------------------------------------===// +/// DbgConcreteScope - This class is used to track a scope that holds concrete +/// instance information. +/// +class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope { + CompileUnit *Unit; + DIE *Die; // Debug info for this concrete scope. +public: + DbgConcreteScope(DIDescriptor D) : DbgScope(NULL, D) {} + + // Accessors. 
+  DIE *getDie() const { return Die; }
+  void setDie(DIE *D) { Die = D; }
+};
+
+DbgScope::~DbgScope() {
+  for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
+    delete Scopes[i];
+  for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+    delete Variables[j];
+  for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k)
+    delete ConcreteInsts[k];
+}
+
+} // end llvm namespace
+
+DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+  : Dwarf(OS, A, T, "dbg"), MainCU(0),
+    AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
+    ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(),
+    SectionSourceLines(), didInitial(false), shouldEmit(false),
+    FunctionDbgScope(0), DebugTimer(0) {
+  if (TimePassesIsEnabled)
+    DebugTimer = new Timer("Dwarf Debug Writer",
+                           getDwarfTimerGroup());
+}
+
+DwarfDebug::~DwarfDebug() {
+  for (unsigned j = 0, M = Values.size(); j < M; ++j)
+    delete Values[j];
+
+  for (DenseMap<const GlobalVariable *, DbgScope *>::iterator
+         I = AbstractInstanceRootMap.begin(),
+         E = AbstractInstanceRootMap.end(); I != E; ++I)
+    delete I->second;
+
+  delete DebugTimer;
+}
+
+/// AssignAbbrevNumber - Define a unique number for the abbreviation.
+///
+void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) {
+  // Profile the node so that we can make it unique.
+  FoldingSetNodeID ID;
+  Abbrev.Profile(ID);
+
+  // Check the set for priors.
+  DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+  // If it's newly added.
+  if (InSet == &Abbrev) {
+    // Add to abbreviation list.
+    Abbreviations.push_back(&Abbrev);
+
+    // Assign the vector position + 1 as its number.
+    Abbrev.setNumber(Abbreviations.size());
+  } else {
+    // Assign existing abbreviation number.
+    Abbrev.setNumber(InSet->getNumber());
+  }
+}
+
+/// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *DwarfDebug::CreateDIEEntry(DIE *Entry) {
+  DIEEntry *Value;
+
+  if (Entry) {
+    FoldingSetNodeID ID;
+    DIEEntry::Profile(ID, Entry);
+    void *Where;
+    Value = static_cast<DIEEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where));
+
+    if (Value) return Value;
+
+    Value = new DIEEntry(Entry);
+    ValuesSet.InsertNode(Value, Where);
+  } else {
+    Value = new DIEEntry(Entry);
+  }
+
+  Values.push_back(Value);
+  return Value;
+}
+
+/// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
+///
+void DwarfDebug::SetDIEEntry(DIEEntry *Value, DIE *Entry) {
+  Value->setEntry(Entry);
+
+  // Add to values set if not already there. If it is, we merely have a
+  // duplicate in the values list (no harm.)
+  ValuesSet.GetOrInsertNode(Value);
+}
+
+/// AddUInt - Add an unsigned integer attribute data and value.
+///
+void DwarfDebug::AddUInt(DIE *Die, unsigned Attribute,
+                         unsigned Form, uint64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(false, Integer);
+
+  FoldingSetNodeID ID;
+  DIEInteger::Profile(ID, Integer);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEInteger(Integer);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddSInt - Add a signed integer attribute data and value.
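+/// Like AddUInt, the value is interned in ValuesSet so that repeated
+/// constants share a single DIEInteger node.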
+///
+void DwarfDebug::AddSInt(DIE *Die, unsigned Attribute,
+                         unsigned Form, int64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(true, Integer);
+
+  FoldingSetNodeID ID;
+  DIEInteger::Profile(ID, (uint64_t)Integer);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEInteger(Integer);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddString - Add a string attribute data and value.
+///
+void DwarfDebug::AddString(DIE *Die, unsigned Attribute, unsigned Form,
+                           const std::string &String) {
+  FoldingSetNodeID ID;
+  DIEString::Profile(ID, String);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEString(String);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddLabel - Add a Dwarf label attribute data and value.
+///
+void DwarfDebug::AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                          const DWLabel &Label) {
+  FoldingSetNodeID ID;
+  DIEDwarfLabel::Profile(ID, Label);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEDwarfLabel(Label);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+///
+void DwarfDebug::AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                                const std::string &Label) {
+  FoldingSetNodeID ID;
+  DIEObjectLabel::Profile(ID, Label);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEObjectLabel(Label);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddSectionOffset - Add a section offset label attribute data and value.
+///
+void DwarfDebug::AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+                                  const DWLabel &Label, const DWLabel &Section,
+                                  bool isEH, bool useSet) {
+  FoldingSetNodeID ID;
+  DIESectionOffset::Profile(ID, Label, Section);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIESectionOffset(Label, Section, isEH, useSet);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddDelta - Add a label delta attribute data and value.
+///
+void DwarfDebug::AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+                          const DWLabel &Hi, const DWLabel &Lo) {
+  FoldingSetNodeID ID;
+  DIEDelta::Profile(ID, Hi, Lo);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = new DIEDelta(Hi, Lo);
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  }
+
+  Die->AddValue(Attribute, Form, Value);
+}
+
+/// AddBlock - Add block data.
+///
+void DwarfDebug::AddBlock(DIE *Die, unsigned Attribute, unsigned Form,
+                          DIEBlock *Block) {
+  Block->ComputeSize(TD);
+  FoldingSetNodeID ID;
+  Block->Profile(ID);
+  void *Where;
+  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+
+  if (!Value) {
+    Value = Block;
+    ValuesSet.InsertNode(Value, Where);
+    Values.push_back(Value);
+  } else {
+    // Already exists, reuse the previous one.
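+    // The block just built duplicates an interned node, so it is freed and
+    // the canonical copy is used in its place.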
+ delete Block; + Block = cast<DIEBlock>(Value); + } + + Die->AddValue(Attribute, Block->BestForm(), Value); +} + +/// AddSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::AddSourceLine(DIE *Die, const DIVariable *V) { + // If there is no compile unit specified, don't add a line #. + if (V->getCompileUnit().isNull()) + return; + + unsigned Line = V->getLineNumber(); + unsigned FileID = FindCompileUnit(V->getCompileUnit()).getID(); + assert(FileID && "Invalid file id"); + AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// AddSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) { + // If there is no compile unit specified, don't add a line #. + if (G->getCompileUnit().isNull()) + return; + + unsigned Line = G->getLineNumber(); + unsigned FileID = FindCompileUnit(G->getCompileUnit()).getID(); + assert(FileID && "Invalid file id"); + AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} +void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { + // If there is no compile unit specified, don't add a line #. + DICompileUnit CU = Ty->getCompileUnit(); + if (CU.isNull()) + return; + + unsigned Line = Ty->getLineNumber(); + unsigned FileID = FindCompileUnit(CU).getID(); + assert(FileID && "Invalid file id"); + AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// AddAddress - Add an address attribute to a die based on the location +/// provided. +void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location) { + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + } else { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + } else { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + AddBlock(Die, Attribute, 0, Block); +} + +/// AddType - Add a new type attribute to the specified entity. +void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { + if (Ty.isNull()) + return; + + // Check for pre-existence. + DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getGV()); + + // If it exists then use the existing value. + if (Slot) { + Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot); + return; + } + + // Set up proxy. + Slot = CreateDIEEntry(); + + // Construct type. + DIE Buffer(dwarf::DW_TAG_base_type); + if (Ty.isBasicType(Ty.getTag())) + ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getGV())); + else if (Ty.isDerivedType(Ty.getTag())) + ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getGV())); + else { + assert(Ty.isCompositeType(Ty.getTag()) && "Unknown kind of DIType"); + ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getGV())); + } + + // Add debug information entry to entity and appropriate context. 
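+  // If the type's context already has a DIE, nest the new type DIE under
+  // it; otherwise intern it at the compile unit level. Either way the
+  // proxy Slot is patched to point at the final DIE.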
+ DIE *Die = NULL; + DIDescriptor Context = Ty.getContext(); + if (!Context.isNull()) + Die = DW_Unit->getDieMapSlotFor(Context.getGV()); + + if (Die) { + DIE *Child = new DIE(Buffer); + Die->AddChild(Child); + Buffer.Detach(); + SetDIEEntry(Slot, Child); + } else { + Die = DW_Unit->AddDie(Buffer); + SetDIEEntry(Slot, Die); + } + + Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot); +} + +/// ConstructTypeDIE - Construct basic type die from DIBasicType. +void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DIBasicType BTy) { + // Get core information. + std::string Name; + BTy.getName(Name); + Buffer.setTag(dwarf::DW_TAG_base_type); + AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + uint64_t Size = BTy.getSizeInBits() >> 3; + AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); +} + +/// ConstructTypeDIE - Construct derived type die from DIDerivedType. +void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DIDerivedType DTy) { + // Get core information. + std::string Name; + DTy.getName(Name); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + AddType(DW_Unit, &Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + AddSourceLine(&Buffer, &DTy); +} + +/// ConstructTypeDIE - Construct type DIE from DICompositeType. +void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DICompositeType CTy) { + // Get core information. + std::string Name; + CTy.getName(Name); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_array_type: + ConstructArrayTypeDIE(DW_Unit, Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIEnumerator Enum(Elements.getElement(i).getGV()); + ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); + Buffer.AddChild(ElemDie); + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. + DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + AddType(DW_Unit, &Buffer, DIType(RTy.getGV())); + + // Add prototype flag. + AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIDescriptor Ty = Elements.getElement(i); + AddType(DW_Unit, Arg, DIType(Ty.getGV())); + Buffer.AddChild(Arg); + } + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. 
+ DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + if (Elements.isNull()) + break; + + // Add elements to structure type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.getTag() == dwarf::DW_TAG_subprogram) + ElemDie = CreateSubprogramDIE(DW_Unit, + DISubprogram(Element.getGV())); + else if (Element.getTag() == dwarf::DW_TAG_variable) // ?? + ElemDie = CreateGlobalVariableDIE(DW_Unit, + DIGlobalVariable(Element.getGV())); + else + ElemDie = CreateMemberDIE(DW_Unit, + DIDerivedType(Element.getGV())); + Buffer.AddChild(ElemDie); + } + + // FIXME: We'd like an API to register additional attributes for the + // frontend to use while synthesizing, and then we'd use that api in clang + // instead of this. + if (Name == "__block_literal_generic") + AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + AddUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else { + // Add zero size if it is not a forward declaration. + if (CTy.isForwardDecl()) + AddUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + else + AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + } + + // Add source line info if available. + if (!CTy.isForwardDecl()) + AddSourceLine(&Buffer, &CTy); + } +} + +/// ConstructSubrangeDIE - Construct subrange DIE from DISubrange. +void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ + int64_t L = SR.getLo(); + int64_t H = SR.getHi(); + DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); + + if (L != H) { + AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + if (L) + AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + } + + Buffer.AddChild(DW_Subrange); +} + +/// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType. +void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + DICompositeType *CTy) { + Buffer.setTag(dwarf::DW_TAG_array_type); + if (CTy->getTag() == dwarf::DW_TAG_vector_type) + AddUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + + // Emit derived type. + AddType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom()); + DIArray Elements = CTy->getTypeArray(); + + // Construct an anonymous type for index type. + DIE IdxBuffer(dwarf::DW_TAG_base_type); + AddUInt(&IdxBuffer, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + AddUInt(&IdxBuffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_signed); + DIE *IndexTy = DW_Unit->AddDie(IdxBuffer); + + // Add subranges to array type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) + ConstructSubrangeDIE(Buffer, DISubrange(Element.getGV()), IndexTy); + } +} + +/// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator. 
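+/// Each enumerator becomes a DW_TAG_enumerator child holding the name and
+/// the signed constant value (DW_AT_const_value, DW_FORM_sdata).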
+DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { + DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); + std::string Name; + ETy->getName(Name); + AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + int64_t Value = ETy->getEnumValue(); + AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + return Enumerator; +} + +/// CreateGlobalVariableDIE - Create new DIE using GV. +DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit, + const DIGlobalVariable &GV) { + DIE *GVDie = new DIE(dwarf::DW_TAG_variable); + std::string Name; + GV.getDisplayName(Name); + AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + std::string LinkageName; + GV.getLinkageName(LinkageName); + if (!LinkageName.empty()) + AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + LinkageName); + AddType(DW_Unit, GVDie, GV.getType()); + if (!GV.isLocalToUnit()) + AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + AddSourceLine(GVDie, &GV); + return GVDie; +} + +/// CreateMemberDIE - Create new member DIE. +DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ + DIE *MemberDie = new DIE(DT.getTag()); + std::string Name; + DT.getName(Name); + if (!Name.empty()) + AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); + + AddSourceLine(MemberDie, &DT); + + uint64_t Size = DT.getSizeInBits(); + uint64_t FieldSize = DT.getOriginalTypeSize(); + + if (Size != FieldSize) { + // Handle bitfield. + AddUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); + AddUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); + + uint64_t Offset = DT.getOffsetInBits(); + uint64_t FieldOffset = Offset; + uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size); + AddUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); + } + + DIEBlock *Block = new DIEBlock(); + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); + AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, Block); + + if (DT.isProtected()) + AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + dwarf::DW_ACCESS_protected); + else if (DT.isPrivate()) + AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + dwarf::DW_ACCESS_private); + + return MemberDie; +} + +/// CreateSubprogramDIE - Create new DIE using SP. +DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, + const DISubprogram &SP, + bool IsConstructor, + bool IsInlined) { + DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); + + std::string Name; + SP.getName(Name); + AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + std::string LinkageName; + SP.getLinkageName(LinkageName); + + if (!LinkageName.empty()) + AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + LinkageName); + + AddSourceLine(SPDie, &SP); + + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + + // Add prototyped tag, if C or ObjC. 
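+  // DW_AT_prototyped only makes sense for languages that allow
+  // unprototyped functions, so it is restricted to the C family here.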
+ unsigned Lang = SP.getCompileUnit().getLanguage(); + if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 || + Lang == dwarf::DW_LANG_ObjC) + AddUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + + // Add Return Type. + unsigned SPTag = SPTy.getTag(); + if (!IsConstructor) { + if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) + AddType(DW_Unit, SPDie, SPTy); + else + AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getGV())); + } + + if (!SP.isDefinition()) { + AddUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. Do not add arguments for subprogram definition. They will + // be handled through RecordVariable. + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + AddType(DW_Unit, Arg, DIType(Args.getElement(i).getGV())); + AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? + SPDie->AddChild(Arg); + } + } + + if (!SP.isLocalToUnit() && !IsInlined) + AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + // DW_TAG_inlined_subroutine may refer to this DIE. + DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getGV()); + Slot = SPDie; + return SPDie; +} + +/// FindCompileUnit - Get the compile unit for the given descriptor. +/// +CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const { + DenseMap<Value *, CompileUnit *>::const_iterator I = + CompileUnitMap.find(Unit.getGV()); + assert(I != CompileUnitMap.end() && "Missing compile unit."); + return *I->second; +} + +/// CreateDbgScopeVariable - Create a new scope variable. +/// +DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { + // Get the descriptor. + const DIVariable &VD = DV->getVariable(); + + // Translate tag to proper Dwarf tag. The result variable is dropped for + // now. + unsigned Tag; + switch (VD.getTag()) { + case dwarf::DW_TAG_return_variable: + return NULL; + case dwarf::DW_TAG_arg_variable: + Tag = dwarf::DW_TAG_formal_parameter; + break; + case dwarf::DW_TAG_auto_variable: // fall thru + default: + Tag = dwarf::DW_TAG_variable; + break; + } + + // Define variable debug information entry. + DIE *VariableDie = new DIE(Tag); + std::string Name; + VD.getName(Name); + AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add source line info if available. + AddSourceLine(VariableDie, &VD); + + // Add variable type. + AddType(Unit, VariableDie, VD.getType()); + + // Add variable address. + if (!DV->isInlinedFnVar()) { + // Variables for abstract instances of inlined functions don't get a + // location. + MachineLocation Location; + Location.set(RI->getFrameRegister(*MF), + RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); + AddAddress(VariableDie, dwarf::DW_AT_location, Location); + } + + return VariableDie; +} + +/// getOrCreateScope - Returns the scope associated with the given descriptor. +/// +DbgScope *DwarfDebug::getOrCreateScope(GlobalVariable *V) { + DbgScope *&Slot = DbgScopeMap[V]; + if (Slot) return Slot; + + DbgScope *Parent = NULL; + DIBlock Block(V); + + // Don't create a new scope if we already created one for an inlined function. + DenseMap<const GlobalVariable *, DbgScope *>::iterator + II = AbstractInstanceRootMap.find(V); + if (II != AbstractInstanceRootMap.end()) + return LexicalScopeStack.back(); + + if (!Block.isNull()) { + DIDescriptor ParentDesc = Block.getContext(); + Parent = + ParentDesc.isNull() ? 
NULL : getOrCreateScope(ParentDesc.getGV());
+  }
+
+  Slot = new DbgScope(Parent, DIDescriptor(V));
+
+  if (Parent)
+    Parent->AddScope(Slot);
+  else
+    // The first function is the top-level function.
+    FunctionDbgScope = Slot;
+
+  return Slot;
+}
+
+/// ConstructDbgScope - Construct the components of a scope.
+///
+void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope,
+                                   unsigned ParentStartID,
+                                   unsigned ParentEndID,
+                                   DIE *ParentDie, CompileUnit *Unit) {
+  // Add variables to scope.
+  SmallVector<DbgVariable *, 8> &Variables = ParentScope->getVariables();
+  for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+    DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit);
+    if (VariableDie) ParentDie->AddChild(VariableDie);
+  }
+
+  // Add concrete instances to scope.
+  SmallVector<DbgConcreteScope *, 8> &ConcreteInsts =
+    ParentScope->getConcreteInsts();
+  for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) {
+    DbgConcreteScope *ConcreteInst = ConcreteInsts[i];
+    DIE *Die = ConcreteInst->getDie();
+
+    unsigned StartID = ConcreteInst->getStartLabelID();
+    unsigned EndID = ConcreteInst->getEndLabelID();
+
+    // Add the scope bounds.
+    if (StartID)
+      AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+               DWLabel("label", StartID));
+    else
+      AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+               DWLabel("func_begin", SubprogramCount));
+
+    if (EndID)
+      AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+               DWLabel("label", EndID));
+    else
+      AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+               DWLabel("func_end", SubprogramCount));
+
+    ParentDie->AddChild(Die);
+  }
+
+  // Add nested scopes.
+  SmallVector<DbgScope *, 4> &Scopes = ParentScope->getScopes();
+  for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+    // Define the Scope debug information entry.
+    DbgScope *Scope = Scopes[j];
+
+    unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+    unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+    // Ignore empty scopes.
+    if (StartID == EndID && StartID != 0) continue;
+
+    // Skip scopes that have no variables, concrete instances, or nested
+    // scopes; there is nothing to emit for them.
+    if (Scope->getScopes().empty() && Scope->getVariables().empty() &&
+        Scope->getConcreteInsts().empty())
+      continue;
+
+    if (StartID == ParentStartID && EndID == ParentEndID) {
+      // Just add stuff to the parent scope.
+      ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
+    } else {
+      DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block);
+
+      // Add the scope bounds.
+      if (StartID)
+        AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+                 DWLabel("label", StartID));
+      else
+        AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+                 DWLabel("func_begin", SubprogramCount));
+
+      if (EndID)
+        AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+                 DWLabel("label", EndID));
+      else
+        AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+                 DWLabel("func_end", SubprogramCount));
+
+      // Add the scope's contents.
+      ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit);
+      ParentDie->AddChild(ScopeDie);
+    }
+  }
+}
+
+/// ConstructFunctionDbgScope - Construct the scope for the subprogram.
+///
+void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope,
+                                           bool AbstractScope) {
+  // Exit if there is no root scope.
+  if (!RootScope) return;
+  DIDescriptor Desc = RootScope->getDesc();
+  if (Desc.isNull())
+    return;
+
+  // Get the subprogram debug information entry.
+  DISubprogram SPD(Desc.getGV());
+
+  // Get the compile unit context.
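+  // (Editor's note, not part of the original patch.) On targets that keep a
+  // single compile unit per object file, every DIE lives in MainCU (see
+  // DwarfDebug.h); otherwise the unit is found through the subprogram's
+  // compile unit descriptor.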
+ CompileUnit *Unit = MainCU; + if (!Unit) + Unit = &FindCompileUnit(SPD.getCompileUnit()); + + // Get the subprogram die. + DIE *SPDie = Unit->getDieMapSlotFor(SPD.getGV()); + assert(SPDie && "Missing subprogram descriptor"); + + if (!AbstractScope) { + // Add the function bounds. + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + } + + ConstructDbgScope(RootScope, 0, 0, SPDie, Unit); +} + +/// ConstructDefaultDbgScope - Construct a default scope for the subprogram. +/// +void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { + const char *FnName = MF->getFunction()->getNameStart(); + if (MainCU) { + StringMap<DIE*> &Globals = MainCU->getGlobals(); + StringMap<DIE*>::iterator GI = Globals.find(FnName); + if (GI != Globals.end()) { + DIE *SPDie = GI->second; + + // Add the function bounds. + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + return; + } + } else { + for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) { + CompileUnit *Unit = CompileUnits[i]; + StringMap<DIE*> &Globals = Unit->getGlobals(); + StringMap<DIE*>::iterator GI = Globals.find(FnName); + if (GI != Globals.end()) { + DIE *SPDie = GI->second; + + // Add the function bounds. + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + return; + } + } + } + +#if 0 + // FIXME: This is causing an abort because C++ mangled names are compared with + // their unmangled counterparts. See PR2885. Don't do this assert. + assert(0 && "Couldn't find DIE for machine function!"); +#endif +} + +/// GetOrCreateSourceID - Look up the source id with the given directory and +/// source file names. If none currently exists, create a new id and insert it +/// in the SourceIds map. This can update DirectoryNames and SourceFileNames +/// maps as well. +unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName, + const std::string &FileName) { + unsigned DId; + StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName); + if (DI != DirectoryIdMap.end()) { + DId = DI->getValue(); + } else { + DId = DirectoryNames.size() + 1; + DirectoryIdMap[DirName] = DId; + DirectoryNames.push_back(DirName); + } + + unsigned FId; + StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName); + if (FI != SourceFileIdMap.end()) { + FId = FI->getValue(); + } else { + FId = SourceFileNames.size() + 1; + SourceFileIdMap[FileName] = FId; + SourceFileNames.push_back(FileName); + } + + DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI = + SourceIdMap.find(std::make_pair(DId, FId)); + if (SI != SourceIdMap.end()) + return SI->second; + + unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0. 
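+  // (Editor's illustration, not part of the original patch.) All three ids
+  // are 1-based and handed out on first sight. For example, calling
+  // GetOrCreateSourceID("/tmp", "a.c") on fresh maps yields DId = 1 and
+  // FId = 1, and records the pair (1, 1) as source id 1, which is the value
+  // DW_AT_decl_file and the line table then reference.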
+  SourceIdMap[std::make_pair(DId, FId)] = SrcId;
+  SourceIds.push_back(std::make_pair(DId, FId));
+
+  return SrcId;
+}
+
+void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) {
+  DICompileUnit DIUnit(GV);
+  std::string Dir, FN, Prod;
+  unsigned ID = GetOrCreateSourceID(DIUnit.getDirectory(Dir),
+                                    DIUnit.getFilename(FN));
+
+  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+  AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                   DWLabel("section_line", 0), DWLabel("section_line", 0),
+                   false);
+  AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+            DIUnit.getProducer(Prod));
+  AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+          DIUnit.getLanguage());
+  AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+
+  if (!Dir.empty())
+    AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+  if (DIUnit.isOptimized())
+    AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+  std::string Flags;
+  DIUnit.getFlags(Flags);
+  if (!Flags.empty())
+    AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+  unsigned RVer = DIUnit.getRunTimeVersion();
+  if (RVer)
+    AddUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+            dwarf::DW_FORM_data1, RVer);
+
+  CompileUnit *Unit = new CompileUnit(ID, Die);
+  if (DIUnit.isMain()) {
+    assert(!MainCU && "Multiple main compile units are found!");
+    MainCU = Unit;
+  }
+
+  CompileUnitMap[DIUnit.getGV()] = Unit;
+  CompileUnits.push_back(Unit);
+}
+
+/// ConstructCompileUnits - Create compile unit DIEs.
+void DwarfDebug::ConstructCompileUnits() {
+  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.compile_units");
+  if (!Root)
+    return;
+  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+         "Malformed compile unit descriptor anchor type");
+  Constant *RootC = cast<Constant>(*Root->use_begin());
+  assert(RootC->hasNUsesOrMore(1) &&
+         "Malformed compile unit descriptor anchor type");
+
+  for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+       UI != UE; ++UI)
+    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+         UUI != UUE; ++UUI) {
+      GlobalVariable *GV = cast<GlobalVariable>(*UUI);
+      ConstructCompileUnit(GV);
+    }
+}
+
+bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
+  DIGlobalVariable DI_GV(GV);
+  CompileUnit *DW_Unit = MainCU;
+  if (!DW_Unit)
+    DW_Unit = &FindCompileUnit(DI_GV.getCompileUnit());
+
+  // Check for pre-existence.
+  DIE *&Slot = DW_Unit->getDieMapSlotFor(DI_GV.getGV());
+  if (Slot)
+    return false;
+
+  DIE *VariableDie = CreateGlobalVariableDIE(DW_Unit, DI_GV);
+
+  // Add address.
+  DIEBlock *Block = new DIEBlock();
+  AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+  std::string GLN;
+  AddObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+                 Asm->getGlobalLinkName(DI_GV.getGlobal(), GLN));
+  AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
+
+  // Add to map.
+  Slot = VariableDie;
+
+  // Add to context owner.
+  DW_Unit->getDie()->AddChild(VariableDie);
+
+  // Expose as global. FIXME - need to check external flag.
+  std::string Name;
+  DW_Unit->AddGlobal(DI_GV.getName(Name), VariableDie);
+  return true;
+}
+
+/// ConstructGlobalVariableDIEs - Create DIEs for each of the externally
+/// visible global variables. Return true if at least one global DIE is
+/// created.
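+/// (Editor's note, not part of the original patch.) Like
+/// ConstructCompileUnits above, this walks the uses of an anchor variable,
+/// here "llvm.dbg.global_variables": the anchor's single constant user is
+/// visited, and every global variable found among that constant's users
+/// describes one debug descriptor to translate into a DIE.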
+bool DwarfDebug::ConstructGlobalVariableDIEs() {
+  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.global_variables");
+  if (!Root)
+    return false;
+
+  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+         "Malformed global variable descriptor anchor type");
+  Constant *RootC = cast<Constant>(*Root->use_begin());
+  assert(RootC->hasNUsesOrMore(1) &&
+         "Malformed global variable descriptor anchor type");
+
+  bool Result = false;
+  for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+       UI != UE; ++UI)
+    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+         UUI != UUE; ++UUI)
+      Result |= ConstructGlobalVariableDIE(cast<GlobalVariable>(*UUI));
+
+  return Result;
+}
+
+bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
+  DISubprogram SP(GV);
+  CompileUnit *Unit = MainCU;
+  if (!Unit)
+    Unit = &FindCompileUnit(SP.getCompileUnit());
+
+  // Check for pre-existence.
+  DIE *&Slot = Unit->getDieMapSlotFor(GV);
+  if (Slot)
+    return false;
+
+  if (!SP.isDefinition())
+    // This is a method declaration which will be handled while constructing
+    // the class type.
+    return false;
+
+  DIE *SubprogramDie = CreateSubprogramDIE(Unit, SP);
+
+  // Add to map.
+  Slot = SubprogramDie;
+
+  // Add to context owner.
+  Unit->getDie()->AddChild(SubprogramDie);
+
+  // Expose as global.
+  std::string Name;
+  Unit->AddGlobal(SP.getName(Name), SubprogramDie);
+  return true;
+}
+
+/// ConstructSubprograms - Create DIEs for each of the externally visible
+/// subprograms. Return true if at least one subprogram DIE is created.
+bool DwarfDebug::ConstructSubprograms() {
+  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.subprograms");
+  if (!Root)
+    return false;
+
+  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
+         "Malformed subprogram descriptor anchor type");
+  Constant *RootC = cast<Constant>(*Root->use_begin());
+  assert(RootC->hasNUsesOrMore(1) &&
+         "Malformed subprogram descriptor anchor type");
+
+  bool Result = false;
+  for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+       UI != UE; ++UI)
+    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+         UUI != UUE; ++UUI)
+      Result |= ConstructSubprogram(cast<GlobalVariable>(*UUI));
+
+  return Result;
+}
+
+/// SetDebugInfo - Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::SetDebugInfo(MachineModuleInfo *mmi) {
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  // Create all the compile unit DIEs.
+  ConstructCompileUnits();
+
+  if (CompileUnits.empty()) {
+    if (TimePassesIsEnabled)
+      DebugTimer->stopTimer();
+
+    return;
+  }
+
+  // Create DIEs for each of the externally visible global variables.
+  bool globalDIEs = ConstructGlobalVariableDIEs();
+
+  // Create DIEs for each of the externally visible subprograms.
+  bool subprogramDIEs = ConstructSubprograms();
+
+  // If no debug info is available for any global variable or subprogram,
+  // there is nothing to emit.
+  if (!globalDIEs && !subprogramDIEs) {
+    if (TimePassesIsEnabled)
+      DebugTimer->stopTimer();
+
+    return;
+  }
+
+  MMI = mmi;
+  shouldEmit = true;
+  MMI->setDebugInfoAvailability(true);
+
+  // Prime section data.
+  SectionMap.insert(TAI->getTextSection());
+
+  // Print out .file directives to specify files for .loc directives. These are
+  // printed out early so that they precede any .loc directives.
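+  // (Editor's illustration, not part of the original patch.) In .loc/.file
+  // mode the emitted assembly contains, e.g.:
+  //   .file 1 "/tmp/a.c"
+  //   ...
+  //   .loc 1 42 0
+  // and the assembler builds the .debug_line table itself, which is why
+  // EmitDebugLines() below returns early in that mode.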
+  if (TAI->hasDotLocAndDotFile()) {
+    for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
+      // Remember source id starts at 1.
+      std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
+      sys::Path FullPath(getSourceDirectoryName(Id.first));
+      bool AppendOk =
+        FullPath.appendComponent(getSourceFileName(Id.second));
+      assert(AppendOk && "Could not append filename to directory!");
+      AppendOk = false;  // Presumably quiets an unused-variable warning when
+                         // asserts are compiled out.
+      Asm->EmitFile(i, FullPath.toString());
+      Asm->EOL();
+    }
+  }
+
+  // Emit initial sections.
+  EmitInitial();
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::EndModule() {
+  if (!ShouldEmitDwarfDebug())
+    return;
+
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  // Standard sections final addresses.
+  Asm->SwitchToSection(TAI->getTextSection());
+  EmitLabel("text_end", 0);
+  Asm->SwitchToSection(TAI->getDataSection());
+  EmitLabel("data_end", 0);
+
+  // End text sections.
+  for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+    Asm->SwitchToSection(SectionMap[i]);
+    EmitLabel("section_end", i);
+  }
+
+  // Emit common frame information.
+  EmitCommonDebugFrame();
+
+  // Emit function debug frame information.
+  for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
+         E = DebugFrames.end(); I != E; ++I)
+    EmitFunctionDebugFrame(*I);
+
+  // Compute DIE offsets and sizes.
+  SizeAndOffsets();
+
+  // Emit all the DIEs into a debug info section.
+  EmitDebugInfo();
+
+  // Emit the corresponding abbreviations into an abbrev section.
+  EmitAbbreviations();
+
+  // Emit source line correspondence into a debug line section.
+  EmitDebugLines();
+
+  // Emit info into a debug pubnames section.
+  EmitDebugPubNames();
+
+  // Emit info into a debug str section.
+  EmitDebugStr();
+
+  // Emit info into a debug loc section.
+  EmitDebugLoc();
+
+  // Emit info into a debug aranges section.
+  EmitDebugARanges();
+
+  // Emit info into a debug ranges section.
+  EmitDebugRanges();
+
+  // Emit info into a debug macinfo section.
+  EmitDebugMacInfo();
+
+  // Emit inline info.
+  EmitDebugInlineInfo();
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+}
+
+/// BeginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfDebug::BeginFunction(MachineFunction *MF) {
+  this->MF = MF;
+
+  if (!ShouldEmitDwarfDebug()) return;
+
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  // Begin accumulating function debug information.
+  MMI->BeginFunction(MF);
+
+  // Assumes we are in the correct section after the entry point.
+  EmitLabel("func_begin", ++SubprogramCount);
+
+  // Emit label for the implicitly defined dbg.stoppoint at the start of the
+  // function.
+  DebugLoc FDL = MF->getDefaultDebugLoc();
+  if (!FDL.isUnknown()) {
+    DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
+    unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col,
+                                        DICompileUnit(DLT.CompileUnit));
+    Asm->printLabel(LabelID);
+  }
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::EndFunction(MachineFunction *MF) {
+  if (!ShouldEmitDwarfDebug()) return;
+
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  // Define end label for subprogram.
+  EmitLabel("func_end", SubprogramCount);
+
+  // Get function line info.
+  if (!Lines.empty()) {
+    // Get section line info.
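+    // (Editor's note, not part of the original patch.) SectionMap is a
+    // UniqueVector, so insert() returns the existing 1-based id when the
+    // current section has been seen before; that id indexes
+    // SectionSourceLines below, hence the ID-1.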
+ unsigned ID = SectionMap.insert(Asm->CurrentSection_); + if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); + std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1]; + // Append the function info to section info. + SectionLineInfos.insert(SectionLineInfos.end(), + Lines.begin(), Lines.end()); + } + + // Construct the DbgScope for abstract instances. + for (SmallVector<DbgScope *, 32>::iterator + I = AbstractInstanceRootList.begin(), + E = AbstractInstanceRootList.end(); I != E; ++I) + ConstructFunctionDbgScope(*I); + + // Construct scopes for subprogram. + if (FunctionDbgScope) + ConstructFunctionDbgScope(FunctionDbgScope); + else + // FIXME: This is wrong. We are essentially getting past a problem with + // debug information not being able to handle unreachable blocks that have + // debug information in them. In particular, those unreachable blocks that + // have "region end" info in them. That situation results in the "root + // scope" not being created. If that's the case, then emit a "default" + // scope, i.e., one that encompasses the whole function. This isn't + // desirable. And a better way of handling this (and all of the debugging + // information) needs to be explored. + ConstructDefaultDbgScope(MF); + + DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount, + MMI->getFrameMoves())); + + // Clear debug info + if (FunctionDbgScope) { + delete FunctionDbgScope; + DbgScopeMap.clear(); + DbgAbstractScopeMap.clear(); + DbgConcreteScopeMap.clear(); + InlinedVariableScopes.clear(); + FunctionDbgScope = NULL; + LexicalScopeStack.clear(); + AbstractInstanceRootList.clear(); + } + + Lines.clear(); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); +} + +/// RecordSourceLine - Records location information and associates it with a +/// label. Returns a unique label ID used to generate a label and provide +/// correspondence to the source line list. +unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) { + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + CompileUnit *Unit = CompileUnitMap[V]; + assert(Unit && "Unable to find CompileUnit"); + unsigned ID = MMI->NextLabelID(); + Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID)); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + + return ID; +} + +/// RecordSourceLine - Records location information and associates it with a +/// label. Returns a unique label ID used to generate a label and provide +/// correspondence to the source line list. +unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, + DICompileUnit CU) { + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + std::string Dir, Fn; + unsigned Src = GetOrCreateSourceID(CU.getDirectory(Dir), + CU.getFilename(Fn)); + unsigned ID = MMI->NextLabelID(); + Lines.push_back(SrcLineInfo(Line, Col, Src, ID)); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + + return ID; +} + +/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be +/// timed. Look up the source id with the given directory and source file +/// names. If none currently exists, create a new id and insert it in the +/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as +/// well. 
+unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
+                                         const std::string &FileName) {
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  unsigned SrcId = GetOrCreateSourceID(DirName, FileName);
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+
+  return SrcId;
+}
+
+/// RecordRegionStart - Indicate the start of a region.
+unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) {
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  DbgScope *Scope = getOrCreateScope(V);
+  unsigned ID = MMI->NextLabelID();
+  if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
+  LexicalScopeStack.push_back(Scope);
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+
+  return ID;
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) {
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  DbgScope *Scope = getOrCreateScope(V);
+  unsigned ID = MMI->NextLabelID();
+  Scope->setEndLabelID(ID);
+  if (LexicalScopeStack.size() != 0)
+    LexicalScopeStack.pop_back();
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+
+  return ID;
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+void DwarfDebug::RecordVariable(GlobalVariable *GV, unsigned FrameIndex,
+                                const MachineInstr *MI) {
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  DIDescriptor Desc(GV);
+  DbgScope *Scope = NULL;
+  bool InlinedFnVar = false;
+
+  if (Desc.getTag() == dwarf::DW_TAG_variable) {
+    // GV is a global variable.
+    DIGlobalVariable DG(GV);
+    Scope = getOrCreateScope(DG.getContext().getGV());
+  } else {
+    DenseMap<const MachineInstr *, DbgScope *>::iterator
+      SI = InlinedVariableScopes.find(MI);
+
+    if (SI != InlinedVariableScopes.end()) {
+      // or GV is an inlined local variable.
+      Scope = SI->second;
+    } else {
+      DIVariable DV(GV);
+      GlobalVariable *V = DV.getContext().getGV();
+
+      // FIXME: The code that checks for the inlined local variable is a hack!
+      DenseMap<const GlobalVariable *, DbgScope *>::iterator
+        AI = AbstractInstanceRootMap.find(V);
+
+      if (AI != AbstractInstanceRootMap.end()) {
+        // This method is called each time a DECLARE node is encountered. For
+        // an inlined function, that could be many, many times. We only want
+        // to add the variables to the abstract instance's DIE once, so check
+        // whether they have been added already.
+        DenseMap<const GlobalVariable *,
+          SmallSet<const GlobalVariable *, 32> >::iterator
+          IP = InlinedParamMap.find(V);
+
+        if (IP != InlinedParamMap.end() && IP->second.count(GV) > 0) {
+          if (TimePassesIsEnabled)
+            DebugTimer->stopTimer();
+          return;
+        }
+
+        // or GV is an inlined local variable.
+        Scope = AI->second;
+        InlinedParamMap[V].insert(GV);
+        InlinedFnVar = true;
+      } else {
+        // or GV is a local variable.
+        Scope = getOrCreateScope(V);
+      }
+    }
+  }
+
+  assert(Scope && "Unable to find the variable's scope");
+  DbgVariable *DV = new DbgVariable(DIVariable(GV), FrameIndex, InlinedFnVar);
+  Scope->AddVariable(DV);
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+}
+
+/// RecordInlinedFnStart - Indicate the start of an inlined subroutine.
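+/// (Editor's note, not part of the original patch.) The first time a given
+/// function is seen inlined, an abstract instance root is created: a
+/// subprogram DIE carrying DW_AT_inline. Every inlined call site then gets
+/// its own DW_TAG_inlined_subroutine that points back at that root through
+/// DW_AT_abstract_origin and records the call site in DW_AT_call_file,
+/// DW_AT_call_line and DW_AT_call_column.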
+unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, + unsigned Line, unsigned Col) { + unsigned LabelID = MMI->NextLabelID(); + + if (!TAI->doesDwarfUsesInlineInfoSection()) + return LabelID; + + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + GlobalVariable *GV = SP.getGV(); + DenseMap<const GlobalVariable *, DbgScope *>::iterator + II = AbstractInstanceRootMap.find(GV); + + if (II == AbstractInstanceRootMap.end()) { + // Create an abstract instance entry for this inlined function if it doesn't + // already exist. + DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV)); + + // Get the compile unit context. + CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit()); + DIE *SPDie = Unit->getDieMapSlotFor(GV); + if (!SPDie) + SPDie = CreateSubprogramDIE(Unit, SP, false, true); + + // Mark as being inlined. This makes this subprogram entry an abstract + // instance root. + // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only + // that it's defined. That probably won't change in the future. However, + // this could be more elegant. + AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined); + + // Keep track of the abstract scope for this function. + DbgAbstractScopeMap[GV] = Scope; + + AbstractInstanceRootMap[GV] = Scope; + AbstractInstanceRootList.push_back(Scope); + } + + // Create a concrete inlined instance for this inlined function. + DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV)); + DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine); + CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit()); + ScopeDie->setAbstractCompileUnit(Unit); + + DIE *Origin = Unit->getDieMapSlotFor(GV); + AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, Origin); + AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, Unit->getID()); + AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line); + AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col); + + ConcreteScope->setDie(ScopeDie); + ConcreteScope->setStartLabelID(LabelID); + MMI->RecordUsedDbgLabel(LabelID); + + LexicalScopeStack.back()->AddConcreteInst(ConcreteScope); + + // Keep track of the concrete scope that's inlined into this function. + DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator + SI = DbgConcreteScopeMap.find(GV); + + if (SI == DbgConcreteScopeMap.end()) + DbgConcreteScopeMap[GV].push_back(ConcreteScope); + else + SI->second.push_back(ConcreteScope); + + // Track the start label for this inlined function. + DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator + I = InlineInfo.find(GV); + + if (I == InlineInfo.end()) + InlineInfo[GV].push_back(LabelID); + else + I->second.push_back(LabelID); + + if (TimePassesIsEnabled) + DebugTimer->stopTimer(); + + return LabelID; +} + +/// RecordInlinedFnEnd - Indicate the end of inlined subroutine. +unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { + if (!TAI->doesDwarfUsesInlineInfoSection()) + return 0; + + if (TimePassesIsEnabled) + DebugTimer->startTimer(); + + GlobalVariable *GV = SP.getGV(); + DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator + I = DbgConcreteScopeMap.find(GV); + + if (I == DbgConcreteScopeMap.end()) { + // FIXME: Can this situation actually happen? And if so, should it? 
+    if (TimePassesIsEnabled)
+      DebugTimer->stopTimer();
+
+    return 0;
+  }
+
+  SmallVector<DbgScope *, 8> &Scopes = I->second;
+  assert(!Scopes.empty() && "We should have at least one debug scope!");
+  DbgScope *Scope = Scopes.back(); Scopes.pop_back();
+  unsigned ID = MMI->NextLabelID();
+  MMI->RecordUsedDbgLabel(ID);
+  Scope->setEndLabelID(ID);
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+
+  return ID;
+}
+
+/// RecordVariableScope - Record the scope for the variable declared by
+/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Scopes are
+/// recorded only for inlined subroutine variables; other variables' scopes
+/// are determined during RecordVariable().
+void DwarfDebug::RecordVariableScope(DIVariable &DV,
+                                     const MachineInstr *DeclareMI) {
+  if (TimePassesIsEnabled)
+    DebugTimer->startTimer();
+
+  DISubprogram SP(DV.getContext().getGV());
+
+  if (SP.isNull()) {
+    if (TimePassesIsEnabled)
+      DebugTimer->stopTimer();
+
+    return;
+  }
+
+  DenseMap<GlobalVariable *, DbgScope *>::iterator
+    I = DbgAbstractScopeMap.find(SP.getGV());
+  if (I != DbgAbstractScopeMap.end())
+    InlinedVariableScopes[DeclareMI] = I->second;
+
+  if (TimePassesIsEnabled)
+    DebugTimer->stopTimer();
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// SizeAndOffsetDie - Compute the size and offset of a DIE.
+///
+unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+  // Get the children.
+  const std::vector<DIE *> &Children = Die->getChildren();
+
+  // If not the last sibling and has children, then add a sibling offset
+  // attribute.
+  if (!Last && !Children.empty()) Die->AddSiblingOffset();
+
+  // Record the abbreviation.
+  AssignAbbrevNumber(Die->getAbbrev());
+
+  // Get the abbreviation for this DIE.
+  unsigned AbbrevNumber = Die->getAbbrevNumber();
+  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+  // Set the DIE offset.
+  Die->setOffset(Offset);
+
+  // Start the size with the size of the abbreviation code.
+  Offset += TargetAsmInfo::getULEB128Size(AbbrevNumber);
+
+  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+  // Size the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i)
+    // Size attribute value.
+    Offset += Values[i]->SizeOf(TD, AbbrevData[i].getForm());
+
+  // Size the DIE children if any.
+  if (!Children.empty()) {
+    assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+           "Children flag not set");
+
+    for (unsigned j = 0, M = Children.size(); j < M; ++j)
+      Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+
+    // End of children marker.
+    Offset += sizeof(int8_t);
+  }
+
+  Die->setSize(Offset - Die->getOffset());
+  return Offset;
+}
+
+/// SizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::SizeAndOffsets() {
+  // Compute the size of the compile unit header.
+  static unsigned Offset =
+    sizeof(int32_t) + // Length of Compilation Unit Info
+    sizeof(int16_t) + // DWARF version number
+    sizeof(int32_t) + // Offset Into Abbrev. Section
+    sizeof(int8_t);   // Pointer Size (in bytes)
+
+  // Process the base compile unit.
+  if (MainCU) {
+    SizeAndOffsetDie(MainCU->getDie(), Offset, true);
+    CompileUnitOffsets[MainCU] = 0;
+    return;
+  }
+
+  // Process all compile units.
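+  // (Editor's illustration, not part of the original patch.) With the four
+  // header fields above, Offset starts at 4 + 2 + 4 + 1 = 11 bytes, so the
+  // first DIE of each unit is laid out 11 bytes past the start of its
+  // compile unit header.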
+  unsigned PrevOffset = 0;
+
+  for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i) {
+    CompileUnit *Unit = CompileUnits[i];
+    CompileUnitOffsets[Unit] = PrevOffset;
+    PrevOffset += SizeAndOffsetDie(Unit->getDie(), Offset, true) +
+                  sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+  }
+}
+
+/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+/// tools to recognize that the object file contains Dwarf information.
+void DwarfDebug::EmitInitial() {
+  // Check to see if we already emitted the initial headers.
+  if (didInitial) return;
+  didInitial = true;
+
+  // Dwarf sections base addresses.
+  if (TAI->doesDwarfRequireFrameSection()) {
+    Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+    EmitLabel("section_debug_frame", 0);
+  }
+
+  Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+  EmitLabel("section_info", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+  EmitLabel("section_abbrev", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+  EmitLabel("section_aranges", 0);
+
+  if (TAI->doesSupportMacInfoSection()) {
+    Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+    EmitLabel("section_macinfo", 0);
+  }
+
+  Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+  EmitLabel("section_line", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+  EmitLabel("section_loc", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+  EmitLabel("section_pubnames", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+  EmitLabel("section_str", 0);
+  Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+  EmitLabel("section_ranges", 0);
+
+  Asm->SwitchToSection(TAI->getTextSection());
+  EmitLabel("text_begin", 0);
+  Asm->SwitchToSection(TAI->getDataSection());
+  EmitLabel("data_begin", 0);
+}
+
+/// EmitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::EmitDIE(DIE *Die) {
+  // Get the abbreviation for this DIE.
+  unsigned AbbrevNumber = Die->getAbbrevNumber();
+  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+  Asm->EOL();
+
+  // Emit the code (index) for the abbreviation.
+  Asm->EmitULEB128Bytes(AbbrevNumber);
+
+  if (Asm->isVerbose())
+    Asm->EOL(std::string("Abbrev [" +
+                         utostr(AbbrevNumber) +
+                         "] 0x" + utohexstr(Die->getOffset()) +
+                         ":0x" + utohexstr(Die->getSize()) + " " +
+                         dwarf::TagString(Abbrev->getTag())));
+  else
+    Asm->EOL();
+
+  SmallVector<DIEValue*, 32> &Values = Die->getValues();
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+  // Emit the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+    unsigned Attr = AbbrevData[i].getAttribute();
+    unsigned Form = AbbrevData[i].getForm();
+    assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+    switch (Attr) {
+    case dwarf::DW_AT_sibling:
+      Asm->EmitInt32(Die->SiblingOffset());
+      break;
+    case dwarf::DW_AT_abstract_origin: {
+      DIEEntry *E = cast<DIEEntry>(Values[i]);
+      DIE *Origin = E->getEntry();
+      unsigned Addr =
+        CompileUnitOffsets[Die->getAbstractCompileUnit()] +
+        Origin->getOffset();
+
+      Asm->EmitInt32(Addr);
+      break;
+    }
+    default:
+      // Emit an attribute using the defined form.
+      Values[i]->EmitValue(this, Form);
+      break;
+    }
+
+    Asm->EOL(dwarf::AttributeString(Attr));
+  }
+
+  // Emit the DIE children if any.
+  if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+    const std::vector<DIE *> &Children = Die->getChildren();
+
+    for (unsigned j = 0, M = Children.size(); j < M; ++j)
+      EmitDIE(Children[j]);
+
+    Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
+  }
+}
+
+/// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+///
+void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
+  DIE *Die = Unit->getDie();
+
+  // Emit the compile unit header.
+  EmitLabel("info_begin", Unit->getID());
+
+  // Emit size of content not including the length itself.
+  unsigned ContentSize = Die->getSize() +
+    sizeof(int16_t) + // DWARF version number
+    sizeof(int32_t) + // Offset Into Abbrev. Section
+    sizeof(int8_t) +  // Pointer Size (in bytes)
+    sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+
+  Asm->EmitInt32(ContentSize); Asm->EOL("Length of Compilation Unit Info");
+  Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF version number");
+  EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
+  Asm->EOL("Offset Into Abbrev. Section");
+  Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
+
+  EmitDIE(Die);
+  // FIXME - extra padding for gdb bug.
+  Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+  Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+  Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+  Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+  EmitLabel("info_end", Unit->getID());
+
+  Asm->EOL();
+}
+
+void DwarfDebug::EmitDebugInfo() {
+  // Start debug info section.
+  Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+
+  if (MainCU) {
+    EmitDebugInfoPerCU(MainCU);
+    return;
+  }
+
+  for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i)
+    EmitDebugInfoPerCU(CompileUnits[i]);
+}
+
+/// EmitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::EmitAbbreviations() const {
+  // Check to see if it is worth the effort.
+  if (!Abbreviations.empty()) {
+    // Start the debug abbrev section.
+    Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+
+    EmitLabel("abbrev_begin", 0);
+
+    // For each abbreviation.
+    for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+      // Get abbreviation data.
+      const DIEAbbrev *Abbrev = Abbreviations[i];
+
+      // Emit the abbreviation code (base-1 index).
+      Asm->EmitULEB128Bytes(Abbrev->getNumber());
+      Asm->EOL("Abbreviation Code");
+
+      // Emit the abbreviation data.
+      Abbrev->Emit(Asm);
+
+      Asm->EOL();
+    }
+
+    // Mark end of abbreviations.
+    Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(3)");
+
+    EmitLabel("abbrev_end", 0);
+    Asm->EOL();
+  }
+}
+
+/// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
+  // Define last address of section.
+  Asm->EmitInt8(0); Asm->EOL("Extended Op");
+  Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size");
+  Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+  EmitReference("section_end", SectionEnd); Asm->EOL("Section end label");
+
+  // Mark end of matrix.
+  Asm->EmitInt8(0); Asm->EOL("DW_LNE_end_sequence");
+  Asm->EmitULEB128Bytes(1); Asm->EOL();
+  Asm->EmitInt8(1); Asm->EOL();
+}
+
+/// EmitDebugLines - Emit source line information.
+///
+void DwarfDebug::EmitDebugLines() {
+  // If the target is using .loc/.file, the assembler will be emitting the
+  // .debug_line table automatically.
+  if (TAI->hasDotLocAndDotFile())
+    return;
+
+  // Minimum line delta, thus ranging from -10..(255-10).
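+  // (Editor's illustration, not part of the original patch.) Concretely:
+  // DW_LNS_fixed_advance_pc is opcode 9, so MinLineDelta is -10 and
+  // MaxLineDelta is 245. With the special opcode base emitted below as
+  // -MinLineDelta = 10, a line advance of Offset with no address advance is
+  // encoded as the single special opcode Offset + 20 (the emission loop
+  // writes Delta - MinLineDelta); e.g. opcode 21 advances one line.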
+  const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1);
+  // Maximum line delta.
+  const int MaxLineDelta = 255 + MinLineDelta;
+
+  // Start the dwarf line section.
+  Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+
+  // Construct the section header.
+  EmitDifference("line_end", 0, "line_begin", 0, true);
+  Asm->EOL("Length of Source Line Info");
+  EmitLabel("line_begin", 0);
+
+  Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF version number");
+
+  EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true);
+  Asm->EOL("Prolog Length");
+  EmitLabel("line_prolog_begin", 0);
+
+  Asm->EmitInt8(1); Asm->EOL("Minimum Instruction Length");
+
+  Asm->EmitInt8(1); Asm->EOL("Default is_stmt_start flag");
+
+  Asm->EmitInt8(MinLineDelta); Asm->EOL("Line Base Value (Special Opcodes)");
+
+  Asm->EmitInt8(MaxLineDelta); Asm->EOL("Line Range Value (Special Opcodes)");
+
+  Asm->EmitInt8(-MinLineDelta); Asm->EOL("Special Opcode Base");
+
+  // Line number standard opcode encodings argument count.
+  Asm->EmitInt8(0); Asm->EOL("DW_LNS_copy arg count");
+  Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_pc arg count");
+  Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_line arg count");
+  Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_file arg count");
+  Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_column arg count");
+  Asm->EmitInt8(0); Asm->EOL("DW_LNS_negate_stmt arg count");
+  Asm->EmitInt8(0); Asm->EOL("DW_LNS_set_basic_block arg count");
+  Asm->EmitInt8(0); Asm->EOL("DW_LNS_const_add_pc arg count");
+  Asm->EmitInt8(1); Asm->EOL("DW_LNS_fixed_advance_pc arg count");
+
+  // Emit directories.
+  for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) {
+    Asm->EmitString(getSourceDirectoryName(DI));
+    Asm->EOL("Directory");
+  }
+
+  Asm->EmitInt8(0); Asm->EOL("End of directories");
+
+  // Emit files.
+  for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) {
+    // Remember source id starts at 1.
+    std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI);
+    Asm->EmitString(getSourceFileName(Id.second));
+    Asm->EOL("Source");
+    Asm->EmitULEB128Bytes(Id.first);
+    Asm->EOL("Directory #");
+    Asm->EmitULEB128Bytes(0);
+    Asm->EOL("Mod date");
+    Asm->EmitULEB128Bytes(0);
+    Asm->EOL("File size");
+  }
+
+  Asm->EmitInt8(0); Asm->EOL("End of files");
+
+  EmitLabel("line_prolog_end", 0);
+
+  // A sequence for each text section.
+  unsigned SecSrcLinesSize = SectionSourceLines.size();
+
+  for (unsigned j = 0; j < SecSrcLinesSize; ++j) {
+    // Isolate the current section's line info.
+    const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
+
+    if (Asm->isVerbose()) {
+      const Section* S = SectionMap[j + 1];
+      O << '\t' << TAI->getCommentString() << " Section "
+        << S->getName() << '\n';
+    } else {
+      Asm->EOL();
+    }
+
+    // Dwarf assumes we start with first line of first source file.
+    unsigned Source = 1;
+    unsigned Line = 1;
+
+    // Construct rows of the address, source, line, column matrix.
+    for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
+      const SrcLineInfo &LineInfo = LineInfos[i];
+      unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID());
+      if (!LabelID) continue;
+
+      if (!Asm->isVerbose())
+        Asm->EOL();
+      else {
+        std::pair<unsigned, unsigned> SourceID =
+          getSourceDirectoryAndFileIds(LineInfo.getSourceID());
+        O << '\t' << TAI->getCommentString() << ' '
+          << getSourceDirectoryName(SourceID.first) << ' '
+          << getSourceFileName(SourceID.second)
+          << " :" << utostr_32(LineInfo.getLine()) << '\n';
+      }
+
+      // Define the line address.
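+      // (Editor's note, not part of the original patch.) An extended opcode
+      // is encoded as a zero byte, the operand length as a ULEB128 (pointer
+      // size + 1 here), the sub-opcode, then its operand; for
+      // DW_LNE_set_address the operand is the relocated label address.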
+ Asm->EmitInt8(0); Asm->EOL("Extended Op"); + Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size"); + Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->EOL("DW_LNE_set_address"); + EmitReference("label", LabelID); Asm->EOL("Location label"); + + // If change of source, then switch to the new source. + if (Source != LineInfo.getSourceID()) { + Source = LineInfo.getSourceID(); + Asm->EmitInt8(dwarf::DW_LNS_set_file); Asm->EOL("DW_LNS_set_file"); + Asm->EmitULEB128Bytes(Source); Asm->EOL("New Source"); + } + + // If change of line. + if (Line != LineInfo.getLine()) { + // Determine offset. + int Offset = LineInfo.getLine() - Line; + int Delta = Offset - MinLineDelta; + + // Update line. + Line = LineInfo.getLine(); + + // If delta is small enough and in range... + if (Delta >= 0 && Delta < (MaxLineDelta - 1)) { + // ... then use fast opcode. + Asm->EmitInt8(Delta - MinLineDelta); Asm->EOL("Line Delta"); + } else { + // ... otherwise use long hand. + Asm->EmitInt8(dwarf::DW_LNS_advance_line); + Asm->EOL("DW_LNS_advance_line"); + Asm->EmitSLEB128Bytes(Offset); Asm->EOL("Line Offset"); + Asm->EmitInt8(dwarf::DW_LNS_copy); Asm->EOL("DW_LNS_copy"); + } + } else { + // Copy the previous row (different address or source) + Asm->EmitInt8(dwarf::DW_LNS_copy); Asm->EOL("DW_LNS_copy"); + } + } + + EmitEndOfLineMatrix(j + 1); + } + + if (SecSrcLinesSize == 0) + // Because we're emitting a debug_line section, we still need a line + // table. The linker and friends expect it to exist. If there's nothing to + // put into it, emit an empty table. + EmitEndOfLineMatrix(1); + + EmitLabel("line_end", 0); + Asm->EOL(); +} + +/// EmitCommonDebugFrame - Emit common frame info into a debug frame section. +/// +void DwarfDebug::EmitCommonDebugFrame() { + if (!TAI->doesDwarfRequireFrameSection()) + return; + + int stackGrowth = + Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TD->getPointerSize() : -TD->getPointerSize(); + + // Start the dwarf frame section. + Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + + EmitLabel("debug_frame_common", 0); + EmitDifference("debug_frame_common_end", 0, + "debug_frame_common_begin", 0, true); + Asm->EOL("Length of Common Information Entry"); + + EmitLabel("debug_frame_common_begin", 0); + Asm->EmitInt32((int)dwarf::DW_CIE_ID); + Asm->EOL("CIE Identifier Tag"); + Asm->EmitInt8(dwarf::DW_CIE_VERSION); + Asm->EOL("CIE Version"); + Asm->EmitString(""); + Asm->EOL("CIE Augmentation"); + Asm->EmitULEB128Bytes(1); + Asm->EOL("CIE Code Alignment Factor"); + Asm->EmitSLEB128Bytes(stackGrowth); + Asm->EOL("CIE Data Alignment Factor"); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false)); + Asm->EOL("CIE RA Column"); + + std::vector<MachineMove> Moves; + RI->getInitialFrameState(Moves); + + EmitFrameMoves(NULL, 0, Moves, false); + + Asm->EmitAlignment(2, 0, 0, false); + EmitLabel("debug_frame_common_end", 0); + + Asm->EOL(); +} + +/// EmitFunctionDebugFrame - Emit per function frame info into a debug frame +/// section. +void +DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ + if (!TAI->doesDwarfRequireFrameSection()) + return; + + // Start the dwarf frame section. 
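+  // (Editor's note, not part of the original patch.) Each FDE emitted here
+  // refers back, through the "debug_frame_common" label, to the single CIE
+  // produced by EmitCommonDebugFrame above, and covers the half-open range
+  // [func_begin, func_end) of its function.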
+  Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+
+  EmitDifference("debug_frame_end", DebugFrameInfo.Number,
+                 "debug_frame_begin", DebugFrameInfo.Number, true);
+  Asm->EOL("Length of Frame Information Entry");
+
+  EmitLabel("debug_frame_begin", DebugFrameInfo.Number);
+
+  EmitSectionOffset("debug_frame_common", "section_debug_frame",
+                    0, 0, true, false);
+  Asm->EOL("FDE CIE offset");
+
+  EmitReference("func_begin", DebugFrameInfo.Number);
+  Asm->EOL("FDE initial location");
+  EmitDifference("func_end", DebugFrameInfo.Number,
+                 "func_begin", DebugFrameInfo.Number);
+  Asm->EOL("FDE address range");
+
+  EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves,
+                 false);
+
+  Asm->EmitAlignment(2, 0, 0, false);
+  EmitLabel("debug_frame_end", DebugFrameInfo.Number);
+
+  Asm->EOL();
+}
+
+void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
+  EmitDifference("pubnames_end", Unit->getID(),
+                 "pubnames_begin", Unit->getID(), true);
+  Asm->EOL("Length of Public Names Info");
+
+  EmitLabel("pubnames_begin", Unit->getID());
+
+  Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version");
+
+  EmitSectionOffset("info_begin", "section_info",
+                    Unit->getID(), 0, true, false);
+  Asm->EOL("Offset of Compilation Unit Info");
+
+  EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(),
+                 true);
+  Asm->EOL("Compilation Unit Length");
+
+  StringMap<DIE*> &Globals = Unit->getGlobals();
+  for (StringMap<DIE*>::const_iterator
+         GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+    const char *Name = GI->getKeyData();
+    DIE *Entity = GI->second;
+
+    Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset");
+    Asm->EmitString(Name, strlen(Name)); Asm->EOL("External Name");
+  }
+
+  Asm->EmitInt32(0); Asm->EOL("End Mark");
+  EmitLabel("pubnames_end", Unit->getID());
+
+  Asm->EOL();
+}
+
+/// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::EmitDebugPubNames() {
+  // Start the dwarf pubnames section.
+  Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+
+  if (MainCU) {
+    EmitDebugPubNamesPerCU(MainCU);
+    return;
+  }
+
+  for (unsigned i = 0, e = CompileUnits.size(); i != e; ++i)
+    EmitDebugPubNamesPerCU(CompileUnits[i]);
+}
+
+/// EmitDebugStr - Emit the string pool into a debug str section.
+///
+void DwarfDebug::EmitDebugStr() {
+  // Check to see if it is worth the effort.
+  if (!StringPool.empty()) {
+    // Start the dwarf str section.
+    Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+
+    // For each string in the string pool.
+    for (unsigned StringID = 1, N = StringPool.size();
+         StringID <= N; ++StringID) {
+      // Emit a label for reference from debug information entries.
+      EmitLabel("string", StringID);
+
+      // Emit the string itself.
+      const std::string &String = StringPool[StringID];
+      Asm->EmitString(String); Asm->EOL();
+    }
+
+    Asm->EOL();
+  }
+}
+
+/// EmitDebugLoc - Emit debug location information into a debug loc section.
+///
+void DwarfDebug::EmitDebugLoc() {
+  // Start the dwarf loc section.
+  Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+  Asm->EOL();
+}
+
+/// EmitDebugARanges - Emit address ranges into a debug aranges section.
+///
+void DwarfDebug::EmitDebugARanges() {
+  // Start the dwarf aranges section.
+  Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+
+  // FIXME - Mock up
+#if 0
+  CompileUnit *Unit = GetBaseCompileUnit();
+
+  // Don't include size of length
+  Asm->EmitInt32(0x1c); Asm->EOL("Length of Address Ranges Info");
+
+  Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
+
+  EmitReference("info_begin", Unit->getID());
+  Asm->EOL("Offset of Compilation Unit Info");
+
+  Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Size of Address");
+
+  Asm->EmitInt8(0); Asm->EOL("Size of Segment Descriptor");
+
+  Asm->EmitInt16(0); Asm->EOL("Pad (1)");
+  Asm->EmitInt16(0); Asm->EOL("Pad (2)");
+
+  // Range 1
+  EmitReference("text_begin", 0); Asm->EOL("Address");
+  EmitDifference("text_end", 0, "text_begin", 0, true); Asm->EOL("Length");
+
+  Asm->EmitInt32(0); Asm->EOL("EOM (1)");
+  Asm->EmitInt32(0); Asm->EOL("EOM (2)");
+#endif
+
+  Asm->EOL();
+}
+
+/// EmitDebugRanges - Emit address ranges into a debug ranges section.
+///
+void DwarfDebug::EmitDebugRanges() {
+  // Start the dwarf ranges section.
+  Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+  Asm->EOL();
+}
+
+/// EmitDebugMacInfo - Emit macro information into a debug macinfo section.
+///
+void DwarfDebug::EmitDebugMacInfo() {
+  if (TAI->doesSupportMacInfoSection()) {
+    // Start the dwarf macinfo section.
+    Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+    Asm->EOL();
+  }
+}
+
+/// EmitDebugInlineInfo - Emit inline info using the following format.
+/// Section Header:
+/// 1. length of section
+/// 2. Dwarf version number
+/// 3. address size.
+///
+/// Entries (one "entry" for each function that was inlined):
+///
+/// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+///    otherwise offset into __debug_str for regular function name.
+/// 2. offset into __debug_str section for regular function name.
+/// 3. an unsigned LEB128 number indicating the number of distinct inlining
+///    instances for the function.
+///
+/// The rest of the entry consists of a {die_offset, low_pc} pair for each
+/// inlined instance; the die_offset points to the inlined_subroutine die in
+/// the __debug_info section, and the low_pc is the starting address for the
+/// inlining instance.
+void DwarfDebug::EmitDebugInlineInfo() {
+  if (!TAI->doesDwarfUsesInlineInfoSection())
+    return;
+
+  if (!MainCU)
+    return;
+
+  Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection());
+  Asm->EOL();
+  EmitDifference("debug_inlined_end", 1,
+                 "debug_inlined_begin", 1, true);
+  Asm->EOL("Length of Debug Inlined Information Entry");
+
+  EmitLabel("debug_inlined_begin", 1);
+
+  Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
+  Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
+
+  for (DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator
+         I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
+    GlobalVariable *GV = I->first;
+    SmallVector<unsigned, 4> &Labels = I->second;
+    DISubprogram SP(GV);
+    std::string Name;
+    std::string LName;
+
+    SP.getLinkageName(LName);
+    SP.getName(Name);
+
+    Asm->EmitString(LName.empty() ? Name : LName);
+    Asm->EOL("MIPS linkage name");
+
+    Asm->EmitString(Name); Asm->EOL("Function name");
+
+    Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count");
+
+    for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(),
+           LE = Labels.end(); LI != LE; ++LI) {
+      DIE *SPDie = MainCU->getDieMapSlotFor(GV);
+      Asm->EmitInt32(SPDie->getOffset()); Asm->EOL("DIE offset");
+
+      if (TD->getPointerSize() == sizeof(int32_t))
+        O << TAI->getData32bitsDirective();
+      else
+        O << TAI->getData64bitsDirective();
+
+      PrintLabelName("label", *LI); Asm->EOL("low_pc");
+    }
+  }
+
+  EmitLabel("debug_inlined_end", 1);
+  Asm->EOL();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 0000000..9824566
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,561 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "DIE.h"
+#include "DwarfPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include <string>
+
+namespace llvm {
+
+class CompileUnit;
+class DbgVariable;
+class DbgScope;
+class DbgConcreteScope;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class TargetAsmInfo;
+class Timer;
+
+//===----------------------------------------------------------------------===//
+/// SrcLineInfo - This class is used to record source line correspondence.
+///
+class VISIBILITY_HIDDEN SrcLineInfo {
+  unsigned Line;     // Source line number.
+  unsigned Column;   // Source column.
+  unsigned SourceID; // Source ID number.
+  unsigned LabelID;  // Label in code ID number.
+public:
+  SrcLineInfo(unsigned L, unsigned C, unsigned S, unsigned I)
+    : Line(L), Column(C), SourceID(S), LabelID(I) {}
+
+  // Accessors
+  unsigned getLine() const { return Line; }
+  unsigned getColumn() const { return Column; }
+  unsigned getSourceID() const { return SourceID; }
+  unsigned getLabelID() const { return LabelID; }
+};
+
+class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
+  //===--------------------------------------------------------------------===//
+  // Attributes used to construct specific Dwarf sections.
+  //
+
+  /// CompileUnitMap - A map of global variables representing compile units to
+  /// compile units.
+  DenseMap<Value *, CompileUnit *> CompileUnitMap;
+
+  /// CompileUnits - All the compile units in this module.
+  ///
+  SmallVector<CompileUnit *, 8> CompileUnits;
+
+  /// MainCU - Some platforms prefer one compile unit per .o file. In such
+  /// cases, all DIEs are inserted in MainCU.
+  CompileUnit *MainCU;
+
+  /// AbbreviationsSet - Used to uniquely define abbreviations.
+  ///
+  FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+  /// Abbreviations - A list of all the unique abbreviations in use.
+  ///
+  std::vector<DIEAbbrev *> Abbreviations;
+
+  /// DirectoryIdMap - Directory name to directory id map.
+  ///
+  StringMap<unsigned> DirectoryIdMap;
+
+  /// DirectoryNames - A list of directory names.
+  SmallVector<std::string, 8> DirectoryNames;
+
+  /// SourceFileIdMap - Source file name to source file id map.
+  ///
+  StringMap<unsigned> SourceFileIdMap;
+
+  /// SourceFileNames - A list of source file names.
+  SmallVector<std::string, 8> SourceFileNames;
+
+  /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+  /// id mapped to a unique id.
+  DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
+
+  /// SourceIds - Reverse map from source id to directory id + file id pair.
+  ///
+  SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
+
+  /// Lines - List of source line correspondence.
+  std::vector<SrcLineInfo> Lines;
+
+  /// ValuesSet - Used to uniquely define values.
+  ///
+  FoldingSet<DIEValue> ValuesSet;
+
+  /// Values - A list of all the unique values in use.
+  ///
+  std::vector<DIEValue *> Values;
+
+  /// StringPool - A UniqueVector of strings used by indirect references.
+  ///
+  UniqueVector<std::string> StringPool;
+
+  /// SectionMap - Provides a unique id per text section.
+  ///
+  UniqueVector<const Section*> SectionMap;
+
+  /// SectionSourceLines - Tracks line numbers per text section.
+  ///
+  std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
+
+  /// didInitial - Flag to indicate if initial emission has been done.
+  ///
+  bool didInitial;
+
+  /// shouldEmit - Flag to indicate if debug information should be emitted.
+  ///
+  bool shouldEmit;
+
+  // FunctionDbgScope - Top level scope for the current function.
+  //
+  DbgScope *FunctionDbgScope;
+
+  /// DbgScopeMap - Tracks the scopes in the current function.
+  DenseMap<GlobalVariable *, DbgScope *> DbgScopeMap;
+
+  /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
+  /// function.
+  DenseMap<GlobalVariable *, DbgScope *> DbgAbstractScopeMap;
+
+  /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
+  /// function.
+  DenseMap<GlobalVariable *,
+           SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
+
+  /// InlineInfo - Keep track of inlined functions and their location. This
+  /// information is used to populate the debug_inlined section.
+  DenseMap<GlobalVariable *, SmallVector<unsigned, 4> > InlineInfo;
+
+  /// InlinedVariableScopes - Scopes information for the inlined subroutine
+  /// variables.
+  DenseMap<const MachineInstr *, DbgScope *> InlinedVariableScopes;
+
+  /// AbstractInstanceRootMap - Map of abstract instance roots of inlined
+  /// functions. These are subroutine entries that contain a DW_AT_inline
+  /// attribute.
+  DenseMap<const GlobalVariable *, DbgScope *> AbstractInstanceRootMap;
+
+  /// InlinedParamMap - A map keeping track of which parameters are assigned to
+  /// which abstract instance.
+  DenseMap<const GlobalVariable *,
+           SmallSet<const GlobalVariable *, 32> > InlinedParamMap;
+
+  /// AbstractInstanceRootList - List of abstract instance roots of inlined
+  /// functions. These are subroutine entries that contain a DW_AT_inline
+  /// attribute.
+  SmallVector<DbgScope *, 32> AbstractInstanceRootList;
+
+  /// LexicalScopeStack - A stack of lexical scopes. The top one is the current
+  /// scope.
+  SmallVector<DbgScope *, 16> LexicalScopeStack;
+
+  /// CompileUnitOffsets - A map of the offsets of the compile units, used when
+  /// calculating the "origin" of a concrete instance of an inlined
+  /// function.
+  DenseMap<CompileUnit *, unsigned> CompileUnitOffsets;
+
+  /// DebugTimer - Timer for the Dwarf debug writer.
+  Timer *DebugTimer;
+
+  struct FunctionDebugFrameInfo {
+    unsigned Number;
+    std::vector<MachineMove> Moves;
+
+    FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+      : Number(Num), Moves(M) {}
+  };
+
+  std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+  /// getSourceDirectoryAndFileIds - Return the directory and file ids that
+  /// map to the source id. Source ids start at 1.
+  std::pair<unsigned, unsigned>
+  getSourceDirectoryAndFileIds(unsigned SId) const {
+    return SourceIds[SId-1];
+  }
+
+  /// getNumSourceDirectories - Return the number of source directories in the
+  /// debug info.
+  unsigned getNumSourceDirectories() const {
+    return DirectoryNames.size();
+  }
+
+  /// getSourceDirectoryName - Return the name of the directory corresponding
+  /// to the id.
+  const std::string &getSourceDirectoryName(unsigned Id) const {
+    return DirectoryNames[Id - 1];
+  }
+
+  /// getSourceFileName - Return the name of the source file corresponding
+  /// to the id.
+  const std::string &getSourceFileName(unsigned Id) const {
+    return SourceFileNames[Id - 1];
+  }
+
+  /// getNumSourceIds - Return the number of unique source ids.
+  unsigned getNumSourceIds() const {
+    return SourceIds.size();
+  }
+
+  /// AssignAbbrevNumber - Define a unique number for the abbreviation.
+  ///
+  void AssignAbbrevNumber(DIEAbbrev &Abbrev);
+
+  /// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+  /// information entry.
+  DIEEntry *CreateDIEEntry(DIE *Entry = NULL);
+
+  /// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
+  ///
+  void SetDIEEntry(DIEEntry *Value, DIE *Entry);
+
+  /// AddUInt - Add an unsigned integer attribute data and value.
+  ///
+  void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+  /// AddSInt - Add a signed integer attribute data and value.
+  ///
+  void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+  /// AddString - Add a string attribute data and value.
+  ///
+  void AddString(DIE *Die, unsigned Attribute, unsigned Form,
+                 const std::string &String);
+
+  /// AddLabel - Add a Dwarf label attribute data and value.
+  ///
+  void AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                const DWLabel &Label);
+
+  /// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+  ///
+  void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                      const std::string &Label);
+
+  /// AddSectionOffset - Add a section offset label attribute data and value.
+  ///
+  void AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+                        const DWLabel &Label, const DWLabel &Section,
+                        bool isEH = false, bool useSet = true);
+
+  /// AddDelta - Add a label delta attribute data and value.
+  ///
+  void AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+                const DWLabel &Hi, const DWLabel &Lo);
+
+  /// AddDIEEntry - Add a DIE attribute data and value.
+  ///
+  void AddDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+    Die->AddValue(Attribute, Form, CreateDIEEntry(Entry));
+  }
+
+  /// AddBlock - Add block data.
+  ///
+  void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+  /// AddSourceLine - Add location information to specified debug information
+  /// entry.
+  void AddSourceLine(DIE *Die, const DIVariable *V);
+
+  /// AddSourceLine - Add location information to specified debug information
+  /// entry.
+  void AddSourceLine(DIE *Die, const DIGlobal *G);
+
+  void AddSourceLine(DIE *Die, const DIType *Ty);
+
+  /// AddAddress - Add an address attribute to a die based on the location
+  /// provided.
+  void AddAddress(DIE *Die, unsigned Attribute,
+                  const MachineLocation &Location);
+
+  /// AddType - Add a new type attribute to the specified entity.
+  void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+
+  /// ConstructTypeDIE - Construct basic type die from DIBasicType.
+  void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+                        DIBasicType BTy);
+
+  /// ConstructTypeDIE - Construct derived type die from DIDerivedType.
+  void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+                        DIDerivedType DTy);
+
+  /// ConstructTypeDIE - Construct type DIE from DICompositeType.
+  void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+                        DICompositeType CTy);
+
+  /// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
+  void ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+  /// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
+  void ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+                             DICompositeType *CTy);
+
+  /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+  DIE *ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
+
+  /// CreateGlobalVariableDIE - Create new DIE using GV.
+  DIE *CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+                               const DIGlobalVariable &GV);
+
+  /// CreateMemberDIE - Create new member DIE.
+  DIE *CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
+
+  /// CreateSubprogramDIE - Create new DIE using SP.
+  DIE *CreateSubprogramDIE(CompileUnit *DW_Unit,
+                           const DISubprogram &SP,
+                           bool IsConstructor = false,
+                           bool IsInlined = false);
+
+  /// FindCompileUnit - Get the compile unit for the given descriptor.
+  ///
+  CompileUnit &FindCompileUnit(DICompileUnit Unit) const;
+
+  /// CreateDbgScopeVariable - Create a new scope variable.
+  ///
+  DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
+
+  /// getOrCreateScope - Returns the scope associated with the given descriptor.
+  ///
+  DbgScope *getOrCreateScope(GlobalVariable *V);
+
+  /// ConstructDbgScope - Construct the components of a scope.
+  ///
+  void ConstructDbgScope(DbgScope *ParentScope,
+                         unsigned ParentStartID, unsigned ParentEndID,
+                         DIE *ParentDie, CompileUnit *Unit);
+
+  /// ConstructFunctionDbgScope - Construct the scope for the subprogram.
+  ///
+  void ConstructFunctionDbgScope(DbgScope *RootScope,
+                                 bool AbstractScope = false);
+
+  /// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
+  ///
+  void ConstructDefaultDbgScope(MachineFunction *MF);
+
+  /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+  /// tools to recognize that the object file contains Dwarf information.
+  void EmitInitial();
+
+  /// EmitDIE - Recursively emits a debug information entry.
+  ///
+  void EmitDIE(DIE *Die);
+
+  /// SizeAndOffsetDie - Compute the size and offset of a DIE.
+  ///
+  unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last);
+
+  /// SizeAndOffsets - Compute the size and offset of all the DIEs.
+  ///
+  void SizeAndOffsets();
+
+  /// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+  ///
+  void EmitDebugInfoPerCU(CompileUnit *Unit);
+
+  void EmitDebugInfo();
+
+  /// EmitAbbreviations - Emit the abbreviation section.
+  ///
+  void EmitAbbreviations() const;
+
+  /// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+  /// the line matrix.
+  ///
+  void EmitEndOfLineMatrix(unsigned SectionEnd);
+
+  /// EmitDebugLines - Emit source line information.
+  ///
+  void EmitDebugLines();
+
+  /// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+  ///
+  void EmitCommonDebugFrame();
+
+  /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+  /// section.
+  void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
+
+  void EmitDebugPubNamesPerCU(CompileUnit *Unit);
+
+  /// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+  ///
+  void EmitDebugPubNames();
+
+  /// EmitDebugStr - Emit visible names into a debug str section.
+  ///
+  void EmitDebugStr();
+
+  /// EmitDebugLoc - Emit visible names into a debug loc section.
+  ///
+  void EmitDebugLoc();
+
+  /// EmitDebugARanges - Emit visible names into a debug aranges section.
+  ///
+  void EmitDebugARanges();
+
+  /// EmitDebugRanges - Emit visible names into a debug ranges section.
+  ///
+  void EmitDebugRanges();
+
+  /// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
+  ///
+  void EmitDebugMacInfo();
+
+  /// EmitDebugInlineInfo - Emit inline info using the following format.
+  /// Section Header:
+  /// 1. length of section
+  /// 2. Dwarf version number
+  /// 3. address size.
+  ///
+  /// Entries (one "entry" for each function that was inlined):
+  ///
+  /// 1. offset into __debug_str section for MIPS linkage name, if exists;
+  ///    otherwise offset into __debug_str for regular function name.
+  /// 2. offset into __debug_str section for regular function name.
+  /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+  ///    instances for the function.
+  ///
+  /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+  /// inlined instance; the die_offset points to the inlined_subroutine die in
+  /// the __debug_info section, and the low_pc is the starting address for the
+  /// inlining instance.
+  void EmitDebugInlineInfo();
+
+  /// GetOrCreateSourceID - Look up the source id with the given directory and
+  /// source file names. If none currently exists, create a new id and insert it
+  /// in the SourceIds map. This can update the DirectoryNames and
+  /// SourceFileNames maps as well.
+  unsigned GetOrCreateSourceID(const std::string &DirName,
+                               const std::string &FileName);
+
+  void ConstructCompileUnit(GlobalVariable *GV);
+
+  /// ConstructCompileUnits - Create compile unit DIEs.
+  void ConstructCompileUnits();
+
+  bool ConstructGlobalVariableDIE(GlobalVariable *GV);
+
+  /// ConstructGlobalVariableDIEs - Create DIEs for each of the externally
+  /// visible global variables. Return true if at least one global DIE is
+  /// created.
+  bool ConstructGlobalVariableDIEs();
+
+  bool ConstructSubprogram(GlobalVariable *GV);
+
+  /// ConstructSubprograms - Create DIEs for each of the externally visible
+  /// subprograms. Return true if at least one subprogram DIE is created.
+  bool ConstructSubprograms();
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+  virtual ~DwarfDebug();
+
+  /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
+  /// be emitted.
+  bool ShouldEmitDwarfDebug() const { return shouldEmit; }
+
+  /// SetDebugInfo - Create global DIEs and emit initial debug info sections.
+  /// This is invoked by the target AsmPrinter.
+ void SetDebugInfo(MachineModuleInfo *mmi); + + /// BeginModule - Emit all Dwarf sections that should come prior to the + /// content. + void BeginModule(Module *M) { + this->M = M; + } + + /// EndModule - Emit all Dwarf sections that should come after the content. + /// + void EndModule(); + + /// BeginFunction - Gather pre-function debug information. Assumes being + /// emitted immediately after the function entry point. + void BeginFunction(MachineFunction *MF); + + /// EndFunction - Gather and emit post-function debug information. + /// + void EndFunction(MachineFunction *MF); + + /// RecordSourceLine - Records location information and associates it with a + /// label. Returns a unique label ID used to generate a label and provide + /// correspondence to the source line list. + unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col); + + /// RecordSourceLine - Records location information and associates it with a + /// label. Returns a unique label ID used to generate a label and provide + /// correspondence to the source line list. + unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU); + + /// getRecordSourceLineCount - Return the number of source lines in the debug + /// info. + unsigned getRecordSourceLineCount() const { + return Lines.size(); + } + + /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be + /// timed. Look up the source id with the given directory and source file + /// names. If none currently exists, create a new id and insert it in the + /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as + /// well. + unsigned getOrCreateSourceID(const std::string &DirName, + const std::string &FileName); + + /// RecordRegionStart - Indicate the start of a region. + unsigned RecordRegionStart(GlobalVariable *V); + + /// RecordRegionEnd - Indicate the end of a region. + unsigned RecordRegionEnd(GlobalVariable *V); + + /// RecordVariable - Indicate the declaration of a local variable. + void RecordVariable(GlobalVariable *GV, unsigned FrameIndex, + const MachineInstr *MI); + + //// RecordInlinedFnStart - Indicate the start of inlined subroutine. + unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, + unsigned Line, unsigned Col); + + /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. + unsigned RecordInlinedFnEnd(DISubprogram &SP); + + /// RecordVariableScope - Record scope for the variable declared by + /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes + /// for only inlined subroutine variables. Other variables's scopes are + /// determined during RecordVariable(). + void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI); +}; + +} // End of namespace llvm + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp new file mode 100644 index 0000000..37466ab --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -0,0 +1,706 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf exception info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +static TimerGroup &getDwarfTimerGroup() { + static TimerGroup DwarfTimerGroup("Dwarf Exception"); + return DwarfTimerGroup; +} + +DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A, + const TargetAsmInfo *T) + : Dwarf(OS, A, T, "eh"), shouldEmitTable(false), shouldEmitMoves(false), + shouldEmitTableModule(false), shouldEmitMovesModule(false), + ExceptionTimer(0) { + if (TimePassesIsEnabled) + ExceptionTimer = new Timer("Dwarf Exception Writer", + getDwarfTimerGroup()); +} + +DwarfException::~DwarfException() { + delete ExceptionTimer; +} + +void DwarfException::EmitCommonEHFrame(const Function *Personality, + unsigned Index) { + // Size and sign of stack growth. + int stackGrowth = + Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TD->getPointerSize() : -TD->getPointerSize(); + + // Begin eh frame section. + Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); + + if (!TAI->doesRequireNonLocalEHFrameLabel()) + O << TAI->getEHGlobalPrefix(); + + O << "EH_frame" << Index << ":\n"; + EmitLabel("section_eh_frame", Index); + + // Define base labels. + EmitLabel("eh_frame_common", Index); + + // Define the eh frame length. + EmitDifference("eh_frame_common_end", Index, + "eh_frame_common_begin", Index, true); + Asm->EOL("Length of Common Information Entry"); + + // EH frame header. + EmitLabel("eh_frame_common_begin", Index); + Asm->EmitInt32((int)0); + Asm->EOL("CIE Identifier Tag"); + Asm->EmitInt8(dwarf::DW_CIE_VERSION); + Asm->EOL("CIE Version"); + + // The personality presence indicates that language specific information will + // show up in the eh frame. + Asm->EmitString(Personality ? "zPLR" : "zR"); + Asm->EOL("CIE Augmentation"); + + // Round out reader. + Asm->EmitULEB128Bytes(1); + Asm->EOL("CIE Code Alignment Factor"); + Asm->EmitSLEB128Bytes(stackGrowth); + Asm->EOL("CIE Data Alignment Factor"); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); + Asm->EOL("CIE Return Address Column"); + + // If there is a personality, we need to indicate the functions location. 
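+  // Byte-level sketch of the augmentation data emitted below (derived from
+  // the code, shown here for orientation): for the personality case the
+  // augmentation string is "zPLR" and the data that follows is
+  //
+  //   uleb128 7              ; augmentation data length
+  //   byte    P-encoding     ; 1 byte  (pcrel sdata4, possibly indirect)
+  //   data4   personality    ; 4 bytes
+  //   byte    L-encoding     ; 1 byte  (LSDA: pcrel sdata4)
+  //   byte    R-encoding     ; 1 byte  (FDE: pcrel sdata4)
+  //
+  // hence the length 1 + 4 + 1 + 1 = 7. The personality-less "zR" case
+  // carries only the single R-encoding byte, hence length 1.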
+ if (Personality) { + Asm->EmitULEB128Bytes(7); + Asm->EOL("Augmentation Size"); + + if (TAI->getNeedsIndirectEncoding()) { + Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 | + dwarf::DW_EH_PE_indirect); + Asm->EOL("Personality (pcrel sdata4 indirect)"); + } else { + Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + Asm->EOL("Personality (pcrel sdata4)"); + } + + PrintRelDirective(true); + O << TAI->getPersonalityPrefix(); + Asm->EmitExternalGlobal((const GlobalVariable *)(Personality)); + O << TAI->getPersonalitySuffix(); + if (strcmp(TAI->getPersonalitySuffix(), "+4@GOTPCREL")) + O << "-" << TAI->getPCSymbol(); + Asm->EOL("Personality"); + + Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + Asm->EOL("LSDA Encoding (pcrel sdata4)"); + + Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + Asm->EOL("FDE Encoding (pcrel sdata4)"); + } else { + Asm->EmitULEB128Bytes(1); + Asm->EOL("Augmentation Size"); + + Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + Asm->EOL("FDE Encoding (pcrel sdata4)"); + } + + // Indicate locations of general callee saved registers in frame. + std::vector<MachineMove> Moves; + RI->getInitialFrameState(Moves); + EmitFrameMoves(NULL, 0, Moves, true); + + // On Darwin the linker honors the alignment of eh_frame, which means it must + // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get + // holes which confuse readers of eh_frame. + Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3, + 0, 0, false); + EmitLabel("eh_frame_common_end", Index); + + Asm->EOL(); +} + +/// EmitEHFrame - Emit function exception frame information. +/// +void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { + assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && + "Should not emit 'available externally' functions at all"); + + Function::LinkageTypes linkage = EHFrameInfo.function->getLinkage(); + Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); + + // Externally visible entry into the functions eh frame info. If the + // corresponding function is static, this should not be externally visible. + if (linkage != Function::InternalLinkage && + linkage != Function::PrivateLinkage) { + if (const char *GlobalEHDirective = TAI->getGlobalEHDirective()) + O << GlobalEHDirective << EHFrameInfo.FnName << "\n"; + } + + // If corresponding function is weak definition, this should be too. + if ((linkage == Function::WeakAnyLinkage || + linkage == Function::WeakODRLinkage || + linkage == Function::LinkOnceAnyLinkage || + linkage == Function::LinkOnceODRLinkage) && + TAI->getWeakDefDirective()) + O << TAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; + + // If there are no calls then you can't unwind. This may mean we can omit the + // EH Frame, but some environments do not handle weak absolute symbols. If + // UnwindTablesMandatory is set we cannot do this optimization; the unwind + // info is to be available for non-EH uses. + if (!EHFrameInfo.hasCalls && + !UnwindTablesMandatory && + ((linkage != Function::WeakAnyLinkage && + linkage != Function::WeakODRLinkage && + linkage != Function::LinkOnceAnyLinkage && + linkage != Function::LinkOnceODRLinkage) || + !TAI->getWeakDefDirective() || + TAI->getSupportsWeakOmittedEHFrame())) { + O << EHFrameInfo.FnName << " = 0\n"; + // This name has no connection to the function, so it might get + // dead-stripped when the function is not, erroneously. Prohibit + // dead-stripping unconditionally. 
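+    // For example, on Darwin TAI->getUsedDirective() is expected to yield
+    // ".no_dead_strip" (cited here as an illustrative assumption), so for a
+    // function _foo this prints:
+    //
+    //   _foo.eh = 0
+    //   .no_dead_strip _foo.eh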
+ if (const char *UsedDirective = TAI->getUsedDirective()) + O << UsedDirective << EHFrameInfo.FnName << "\n\n"; + } else { + O << EHFrameInfo.FnName << ":\n"; + + // EH frame header. + EmitDifference("eh_frame_end", EHFrameInfo.Number, + "eh_frame_begin", EHFrameInfo.Number, true); + Asm->EOL("Length of Frame Information Entry"); + + EmitLabel("eh_frame_begin", EHFrameInfo.Number); + + if (TAI->doesRequireNonLocalEHFrameLabel()) { + PrintRelDirective(true, true); + PrintLabelName("eh_frame_begin", EHFrameInfo.Number); + + if (!TAI->isAbsoluteEHSectionOffsets()) + O << "-EH_frame" << EHFrameInfo.PersonalityIndex; + } else { + EmitSectionOffset("eh_frame_begin", "eh_frame_common", + EHFrameInfo.Number, EHFrameInfo.PersonalityIndex, + true, true, false); + } + + Asm->EOL("FDE CIE offset"); + + EmitReference("eh_func_begin", EHFrameInfo.Number, true, true); + Asm->EOL("FDE initial location"); + EmitDifference("eh_func_end", EHFrameInfo.Number, + "eh_func_begin", EHFrameInfo.Number, true); + Asm->EOL("FDE address range"); + + // If there is a personality and landing pads then point to the language + // specific data area in the exception table. + if (EHFrameInfo.PersonalityIndex) { + Asm->EmitULEB128Bytes(4); + Asm->EOL("Augmentation size"); + + if (EHFrameInfo.hasLandingPads) + EmitReference("exception", EHFrameInfo.Number, true, true); + else + Asm->EmitInt32((int)0); + Asm->EOL("Language Specific Data Area"); + } else { + Asm->EmitULEB128Bytes(0); + Asm->EOL("Augmentation size"); + } + + // Indicate locations of function specific callee saved registers in frame. + EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves, + true); + + // On Darwin the linker honors the alignment of eh_frame, which means it + // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you + // get holes which confuse readers of eh_frame. + Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3, + 0, 0, false); + EmitLabel("eh_frame_end", EHFrameInfo.Number); + + // If the function is marked used, this table should be also. We cannot + // make the mark unconditional in this case, since retaining the table also + // retains the function in this case, and there is code around that depends + // on unused functions (calling undefined externals) being dead-stripped to + // link correctly. Yes, there really is. + if (MMI->getUsedFunctions().count(EHFrameInfo.function)) + if (const char *UsedDirective = TAI->getUsedDirective()) + O << UsedDirective << EHFrameInfo.FnName << "\n\n"; + } +} + +/// EmitExceptionTable - Emit landing pads and actions. +/// +/// The general organization of the table is complex, but the basic concepts are +/// easy. First there is a header which describes the location and organization +/// of the three components that follow. +/// +/// 1. The landing pad site information describes the range of code covered by +/// the try. In our case it's an accumulation of the ranges covered by the +/// invokes in the try. There is also a reference to the landing pad that +/// handles the exception once processed. Finally an index into the actions +/// table. +/// 2. The action table, in our case, is composed of pairs of type ids and next +/// action offset. Starting with the action index from the landing pad +/// site, each type Id is checked for a match to the current exception. If +/// it matches then the exception and type id are passed on to the landing +/// pad. Otherwise the next action is looked up. This chain is terminated +/// with a next action of zero. 
If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type id table contains references to all the C++ typeinfo for all
+///    catches in the function. This table is reverse indexed, base 1.

+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+                                       const LandingPadInfo *R) {
+  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+  unsigned LSize = LIds.size(), RSize = RIds.size();
+  unsigned MinSize = LSize < RSize ? LSize : RSize;
+  unsigned Count = 0;
+
+  for (; Count != MinSize; ++Count)
+    if (LIds[Count] != RIds[Count])
+      return Count;
+
+  return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+  unsigned LSize = LIds.size(), RSize = RIds.size();
+  unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+  for (unsigned i = 0; i != MinSize; ++i)
+    if (LIds[i] != RIds[i])
+      return LIds[i] < RIds[i];
+
+  return LSize < RSize;
+}
+
+void DwarfException::EmitExceptionTable() {
+  const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+  const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+  if (PadInfos.empty()) return;
+
+  // Sort the landing pads in order of their type ids. This is used to fold
+  // duplicate actions.
+  SmallVector<const LandingPadInfo *, 64> LandingPads;
+  LandingPads.reserve(PadInfos.size());
+  for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+    LandingPads.push_back(&PadInfos[i]);
+  std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+  // Negative type ids index into FilterIds, positive type ids index into
+  // TypeInfos. The value written for a positive type id is just the type id
+  // itself. For a negative type id, however, the value written is the
+  // (negative) byte offset of the corresponding FilterIds entry. The byte
+  // offset is usually equal to the type id, because the FilterIds entries are
+  // written using a variable width encoding which outputs one byte per entry
+  // as long as the value written is not too large, but can differ. This kind
+  // of complication does not occur for positive type ids because type infos
+  // are output using a fixed width encoding. FilterOffsets[i] holds the byte
+  // offset corresponding to FilterIds[i].
+  SmallVector<int, 16> FilterOffsets;
+  FilterOffsets.reserve(FilterIds.size());
+  int Offset = -1;
+  for (std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+         E = FilterIds.end(); I != E; ++I) {
+    FilterOffsets.push_back(Offset);
+    Offset -= TargetAsmInfo::getULEB128Size(*I);
+  }
+
+  // Compute the actions table and gather the first action index for each
+  // landing pad site.
+  SmallVector<ActionEntry, 32> Actions;
+  SmallVector<unsigned, 64> FirstActions;
+  FirstActions.reserve(LandingPads.size());
+
+  int FirstAction = 0;
+  unsigned SizeActions = 0;
+  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+    const LandingPadInfo *LP = LandingPads[i];
+    const std::vector<int> &TypeIds = LP->TypeIds;
+    const unsigned NumShared = i ?
SharedTypeIds(LP, LandingPads[i-1]) : 0;
+    unsigned SizeSiteActions = 0;
+
+    if (NumShared < TypeIds.size()) {
+      unsigned SizeAction = 0;
+      ActionEntry *PrevAction = 0;
+
+      if (NumShared) {
+        const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+        assert(Actions.size());
+        PrevAction = &Actions.back();
+        SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+          TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+
+        for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+          SizeAction -=
+            TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+          SizeAction += -PrevAction->NextAction;
+          PrevAction = PrevAction->Previous;
+        }
+      }
+
+      // Compute the actions.
+      for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+        int TypeID = TypeIds[I];
+        assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+        int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+        unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+
+        int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+        SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+        SizeSiteActions += SizeAction;
+
+        ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+        Actions.push_back(Action);
+
+        PrevAction = &Actions.back();
+      }
+
+      // Record the first action of the landing pad site.
+      FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+    } // else identical - re-use previous FirstAction
+
+    FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to size.
+    SizeActions += SizeSiteActions;
+  }
+
+  // Compute the call-site table. The entry for an invoke has a try-range
+  // containing the call, a non-zero landing pad and an appropriate action. The
+  // entry for an ordinary call has a try-range containing the call and zero
+  // for the landing pad and the action. Calls marked 'nounwind' have no entry
+  // and must not be contained in the try-range of any entry - they form gaps
+  // in the table. Entries must be ordered by try-range address.
+  SmallVector<CallSiteEntry, 64> CallSites;
+
+  RangeMapType PadMap;
+
+  // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+  // by try-range labels when lowered). Ordinary calls do not, so appropriate
+  // try-ranges for them need to be deduced.
+  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+    const LandingPadInfo *LandingPad = LandingPads[i];
+    for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+      unsigned BeginLabel = LandingPad->BeginLabels[j];
+      assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+      PadRange P = { i, j };
+      PadMap[BeginLabel] = P;
+    }
+  }
+
+  // The end label of the previous invoke or nounwind try-range.
+  unsigned LastLabel = 0;
+
+  // Whether there is a potentially throwing instruction (currently this means
+  // an ordinary call) between the end of the previous try-range and now.
+  bool SawPotentiallyThrowing = false;
+
+  // Whether the last callsite entry was for an invoke.
+  bool PreviousIsInvoke = false;
+
+  // Visit all instructions in order of address.
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+         MI != E; ++MI) {
+      if (!MI->isLabel()) {
+        SawPotentiallyThrowing |= MI->getDesc().isCall();
+        continue;
+      }
+
+      unsigned BeginLabel = MI->getOperand(0).getImm();
+      assert(BeginLabel && "Invalid label!");
+
+      // End of the previous try-range?
+ if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + PadRange P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // If some instruction between the previous try-range and this one may + // throw, create a call-site entry with no landing pad for the region + // between the try-ranges. + if (SawPotentiallyThrowing) { + CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; + CallSites.push_back(Site); + PreviousIsInvoke = false; + } + + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + assert(BeginLabel && LastLabel && "Invalid landing pad!"); + + if (LandingPad->LandingPadLabel) { + // This try-range is for an invoke. + CallSiteEntry Site = {BeginLabel, LastLabel, + LandingPad->LandingPadLabel, + FirstActions[P.PadIndex]}; + + // Try to merge with the previous call-site. + if (PreviousIsInvoke) { + CallSiteEntry &Prev = CallSites.back(); + if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { + // Extend the range of the previous entry. + Prev.EndLabel = Site.EndLabel; + continue; + } + } + + // Otherwise, create a new call-site. + CallSites.push_back(Site); + PreviousIsInvoke = true; + } else { + // Create a gap. + PreviousIsInvoke = false; + } + } + } + + // If some instruction between the previous try-range and the end of the + // function may throw, create a call-site entry with no landing pad for the + // region following the try-range. + if (SawPotentiallyThrowing) { + CallSiteEntry Site = {LastLabel, 0, 0, 0}; + CallSites.push_back(Site); + } + + // Final tallies. + + // Call sites. + const unsigned SiteStartSize = sizeof(int32_t); // DW_EH_PE_udata4 + const unsigned SiteLengthSize = sizeof(int32_t); // DW_EH_PE_udata4 + const unsigned LandingPadSize = sizeof(int32_t); // DW_EH_PE_udata4 + unsigned SizeSites = CallSites.size() * (SiteStartSize + + SiteLengthSize + + LandingPadSize); + for (unsigned i = 0, e = CallSites.size(); i < e; ++i) + SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action); + + // Type infos. + const unsigned TypeInfoSize = TD->getPointerSize(); // DW_EH_PE_absptr + unsigned SizeTypes = TypeInfos.size() * TypeInfoSize; + + unsigned TypeOffset = sizeof(int8_t) + // Call site format + TargetAsmInfo::getULEB128Size(SizeSites) + // Call-site table length + SizeSites + SizeActions + SizeTypes; + + unsigned TotalSize = sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset + TypeOffset; + + unsigned SizeAlign = (4 - TotalSize) & 3; + + // Begin the exception table. + Asm->SwitchToDataSection(TAI->getDwarfExceptionSection()); + Asm->EmitAlignment(2, 0, 0, false); + O << "GCC_except_table" << SubprogramCount << ":\n"; + + for (unsigned i = 0; i != SizeAlign; ++i) { + Asm->EmitInt8(0); + Asm->EOL("Padding"); + } + + EmitLabel("exception", SubprogramCount); + + // Emit the header. 
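+  // A worked size example with hypothetical numbers: for SizeSites = 20,
+  // SizeActions = 4 and SizeTypes = 8, TypeOffset = 1 + 1 + 20 + 4 + 8 = 34
+  // (call-site format byte, one ULEB128 byte encoding the table length, then
+  // the three tables) and TotalSize = 1 + 1 + 1 + 34 = 37, so the padding
+  // emitted above was SizeAlign = (4 - 37) & 3 = 3 bytes, rounding the table
+  // out to a 4-byte boundary.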
+ Asm->EmitInt8(dwarf::DW_EH_PE_omit); + Asm->EOL("LPStart format (DW_EH_PE_omit)"); + Asm->EmitInt8(dwarf::DW_EH_PE_absptr); + Asm->EOL("TType format (DW_EH_PE_absptr)"); + Asm->EmitULEB128Bytes(TypeOffset); + Asm->EOL("TType base offset"); + Asm->EmitInt8(dwarf::DW_EH_PE_udata4); + Asm->EOL("Call site format (DW_EH_PE_udata4)"); + Asm->EmitULEB128Bytes(SizeSites); + Asm->EOL("Call-site table length"); + + // Emit the landing pad site information. + for (unsigned i = 0; i < CallSites.size(); ++i) { + CallSiteEntry &S = CallSites[i]; + const char *BeginTag; + unsigned BeginNumber; + + if (!S.BeginLabel) { + BeginTag = "eh_func_begin"; + BeginNumber = SubprogramCount; + } else { + BeginTag = "label"; + BeginNumber = S.BeginLabel; + } + + EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount, + true, true); + Asm->EOL("Region start"); + + if (!S.EndLabel) + EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber, + true); + else + EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true); + + Asm->EOL("Region length"); + + if (!S.PadLabel) + Asm->EmitInt32(0); + else + EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, + true, true); + + Asm->EOL("Landing pad"); + + Asm->EmitULEB128Bytes(S.Action); + Asm->EOL("Action"); + } + + // Emit the actions. + for (unsigned I = 0, N = Actions.size(); I != N; ++I) { + ActionEntry &Action = Actions[I]; + + Asm->EmitSLEB128Bytes(Action.ValueForTypeID); + Asm->EOL("TypeInfo index"); + Asm->EmitSLEB128Bytes(Action.NextAction); + Asm->EOL("Next action"); + } + + // Emit the type ids. + for (unsigned M = TypeInfos.size(); M; --M) { + GlobalVariable *GV = TypeInfos[M - 1]; + PrintRelDirective(); + + if (GV) { + std::string GLN; + O << Asm->getGlobalLinkName(GV, GLN); + } else { + O << "0"; + } + + Asm->EOL("TypeInfo"); + } + + // Emit the filter typeids. + for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { + unsigned TypeID = FilterIds[j]; + Asm->EmitULEB128Bytes(TypeID); + Asm->EOL("Filter TypeInfo index"); + } + + Asm->EmitAlignment(2, 0, 0, false); +} + +/// EndModule - Emit all exception information that should come after the +/// content. +void DwarfException::EndModule() { + if (TimePassesIsEnabled) + ExceptionTimer->startTimer(); + + if (shouldEmitMovesModule || shouldEmitTableModule) { + const std::vector<Function *> Personalities = MMI->getPersonalities(); + for (unsigned i = 0; i < Personalities.size(); ++i) + EmitCommonEHFrame(Personalities[i], i); + + for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(), + E = EHFrames.end(); I != E; ++I) + EmitEHFrame(*I); + } + + if (TimePassesIsEnabled) + ExceptionTimer->stopTimer(); +} + +/// BeginFunction - Gather pre-function exception information. Assumes being +/// emitted immediately after the function entry point. +void DwarfException::BeginFunction(MachineFunction *MF) { + if (TimePassesIsEnabled) + ExceptionTimer->startTimer(); + + this->MF = MF; + shouldEmitTable = shouldEmitMoves = false; + + if (MMI && TAI->doesSupportExceptionHandling()) { + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + // If any landing pads survive, we need an EH table. + if (MMI->getLandingPads().size()) + shouldEmitTable = true; + + // See if we need frame move info. + if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + shouldEmitMoves = true; + + if (shouldEmitMoves || shouldEmitTable) + // Assumes in correct section after the entry point. 
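+      // With a Darwin-style private label prefix "L" (an assumption for
+      // illustration), this prints "Leh_func_begin1:" for the first function;
+      // the matching "eh_func_end" label is emitted by EndFunction() below.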
+ EmitLabel("eh_func_begin", ++SubprogramCount); + } + + shouldEmitTableModule |= shouldEmitTable; + shouldEmitMovesModule |= shouldEmitMoves; + + if (TimePassesIsEnabled) + ExceptionTimer->stopTimer(); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void DwarfException::EndFunction() { + if (TimePassesIsEnabled) + ExceptionTimer->startTimer(); + + if (shouldEmitMoves || shouldEmitTable) { + EmitLabel("eh_func_end", SubprogramCount); + EmitExceptionTable(); + + // Save EH frame information + std::string Name; + EHFrames.push_back( + FunctionEHFrameInfo(getAsm()->getCurrentFunctionEHName(MF, Name), + SubprogramCount, + MMI->getPersonalityIndex(), + MF->getFrameInfo()->hasCalls(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + MF->getFunction())); + } + + if (TimePassesIsEnabled) + ExceptionTimer->stopTimer(); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h new file mode 100644 index 0000000..4479af2 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -0,0 +1,178 @@ +//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ +#define CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ + +#include "DIE.h" +#include "DwarfPrinter.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/ADT/DenseMap.h" +#include <string> + +namespace llvm { + +struct LandingPadInfo; +class MachineModuleInfo; +class TargetAsmInfo; +class Timer; +class raw_ostream; + +//===----------------------------------------------------------------------===// +/// DwarfException - Emits Dwarf exception handling directives. +/// +class VISIBILITY_HIDDEN DwarfException : public Dwarf { + struct FunctionEHFrameInfo { + std::string FnName; + unsigned Number; + unsigned PersonalityIndex; + bool hasCalls; + bool hasLandingPads; + std::vector<MachineMove> Moves; + const Function * function; + + FunctionEHFrameInfo(const std::string &FN, unsigned Num, unsigned P, + bool hC, bool hL, + const std::vector<MachineMove> &M, + const Function *f): + FnName(FN), Number(Num), PersonalityIndex(P), + hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { } + }; + + std::vector<FunctionEHFrameInfo> EHFrames; + + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. + bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; + + /// shouldEmitFrameModule - Per-module flag to indicate if frame moves + /// should be emitted. + bool shouldEmitMovesModule; + + /// ExceptionTimer - Timer for the Dwarf exception writer. + Timer *ExceptionTimer; + + /// EmitCommonEHFrame - Emit the common eh unwind frame. + /// + void EmitCommonEHFrame(const Function *Personality, unsigned Index); + + /// EmitEHFrame - Emit function exception frame information. 
+  ///
+  void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo);
+
+  /// EmitExceptionTable - Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy. First there is a header which describes the location and
+  /// organization of the three components that follow.
+  /// 1. The landing pad site information describes the range of code covered
+  ///    by the try. In our case it's an accumulation of the ranges covered
+  ///    by the invokes in the try. There is also a reference to the landing
+  ///    pad that handles the exception once processed. Finally an index into
+  ///    the actions table.
+  /// 2. The action table, in our case, is composed of pairs of type ids
+  ///    and next action offset. Starting with the action index from the
+  ///    landing pad site, each type Id is checked for a match to the current
+  ///    exception. If it matches then the exception and type id are passed
+  ///    on to the landing pad. Otherwise the next action is looked up. This
+  ///    chain is terminated with a next action of zero. If no type id is
+  ///    found then the frame is unwound and handling continues.
+  /// 3. Type id table contains references to all the C++ typeinfo for all
+  ///    catches in the function. This table is reverse indexed, base 1.
+
+  /// SharedTypeIds - How many leading type ids two landing pads have in common.
+  static unsigned SharedTypeIds(const LandingPadInfo *L,
+                                const LandingPadInfo *R);
+
+  /// PadLT - Order landing pads lexicographically by type id.
+  static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+  struct KeyInfo {
+    static inline unsigned getEmptyKey() { return -1U; }
+    static inline unsigned getTombstoneKey() { return -2U; }
+    static unsigned getHashValue(const unsigned &Key) { return Key; }
+    static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
+    static bool isPod() { return true; }
+  };
+
+  /// ActionEntry - Structure describing an entry in the actions table.
+  struct ActionEntry {
+    int ValueForTypeID; // The value to write - may not be equal to the type id.
+    int NextAction;
+    struct ActionEntry *Previous;
+  };
+
+  /// PadRange - Structure holding a try-range and the associated landing pad.
+  struct PadRange {
+    // The index of the landing pad.
+    unsigned PadIndex;
+    // The index of the begin and end labels in the landing pad's label lists.
+    unsigned RangeIndex;
+  };
+
+  typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+
+  /// CallSiteEntry - Structure describing an entry in the call-site table.
+  struct CallSiteEntry {
+    // The 'try-range' is BeginLabel .. EndLabel.
+    unsigned BeginLabel; // zero indicates the start of the function.
+    unsigned EndLabel;   // zero indicates the end of the function.
+    // The landing pad starts at PadLabel.
+    unsigned PadLabel;   // zero indicates that there is no landing pad.
+    unsigned Action;
+  };
+
+  void EmitExceptionTable();
+
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
+  virtual ~DwarfException();
+
+  /// SetModuleInfo - Set machine module information when it's known that pass
+  /// manager has created it. Set by the target AsmPrinter.
+  void SetModuleInfo(MachineModuleInfo *mmi) {
+    MMI = mmi;
+  }
+
+  /// BeginModule - Emit all exception information that should come prior to the
+  /// content.
+ void BeginModule(Module *M) { + this->M = M; + } + + /// EndModule - Emit all exception information that should come after the + /// content. + void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + void BeginFunction(MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + void EndFunction(); +}; + +} // End of namespace llvm + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp new file mode 100644 index 0000000..8021b7c --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp @@ -0,0 +1,35 @@ +//===--- lib/CodeGen/DwarfLabel.cpp - Dwarf Label -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// DWARF Labels +// +//===----------------------------------------------------------------------===// + +#include "DwarfLabel.h" +#include "llvm/ADT/FoldingSet.h" +#include <ostream> + +using namespace llvm; + +/// Profile - Used to gather unique data for the folding set. +/// +void DWLabel::Profile(FoldingSetNodeID &ID) const { + ID.AddString(Tag); + ID.AddInteger(Number); +} + +#ifndef NDEBUG +void DWLabel::print(std::ostream *O) const { + if (O) print(*O); +} +void DWLabel::print(std::ostream &O) const { + O << "." << Tag; + if (Number) O << Number; +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h new file mode 100644 index 0000000..b493903 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h @@ -0,0 +1,56 @@ +//===--- lib/CodeGen/DwarfLabel.h - Dwarf Label -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// DWARF Labels. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__ +#define CODEGEN_ASMPRINTER_DWARFLABEL_H__ + +#include "llvm/Support/Compiler.h" +#include <iosfwd> +#include <vector> + +namespace llvm { + class FoldingSetNodeID; + + //===--------------------------------------------------------------------===// + /// DWLabel - Labels are used to track locations in the assembler file. + /// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim, + /// where the tag is a category of label (Ex. location) and number is a value + /// unique in that category. + class VISIBILITY_HIDDEN DWLabel { + /// Tag - Label category tag. Should always be a statically declared C + /// string. + /// + const char *Tag; + + /// Number - Value to make label unique. + /// + unsigned Number; + public: + DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {} + + // Accessors. + const char *getTag() const { return Tag; } + unsigned getNumber() const { return Number; } + + /// Profile - Used to gather unique data for the folding set. 
+ /// + void Profile(FoldingSetNodeID &ID) const; + +#ifndef NDEBUG + void print(std::ostream *O) const; + void print(std::ostream &O) const; +#endif + }; +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp new file mode 100644 index 0000000..45e7dd3 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -0,0 +1,235 @@ +//===--- lib/CodeGen/DwarfPrinter.cpp - Dwarf Printer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Emit general DWARF directives. +// +//===----------------------------------------------------------------------===// + +#include "DwarfPrinter.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <ostream> + +using namespace llvm; + +Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, + const char *flavor) +: O(OS), Asm(A), TAI(T), TD(Asm->TM.getTargetData()), + RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), + SubprogramCount(0), Flavor(flavor), SetCounter(1) {} + +void Dwarf::PrintRelDirective(bool Force32Bit, bool isInSection) const { + if (isInSection && TAI->getDwarfSectionOffsetDirective()) + O << TAI->getDwarfSectionOffsetDirective(); + else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); +} + +/// PrintLabelName - Print label name in form used by Dwarf writer. +/// +void Dwarf::PrintLabelName(const char *Tag, unsigned Number) const { + O << TAI->getPrivateGlobalPrefix() << Tag; + if (Number) O << Number; +} +void Dwarf::PrintLabelName(const char *Tag, unsigned Number, + const char *Suffix) const { + O << TAI->getPrivateGlobalPrefix() << Tag; + if (Number) O << Number; + O << Suffix; +} + +/// EmitLabel - Emit location label for internal use by Dwarf. +/// +void Dwarf::EmitLabel(const char *Tag, unsigned Number) const { + PrintLabelName(Tag, Number); + O << ":\n"; +} + +/// EmitReference - Emit a reference to a label. +/// +void Dwarf::EmitReference(const char *Tag, unsigned Number, + bool IsPCRelative, bool Force32Bit) const { + PrintRelDirective(Force32Bit); + PrintLabelName(Tag, Number); + if (IsPCRelative) O << "-" << TAI->getPCSymbol(); +} +void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative, + bool Force32Bit) const { + PrintRelDirective(Force32Bit); + O << Name; + if (IsPCRelative) O << "-" << TAI->getPCSymbol(); +} + +/// EmitDifference - Emit the difference between two labels. Some assemblers do +/// not behave with absolute expressions with data directives, so there is an +/// option (needsSet) to use an intermediary set expression. 
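+/// For example, assuming a Darwin-style "L" private label prefix and the
+/// "eh" flavor (illustrative values only), emitting the difference of
+/// ("eh_frame_common_end", 1) and ("eh_frame_common_begin", 1) with
+/// needsSet() true prints:
+///
+///   .set Lset1eh,Leh_frame_common_end1-Leh_frame_common_begin1
+///   .long Lset1eh
+///
+/// while a target that handles the expression directly gets:
+///
+///   .long Leh_frame_common_end1-Leh_frame_common_begin1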
+void Dwarf::EmitDifference(const char *TagHi, unsigned NumberHi, + const char *TagLo, unsigned NumberLo, + bool IsSmall) { + if (TAI->needsSet()) { + O << "\t.set\t"; + PrintLabelName("set", SetCounter, Flavor); + O << ","; + PrintLabelName(TagHi, NumberHi); + O << "-"; + PrintLabelName(TagLo, NumberLo); + O << "\n"; + + PrintRelDirective(IsSmall); + PrintLabelName("set", SetCounter, Flavor); + ++SetCounter; + } else { + PrintRelDirective(IsSmall); + PrintLabelName(TagHi, NumberHi); + O << "-"; + PrintLabelName(TagLo, NumberLo); + } +} + +void Dwarf::EmitSectionOffset(const char* Label, const char* Section, + unsigned LabelNumber, unsigned SectionNumber, + bool IsSmall, bool isEH, + bool useSet) { + bool printAbsolute = false; + if (isEH) + printAbsolute = TAI->isAbsoluteEHSectionOffsets(); + else + printAbsolute = TAI->isAbsoluteDebugSectionOffsets(); + + if (TAI->needsSet() && useSet) { + O << "\t.set\t"; + PrintLabelName("set", SetCounter, Flavor); + O << ","; + PrintLabelName(Label, LabelNumber); + + if (!printAbsolute) { + O << "-"; + PrintLabelName(Section, SectionNumber); + } + + O << "\n"; + PrintRelDirective(IsSmall); + PrintLabelName("set", SetCounter, Flavor); + ++SetCounter; + } else { + PrintRelDirective(IsSmall, true); + PrintLabelName(Label, LabelNumber); + + if (!printAbsolute) { + O << "-"; + PrintLabelName(Section, SectionNumber); + } + } +} + +/// EmitFrameMoves - Emit frame instructions to describe the layout of the +/// frame. +void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID, + const std::vector<MachineMove> &Moves, bool isEH) { + int stackGrowth = + Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TD->getPointerSize() : -TD->getPointerSize(); + bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0; + + for (unsigned i = 0, N = Moves.size(); i < N; ++i) { + const MachineMove &Move = Moves[i]; + unsigned LabelID = Move.getLabelID(); + + if (LabelID) { + LabelID = MMI->MappedLabel(LabelID); + + // Throw out move if the label is invalid. + if (!LabelID) continue; + } + + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // Advance row if new location. + if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) { + Asm->EmitInt8(dwarf::DW_CFA_advance_loc4); + Asm->EOL("DW_CFA_advance_loc4"); + EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true); + Asm->EOL(); + + BaseLabelID = LabelID; + BaseLabel = "label"; + IsLocal = true; + } + + // If advancing cfa. 
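+    // Each MachineMove below becomes one DWARF CFA opcode: a VirtualFP
+    // destination updates the CFA rule (DW_CFA_def_cfa_offset when the
+    // source is VirtualFP plus an offset, DW_CFA_def_cfa when it is a real
+    // register), a VirtualFP source redefines only the CFA register
+    // (DW_CFA_def_cfa_register), and any other pair records that the source
+    // register is saved at the destination offset, scaled by the data
+    // alignment factor (DW_CFA_offset and its extended forms).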
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      if (!Src.isReg()) {
+        if (Src.getReg() == MachineLocation::VirtualFP) {
+          Asm->EmitInt8(dwarf::DW_CFA_def_cfa_offset);
+          Asm->EOL("DW_CFA_def_cfa_offset");
+        } else {
+          Asm->EmitInt8(dwarf::DW_CFA_def_cfa);
+          Asm->EOL("DW_CFA_def_cfa");
+          Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), isEH));
+          Asm->EOL("Register");
+        }
+
+        int Offset = -Src.getOffset();
+
+        Asm->EmitULEB128Bytes(Offset);
+        Asm->EOL("Offset");
+      } else {
+        assert(0 && "Machine move not supported yet.");
+      }
+    } else if (Src.isReg() &&
+               Src.getReg() == MachineLocation::VirtualFP) {
+      if (Dst.isReg()) {
+        Asm->EmitInt8(dwarf::DW_CFA_def_cfa_register);
+        Asm->EOL("DW_CFA_def_cfa_register");
+        Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
+        Asm->EOL("Register");
+      } else {
+        assert(0 && "Machine move not supported yet.");
+      }
+    } else {
+      unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
+      int Offset = Dst.getOffset() / stackGrowth;
+
+      if (Offset < 0) {
+        Asm->EmitInt8(dwarf::DW_CFA_offset_extended_sf);
+        Asm->EOL("DW_CFA_offset_extended_sf");
+        Asm->EmitULEB128Bytes(Reg);
+        Asm->EOL("Reg");
+        Asm->EmitSLEB128Bytes(Offset);
+        Asm->EOL("Offset");
+      } else if (Reg < 64) {
+        Asm->EmitInt8(dwarf::DW_CFA_offset + Reg);
+        if (Asm->isVerbose())
+          Asm->EOL("DW_CFA_offset + Reg (" + utostr(Reg) + ")");
+        else
+          Asm->EOL();
+        Asm->EmitULEB128Bytes(Offset);
+        Asm->EOL("Offset");
+      } else {
+        Asm->EmitInt8(dwarf::DW_CFA_offset_extended);
+        Asm->EOL("DW_CFA_offset_extended");
+        Asm->EmitULEB128Bytes(Reg);
+        Asm->EOL("Reg");
+        Asm->EmitULEB128Bytes(Offset);
+        Asm->EOL("Offset");
+      }
+    }
+  }
+} diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h new file mode 100644 index 0000000..6e75992 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -0,0 +1,153 @@ +//===--- lib/CodeGen/DwarfPrinter.h - Dwarf Printer -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit general DWARF directives.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+#define CODEGEN_ASMPRINTER_DWARFPRINTER_H__
+
+#include "DwarfLabel.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+namespace llvm {
+  class AsmPrinter;
+  class MachineFunction;
+  class MachineModuleInfo;
+  class Module;
+  class TargetAsmInfo;
+  class TargetData;
+  class TargetRegisterInfo;
+
+  class VISIBILITY_HIDDEN Dwarf {
+  protected:
+    //===------------------------------------------------------------------===//
+    // Core attributes used by the DWARF printer.
+    //
+
+    /// O - Stream to .s file.
+    ///
+    raw_ostream &O;
+
+    /// Asm - Target of Dwarf emission.
+    ///
+    AsmPrinter *Asm;
+
+    /// TAI - Target asm information.
+    ///
+    const TargetAsmInfo *TAI;
+
+    /// TD - Target data.
+    ///
+    const TargetData *TD;
+
+    /// RI - Register Information.
+    ///
+    const TargetRegisterInfo *RI;
+
+    /// M - Current module.
+    ///
+    Module *M;
+
+    /// MF - Current machine function.
+    ///
+    MachineFunction *MF;
+
+    /// MMI - Collected machine module information.
+    ///
+    MachineModuleInfo *MMI;
+
+    /// SubprogramCount - The running count of functions being compiled.
+ /// + unsigned SubprogramCount; + + /// Flavor - A unique string indicating what dwarf producer this is, used to + /// unique labels. + /// + const char * const Flavor; + + /// SetCounter - A unique number for each '.set' directive. + /// + unsigned SetCounter; + + Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, + const char *flavor); + public: + //===------------------------------------------------------------------===// + // Accessors. + // + const AsmPrinter *getAsm() const { return Asm; } + MachineModuleInfo *getMMI() const { return MMI; } + const TargetAsmInfo *getTargetAsmInfo() const { return TAI; } + const TargetData *getTargetData() const { return TD; } + + void PrintRelDirective(bool Force32Bit = false, + bool isInSection = false) const; + + + /// PrintLabelName - Print label name in form used by Dwarf writer. + /// + void PrintLabelName(const DWLabel &Label) const { + PrintLabelName(Label.getTag(), Label.getNumber()); + } + void PrintLabelName(const char *Tag, unsigned Number) const; + void PrintLabelName(const char *Tag, unsigned Number, + const char *Suffix) const; + + /// EmitLabel - Emit location label for internal use by Dwarf. + /// + void EmitLabel(const DWLabel &Label) const { + EmitLabel(Label.getTag(), Label.getNumber()); + } + void EmitLabel(const char *Tag, unsigned Number) const; + + /// EmitReference - Emit a reference to a label. + /// + void EmitReference(const DWLabel &Label, bool IsPCRelative = false, + bool Force32Bit = false) const { + EmitReference(Label.getTag(), Label.getNumber(), + IsPCRelative, Force32Bit); + } + void EmitReference(const char *Tag, unsigned Number, + bool IsPCRelative = false, + bool Force32Bit = false) const; + void EmitReference(const std::string &Name, bool IsPCRelative = false, + bool Force32Bit = false) const; + + /// EmitDifference - Emit the difference between two labels. Some + /// assemblers do not behave with absolute expressions with data directives, + /// so there is an option (needsSet) to use an intermediary set expression. + void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo, + bool IsSmall = false) { + EmitDifference(LabelHi.getTag(), LabelHi.getNumber(), + LabelLo.getTag(), LabelLo.getNumber(), + IsSmall); + } + void EmitDifference(const char *TagHi, unsigned NumberHi, + const char *TagLo, unsigned NumberLo, + bool IsSmall = false); + + void EmitSectionOffset(const char* Label, const char* Section, + unsigned LabelNumber, unsigned SectionNumber, + bool IsSmall = false, bool isEH = false, + bool useSet = true); + + /// EmitFrameMoves - Emit frame instructions to describe the layout of the + /// frame. + void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID, + const std::vector<MachineMove> &Moves, bool isEH); +}; + +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp new file mode 100644 index 0000000..483ee559 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -0,0 +1,129 @@ +//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DwarfWriter.h" +#include "DwarfDebug.h" +#include "DwarfException.h" +#include "llvm/CodeGen/MachineModuleInfo.h" + +using namespace llvm; + +static RegisterPass<DwarfWriter> +X("dwarfwriter", "DWARF Information Writer"); +char DwarfWriter::ID = 0; + +//===----------------------------------------------------------------------===// +/// DwarfWriter Implementation +/// + +DwarfWriter::DwarfWriter() + : ImmutablePass(&ID), DD(0), DE(0) {} + +DwarfWriter::~DwarfWriter() { + delete DE; + delete DD; +} + +/// BeginModule - Emit all Dwarf sections that should come prior to the +/// content. +void DwarfWriter::BeginModule(Module *M, + MachineModuleInfo *MMI, + raw_ostream &OS, AsmPrinter *A, + const TargetAsmInfo *T) { + DE = new DwarfException(OS, A, T); + DD = new DwarfDebug(OS, A, T); + DE->BeginModule(M); + DD->BeginModule(M); + DD->SetDebugInfo(MMI); + DE->SetModuleInfo(MMI); +} + +/// EndModule - Emit all Dwarf sections that should come after the content. +/// +void DwarfWriter::EndModule() { + DE->EndModule(); + DD->EndModule(); +} + +/// BeginFunction - Gather pre-function debug information. Assumes being +/// emitted immediately after the function entry point. +void DwarfWriter::BeginFunction(MachineFunction *MF) { + DE->BeginFunction(MF); + DD->BeginFunction(MF); +} + +/// EndFunction - Gather and emit post-function debug information. +/// +void DwarfWriter::EndFunction(MachineFunction *MF) { + DD->EndFunction(MF); + DE->EndFunction(); + + if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI()) + // Clear function debug information. + MMI->EndFunction(); +} + +/// RecordSourceLine - Records location information and associates it with a +/// label. Returns a unique label ID used to generate a label and provide +/// correspondence to the source line list. +unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, + DICompileUnit CU) { + return DD->RecordSourceLine(Line, Col, CU); +} + +/// RecordRegionStart - Indicate the start of a region. +unsigned DwarfWriter::RecordRegionStart(GlobalVariable *V) { + return DD->RecordRegionStart(V); +} + +/// RecordRegionEnd - Indicate the end of a region. +unsigned DwarfWriter::RecordRegionEnd(GlobalVariable *V) { + return DD->RecordRegionEnd(V); +} + +/// getRecordSourceLineCount - Count source lines. +unsigned DwarfWriter::getRecordSourceLineCount() { + return DD->getRecordSourceLineCount(); +} + +/// RecordVariable - Indicate the declaration of a local variable. +/// +void DwarfWriter::RecordVariable(GlobalVariable *GV, unsigned FrameIndex, + const MachineInstr *MI) { + DD->RecordVariable(GV, FrameIndex, MI); +} + +/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should +/// be emitted. +bool DwarfWriter::ShouldEmitDwarfDebug() const { + return DD && DD->ShouldEmitDwarfDebug(); +} + +//// RecordInlinedFnStart - Global variable GV is inlined at the location marked +//// by LabelID label. +unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU, + unsigned Line, unsigned Col) { + return DD->RecordInlinedFnStart(SP, CU, Line, Col); +} + +/// RecordInlinedFnEnd - Indicate the end of inlined subroutine. +unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) { + return DD->RecordInlinedFnEnd(SP); +} + +/// RecordVariableScope - Record scope for the variable declared by +/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. 
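
DwarfWriter is a thin facade over DwarfDebug (DD) and DwarfException (DE). A
sketch of the call order the facade expects, before its final forwarding
method below; the driver function and its loop are hypothetical, only the
DwarfWriter calls are from this file:

#include "llvm/CodeGen/DwarfWriter.h"
#include <vector>
using namespace llvm;

// Hypothetical driver; real targets make these calls from their AsmPrinter's
// doInitialization / runOnMachineFunction / doFinalization overrides.
void emitModuleDwarf(DwarfWriter &DW, Module *M, MachineModuleInfo *MMI,
                     raw_ostream &OS, AsmPrinter *AP, const TargetAsmInfo *TAI,
                     std::vector<MachineFunction*> &Fns) {
  DW.BeginModule(M, MMI, OS, AP, TAI);  // sets up DwarfDebug/DwarfException
  for (unsigned i = 0, e = Fns.size(); i != e; ++i) {
    DW.BeginFunction(Fns[i]);           // pre-function labels and scope info
    // ... the target AsmPrinter emits the function body here ...
    DW.EndFunction(Fns[i]);             // post-function debug and EH info
  }
  DW.EndModule();                       // emit the .debug_* / EH sections
}
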
+void DwarfWriter::RecordVariableScope(DIVariable &DV,
+                                      const MachineInstr *DeclareMI) {
+  DD->RecordVariableScope(DV, DeclareMI);
+}
diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile
new file mode 100644
index 0000000..cb5b3f6
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/AsmPrinter/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMAsmPrinter
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 0000000..8ba903a
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,160 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+  class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter {
+  public:
+    void beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+                       const TargetAsmInfo &TAI);
+
+    void finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+                        const TargetAsmInfo &TAI);
+  };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP,
+                           const TargetAsmInfo &TAI, const char *Id) {
+  const std::string &MId = M.getModuleIdentifier();
+
+  std::string Mangled;
+  Mangled += TAI.getGlobalPrefix();
+  Mangled += "caml";
+  size_t Letter = Mangled.size();
+  Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+  Mangled += "__";
+  Mangled += Id;
+
+  // Capitalize the first letter of the module name.
+  Mangled[Letter] = toupper(Mangled[Letter]);
+
+  if (const char *GlobalDirective = TAI.getGlobalDirective())
+    OS << GlobalDirective << Mangled << "\n";
+  OS << Mangled << ":\n";
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+                                           const TargetAsmInfo &TAI) {
+  AP.SwitchToSection(TAI.getTextSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin");
+
+  AP.SwitchToSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+///   extern "C" struct align(sizeof(intptr_t)) {
+///     uint16_t NumDescriptors;
+///     struct align(sizeof(intptr_t)) {
+///       void *ReturnAddress;
+///       uint16_t FrameSize;
+///       uint16_t NumLiveOffsets;
+///       uint16_t LiveOffsets[NumLiveOffsets];
+///     } Descriptors[NumDescriptors];
+///   } caml${module}__frametable;
+///
+/// Note that this precludes programs from having stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). finishAssembly will abort if
+/// either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+                                            const TargetAsmInfo &TAI) {
+  const char *AddressDirective;
+  int AddressAlignLog;
+  if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
+    AddressDirective = TAI.getData32bitsDirective();
+    AddressAlignLog = 2;
+  } else {
+    AddressDirective = TAI.getData64bitsDirective();
+    AddressAlignLog = 3;
+  }
+
+  AP.SwitchToSection(TAI.getTextSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end");
+
+  AP.SwitchToSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end");
+
+  OS << AddressDirective << 0; // FIXME: Why does ocaml emit this??
+  AP.EOL();
+
+  AP.SwitchToSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable");
+
+  for (iterator I = begin(), IE = end(); I != IE; ++I) {
+    GCFunctionInfo &FI = **I;
+
+    uint64_t FrameSize = FI.getFrameSize();
+    if (FrameSize >= 1<<16) {
+      cerr << "Function '" << FI.getFunction().getNameStart()
+           << "' is too large for the ocaml GC! "
+           << "Frame size " << FrameSize << " >= 65536.\n";
+      cerr << "(" << uintptr_t(&FI) << ")\n";
+      abort(); // Very rude!
+    }
+
+    OS << "\t" << TAI.getCommentString() << " live roots for "
+       << FI.getFunction().getNameStart() << "\n";
+
+    for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+      size_t LiveCount = FI.live_size(J);
+      if (LiveCount >= 1<<16) {
+        cerr << "Function '" << FI.getFunction().getNameStart()
+             << "' is too large for the ocaml GC! "
+             << "Live root count " << LiveCount << " >= 65536.\n";
+        abort(); // Very rude!
+      }
+
+      OS << AddressDirective
+         << TAI.getPrivateGlobalPrefix() << "label" << J->Num;
+      AP.EOL("call return address");
+
+      AP.EmitInt16(FrameSize);
+      AP.EOL("stack frame size");
+
+      AP.EmitInt16(LiveCount);
+      AP.EOL("live root count");
+
+      for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+                                         KE = FI.live_end(J); K != KE; ++K) {
+        assert(K->StackOffset < 1<<16 &&
+               "GC root stack offset is outside of fixed stack frame and out "
+               "of range for ocaml GC!");
+
+        OS << "\t.word\t" << K->StackOffset;
+        AP.EOL("stack offset");
+      }
+
+      AP.EmitAlignment(AddressAlignLog);
+    }
+  }
+}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..2635303
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1204 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBBs, which
+// it then removes.
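
The forwarding described above amounts to path compression on unconditional
branch targets. A toy standalone analogue, with strings standing in for basic
blocks rather than the MachineBasicBlock API:

#include <cassert>
#include <map>
#include <string>

int main() {
  // Jump target of every block that ends in an unconditional branch.
  std::map<std::string, std::string> Jmp;
  Jmp["bb1"] = "bb2";
  Jmp["bb2"] = "bb3";   // bb2 is empty apart from "jmp bb3"

  // Forward each branch through a branch-only successor.
  for (std::map<std::string, std::string>::iterator I = Jmp.begin(),
         E = Jmp.end(); I != E; ++I) {
    std::map<std::string, std::string>::iterator Hop = Jmp.find(I->second);
    if (Hop != Jmp.end() && Hop->first != I->first)
      I->second = Hop->second;
  }

  assert(Jmp["bb1"] == "bb3");   // bb2 is now dead: no predecessors remain
  return 0;
}
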
+// +// Note that this pass must be run after register allocation, it cannot handle +// SSA form. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "branchfolding" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumBranchOpts, "Number of branches optimized"); +STATISTIC(NumTailMerge , "Number of block tails merged"); +static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", + cl::init(cl::BOU_UNSET), cl::Hidden); +// Throttle for huge numbers of predecessors (compile speed problems) +static cl::opt<unsigned> +TailMergeThreshold("tail-merge-threshold", + cl::desc("Max number of predecessors to consider tail merging"), + cl::init(150), cl::Hidden); + +namespace { + struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass { + static char ID; + explicit BranchFolder(bool defaultEnableTailMerge) : + MachineFunctionPass(&ID) { + switch (FlagEnableTailMerge) { + case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; + case cl::BOU_TRUE: EnableTailMerge = true; break; + case cl::BOU_FALSE: EnableTailMerge = false; break; + } + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Control Flow Optimizer"; } + const TargetInstrInfo *TII; + MachineModuleInfo *MMI; + bool MadeChange; + private: + // Tail Merging. + bool EnableTailMerge; + bool TailMergeBlocks(MachineFunction &MF); + bool TryMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest); + MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1); + unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength); + void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength); + + typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt; + typedef std::vector<MergePotentialsElt>::iterator MPIterator; + std::vector<MergePotentialsElt> MergePotentials; + + typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt; + std::vector<SameTailElt> SameTails; + + const TargetRegisterInfo *RegInfo; + RegScavenger *RS; + // Branch optzn. 
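
The BranchFolder constructor above folds the tri-state -enable-tail-merge flag
into a plain bool, so an explicit command-line setting always overrides the
per-target default the pass is constructed with. A standalone sketch of that
resolution, with a toy enum in place of cl::boolOrDefault:

// Toy version of the boolOrDefault resolution in BranchFolder's constructor.
enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

bool resolveFlag(BoolOrDefault Flag, bool TargetDefault) {
  switch (Flag) {
  case BOU_TRUE:  return true;           // -enable-tail-merge
  case BOU_FALSE: return false;          // -enable-tail-merge=false
  default:        return TargetDefault;  // unset: defer to the caller
  }
}
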
+ bool OptimizeBranches(MachineFunction &MF); + void OptimizeBlock(MachineBasicBlock *MBB); + void RemoveDeadBlock(MachineBasicBlock *MBB); + bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); + + bool CanFallThrough(MachineBasicBlock *CurBB); + bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond); + }; + char BranchFolder::ID = 0; +} + +FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { + return new BranchFolder(DefaultEnableTailMerge); } + +/// RemoveDeadBlock - Remove the specified dead machine basic block from the +/// function, updating the CFG. +void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { + assert(MBB->pred_empty() && "MBB must be dead!"); + DOUT << "\nRemoving MBB: " << *MBB; + + MachineFunction *MF = MBB->getParent(); + // drop all successors. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_end()-1); + + // If there are any labels in the basic block, unregister them from + // MachineModuleInfo. + if (MMI && !MBB->empty()) { + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->isLabel()) + // The label ID # is always operand #0, an immediate. + MMI->InvalidateLabel(I->getOperand(0).getImm()); + } + } + + // Remove the block. + MF->erase(MBB); +} + +/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def +/// followed by terminators, and if the implicitly defined registers are not +/// used by the terminators, remove those implicit_def's. e.g. +/// BB1: +/// r0 = implicit_def +/// r1 = implicit_def +/// br +/// This block can be optimized away later if the implicit instructions are +/// removed. +bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { + SmallSet<unsigned, 4> ImpDefRegs; + MachineBasicBlock::iterator I = MBB->begin(); + while (I != MBB->end()) { + if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + break; + unsigned Reg = I->getOperand(0).getReg(); + ImpDefRegs.insert(Reg); + for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + ImpDefRegs.insert(SubReg); + ++I; + } + if (ImpDefRegs.empty()) + return false; + + MachineBasicBlock::iterator FirstTerm = I; + while (I != MBB->end()) { + if (!TII->isUnpredicatedTerminator(I)) + return false; + // See if it uses any of the implicitly defined registers. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (ImpDefRegs.count(Reg)) + return false; + } + ++I; + } + + I = MBB->begin(); + while (I != FirstTerm) { + MachineInstr *ImpDefMI = &*I; + ++I; + MBB->erase(ImpDefMI); + } + + return true; +} + +bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + if (!TII) return false; + + RegInfo = MF.getTarget().getRegisterInfo(); + + // Fix CFG. The later algorithms expect it to be right. + bool EverMadeChange = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { + MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) + EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + EverMadeChange |= OptimizeImpDefsBlock(MBB); + } + + RS = RegInfo->requiresRegisterScavenging(MF) ? 
new RegScavenger() : NULL; + + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + + bool MadeChangeThisIteration = true; + while (MadeChangeThisIteration) { + MadeChangeThisIteration = false; + MadeChangeThisIteration |= TailMergeBlocks(MF); + MadeChangeThisIteration |= OptimizeBranches(MF); + EverMadeChange |= MadeChangeThisIteration; + } + + // See if any jump tables have become mergable or dead as the code generator + // did its thing. + MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables(); + if (!JTs.empty()) { + // Figure out how these jump tables should be merged. + std::vector<unsigned> JTMapping; + JTMapping.reserve(JTs.size()); + + // We always keep the 0th jump table. + JTMapping.push_back(0); + + // Scan the jump tables, seeing if there are any duplicates. Note that this + // is N^2, which should be fixed someday. + for (unsigned i = 1, e = JTs.size(); i != e; ++i) + JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + + // If a jump table was merge with another one, walk the function rewriting + // references to jump tables to reference the new JT ID's. Keep track of + // whether we see a jump table idx, if not, we can delete the JT. + BitVector JTIsLive(JTs.size()); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); + BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + MachineOperand &Op = I->getOperand(op); + if (!Op.isJTI()) continue; + unsigned NewIdx = JTMapping[Op.getIndex()]; + Op.setIndex(NewIdx); + + // Remember that this JT is live. + JTIsLive.set(NewIdx); + } + } + + // Finally, remove dead jump tables. This happens either because the + // indirect jump was unreachable (and thus deleted) or because the jump + // table was merged with some other one. + for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) + if (!JTIsLive.test(i)) { + JTI->RemoveJumpTable(i); + EverMadeChange = true; + } + } + + delete RS; + return EverMadeChange; +} + +//===----------------------------------------------------------------------===// +// Tail Merging of Blocks +//===----------------------------------------------------------------------===// + +/// HashMachineInstr - Compute a hash value for MI and its operands. +static unsigned HashMachineInstr(const MachineInstr *MI) { + unsigned Hash = MI->getOpcode(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + + // Merge in bits from the operand if easy. + unsigned OperandHash = 0; + switch (Op.getType()) { + case MachineOperand::MO_Register: OperandHash = Op.getReg(); break; + case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break; + case MachineOperand::MO_MachineBasicBlock: + OperandHash = Op.getMBB()->getNumber(); + break; + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + OperandHash = Op.getIndex(); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + // Global address / external symbol are too hard, don't bother, but do + // pull in the offset. + OperandHash = Op.getOffset(); + break; + default: break; + } + + Hash += ((OperandHash << 3) | Op.getType()) << (i&31); + } + return Hash; +} + +/// HashEndOfMBB - Hash the last few instructions in the MBB. 
For blocks
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
+/// branch must be inserted). For blocks with a successor, one of the
+/// two blocks to be tail-merged will end with a branch already, so
+/// cross-jumping can pay off even for a single instruction.
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
+                             unsigned minCommonTailLength) {
+  MachineBasicBlock::const_iterator I = MBB->end();
+  if (I == MBB->begin())
+    return 0;   // Empty MBB.
+
+  --I;
+  unsigned Hash = HashMachineInstr(I);
+
+  if (I == MBB->begin() || minCommonTailLength == 1)
+    return Hash;   // Single instr MBB.
+
+  --I;
+  // Hash in the second-to-last instruction.
+  Hash ^= HashMachineInstr(I) << 2;
+  return Hash;
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the
+/// number of instructions they actually have in common at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+                                        MachineBasicBlock *MBB2,
+                                        MachineBasicBlock::iterator &I1,
+                                        MachineBasicBlock::iterator &I2) {
+  I1 = MBB1->end();
+  I2 = MBB2->end();
+
+  unsigned TailLen = 0;
+  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+    --I1; --I2;
+    if (!I1->isIdenticalTo(I2) ||
+        // FIXME: This check is dubious. It's used to get around a problem
+        // where people incorrectly expect inline asm directives to remain in
+        // the same relative order. This is untenable because normal compiler
+        // optimizations (like this one) may reorder and/or merge these
+        // directives.
+        I1->getOpcode() == TargetInstrInfo::INLINEASM) {
+      ++I1; ++I2;
+      break;
+    }
+    ++TailLen;
+  }
+  return TailLen;
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                           MachineBasicBlock *NewDest) {
+  MachineBasicBlock *OldBB = OldInst->getParent();
+
+  // Remove all the old successors of OldBB from the CFG.
+  while (!OldBB->succ_empty())
+    OldBB->removeSuccessor(OldBB->succ_begin());
+
+  // Remove all the dead instructions from the end of OldBB.
+  OldBB->erase(OldInst, OldBB->end());
+
+  // If OldBB isn't immediately before NewDest, insert a branch to it.
+  if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
+    TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>());
+  OldBB->addSuccessor(NewDest);
+  ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls through into the part
+/// starting at the iterator. This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+                                            MachineBasicBlock::iterator BBI1) {
+  MachineFunction &MF = *CurMBB.getParent();
+
+  // Create the fall-through block.
+  MachineFunction::iterator MBBI = &CurMBB;
+  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
+  CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+  // Move all the successors of this block to the specified block.
+  NewMBB->transferSuccessors(&CurMBB);
+
+  // Add an edge from CurMBB to NewMBB for the fall-through.
+  CurMBB.addSuccessor(NewMBB);
+
+  // Splice the code over.
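
MachineBasicBlock::splice, used on the next line, has std::list semantics: the
tail instructions are moved, not copied, in constant time and in order. A toy
standalone analogue:

#include <cassert>
#include <iterator>
#include <list>

int main() {
  std::list<int> CurMBB, NewMBB;
  for (int i = 1; i <= 4; ++i)
    CurMBB.push_back(i);                 // four "instructions"

  std::list<int>::iterator BBI1 = CurMBB.begin();
  std::advance(BBI1, 2);                 // BBI1 points at instruction 3

  // Move [BBI1, end()) out of CurMBB onto the end of NewMBB.
  NewMBB.splice(NewMBB.end(), CurMBB, BBI1, CurMBB.end());

  assert(CurMBB.size() == 2 && NewMBB.size() == 2 && NewMBB.front() == 3);
  return 0;
}
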
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + + // For targets that use the register scavenger, we must maintain LiveIns. + if (RS) { + RS->enterBasicBlock(&CurMBB); + if (!CurMBB.empty()) + RS->forward(prior(CurMBB.end())); + BitVector RegsLiveAtExit(RegInfo->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); + } + + return NewMBB; +} + +/// EstimateRuntime - Make a rough estimate for how long it will take to run +/// the specified code. +static unsigned EstimateRuntime(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + unsigned Time = 0; + for (; I != E; ++I) { + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isCall()) + Time += 10; + else if (TID.mayLoad() || TID.mayStore()) + Time += 2; + else + ++Time; + } + return Time; +} + +// CurMBB needs to add an unconditional branch to SuccMBB (we removed these +// branches temporarily for tail merging). In the case where CurMBB ends +// with a conditional branch to the next block, optimize by reversing the +// test and conditionally branching to SuccMBB instead. + +static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB, + const TargetInstrInfo *TII) { + MachineFunction *MF = CurMBB->getParent(); + MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB)); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (I != MF->end() && + !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { + MachineBasicBlock *NextBB = I; + if (TBB == NextBB && !Cond.empty() && !FBB) { + if (!TII->ReverseBranchCondition(Cond)) { + TII->RemoveBranch(*CurMBB); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond); + return; + } + } + } + TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>()); +} + +static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p, + const std::pair<unsigned,MachineBasicBlock*> &q) { + if (p.first < q.first) + return true; + else if (p.first > q.first) + return false; + else if (p.second->getNumber() < q.second->getNumber()) + return true; + else if (p.second->getNumber() > q.second->getNumber()) + return false; + else { + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. +#ifndef _GLIBCXX_DEBUG + assert(0 && "Predecessor appears twice"); +#endif + return false; + } +} + +/// ComputeSameTails - Look through all the blocks in MergePotentials that have +/// hash CurHash (guaranteed to match the last element). Build the vector +/// SameTails of all those that have the (same) largest number of instructions +/// in common of any pair of these blocks. SameTails entries contain an +/// iterator into MergePotentials (from which the MachineBasicBlock can be +/// found) and a MachineBasicBlock::iterator into that MBB indicating the +/// instruction where the matching code sequence begins. +/// Order of elements in SameTails is the reverse of the order in which +/// those blocks appear in MergePotentials (where they are not necessarily +/// consecutive). 
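
ComputeSameTails, implemented next, repeatedly calls the
ComputeCommonTailLength helper defined earlier. A toy analogue of that
common-suffix computation, with strings standing in for instructions (and
without the INLINEASM special case):

#include <cassert>
#include <string>
#include <vector>

unsigned commonSuffixLen(const std::vector<std::string> &A,
                         const std::vector<std::string> &B) {
  unsigned Len = 0;
  while (Len < A.size() && Len < B.size() &&
         A[A.size() - 1 - Len] == B[B.size() - 1 - Len])
    ++Len;   // count matching instructions from the end inwards
  return Len;
}

int main() {
  std::vector<std::string> BB1, BB2;
  BB1.push_back("cmp"); BB1.push_back("mov");
  BB1.push_back("add"); BB1.push_back("ret");
  BB2.push_back("sub"); BB2.push_back("add"); BB2.push_back("ret");
  assert(commonSuffixLen(BB1, BB2) == 2);   // "add", "ret" form the tail
  return 0;
}
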
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash, + unsigned minCommonTailLength) { + unsigned maxCommonTailLength = 0U; + SameTails.clear(); + MachineBasicBlock::iterator TrialBBI1, TrialBBI2; + MPIterator HighestMPIter = prior(MergePotentials.end()); + for (MPIterator CurMPIter = prior(MergePotentials.end()), + B = MergePotentials.begin(); + CurMPIter!=B && CurMPIter->first==CurHash; + --CurMPIter) { + for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) { + unsigned CommonTailLen = ComputeCommonTailLength( + CurMPIter->second, + I->second, + TrialBBI1, TrialBBI2); + // If we will have to split a block, there should be at least + // minCommonTailLength instructions in common; if not, at worst + // we will be replacing a fallthrough into the common tail with a + // branch, which at worst breaks even with falling through into + // the duplicated common tail, so 1 instruction in common is enough. + // We will always pick a block we do not have to split as the common + // tail if there is one. + // (Empty blocks will get forwarded and need not be considered.) + if (CommonTailLen >= minCommonTailLength || + (CommonTailLen > 0 && + (TrialBBI1==CurMPIter->second->begin() || + TrialBBI2==I->second->begin()))) { + if (CommonTailLen > maxCommonTailLength) { + SameTails.clear(); + maxCommonTailLength = CommonTailLen; + HighestMPIter = CurMPIter; + SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1)); + } + if (HighestMPIter == CurMPIter && + CommonTailLen == maxCommonTailLength) + SameTails.push_back(std::make_pair(I, TrialBBI2)); + } + if (I==B) + break; + } + } + return maxCommonTailLength; +} + +/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from +/// MergePotentials, restoring branches at ends of blocks as appropriate. +void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, + MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB) { + MPIterator CurMPIter, B; + for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); + CurMPIter->first==CurHash; + --CurMPIter) { + // Put the unconditional branch back, if we need one. + MachineBasicBlock *CurMBB = CurMPIter->second; + if (SuccBB && CurMBB != PredBB) + FixTail(CurMBB, SuccBB, TII); + if (CurMPIter==B) + break; + } + if (CurMPIter->first!=CurHash) + CurMPIter++; + MergePotentials.erase(CurMPIter, MergePotentials.end()); +} + +/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist +/// only of the common tail. Create a block that does by splitting one. +unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength) { + unsigned i, commonTailIndex; + unsigned TimeEstimate = ~0U; + for (i=0, commonTailIndex=0; i<SameTails.size(); i++) { + // Use PredBB if possible; that doesn't require a new branch. + if (SameTails[i].first->second==PredBB) { + commonTailIndex = i; + break; + } + // Otherwise, make a (fairly bogus) choice based on estimate of + // how long it will take the various blocks to execute. 
+ unsigned t = EstimateRuntime(SameTails[i].first->second->begin(), + SameTails[i].second); + if (t<=TimeEstimate) { + TimeEstimate = t; + commonTailIndex = i; + } + } + + MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second; + MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; + + DOUT << "\nSplitting " << MBB->getNumber() << ", size " << + maxCommonTailLength; + + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + SameTails[commonTailIndex].first->second = newMBB; + SameTails[commonTailIndex].second = newMBB->begin(); + // If we split PredBB, newMBB is the new predecessor. + if (PredBB==MBB) + PredBB = newMBB; + + return commonTailIndex; +} + +// See if any of the blocks in MergePotentials (which all have a common single +// successor, or all have no successor) can be tail-merged. If there is a +// successor, any blocks in MergePotentials that are not tail-merged and +// are not immediately before Succ must have an unconditional branch to +// Succ added (but the predecessor/successor lists need no adjustment). +// The lone predecessor of Succ that falls through into Succ, +// if any, is given in PredBB. + +bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, + MachineBasicBlock* PredBB) { + // It doesn't make sense to save a single instruction since tail merging + // will add a jump. + // FIXME: Ask the target to provide the threshold? + unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1; + MadeChange = false; + + DOUT << "\nTryMergeBlocks " << MergePotentials.size() << '\n'; + + // Sort by hash value so that blocks with identical end sequences sort + // together. + std::stable_sort(MergePotentials.begin(), MergePotentials.end(),MergeCompare); + + // Walk through equivalence sets looking for actual exact matches. + while (MergePotentials.size() > 1) { + unsigned CurHash = prior(MergePotentials.end())->first; + + // Build SameTails, identifying the set of blocks with this hash code + // and with the maximum number of instructions in common. + unsigned maxCommonTailLength = ComputeSameTails(CurHash, + minCommonTailLength); + + // If we didn't find any pair that has at least minCommonTailLength + // instructions in common, remove all blocks with this hash code and retry. + if (SameTails.empty()) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + + // If one of the blocks is the entire common tail (and not the entry + // block, which we can't jump to), we can treat all blocks with this same + // tail at once. Use PredBB if that is one of the possibilities, as that + // will not introduce any extra branches. + MachineBasicBlock *EntryBB = MergePotentials.begin()->second-> + getParent()->begin(); + unsigned int commonTailIndex, i; + for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) { + MachineBasicBlock *MBB = SameTails[i].first->second; + if (MBB->begin() == SameTails[i].second && MBB != EntryBB) { + commonTailIndex = i; + if (MBB==PredBB) + break; + } + } + + if (commonTailIndex==SameTails.size()) { + // None of the blocks consist entirely of the common tail. + // Split a block so that one does. + commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); + } + + MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; + // MBB is common tail. Adjust all other BB's to jump to this one. + // Traversal must be forwards so erases work. 
+ DOUT << "\nUsing common tail " << MBB->getNumber() << " for "; + for (unsigned int i=0; i<SameTails.size(); ++i) { + if (commonTailIndex==i) + continue; + DOUT << SameTails[i].first->second->getNumber() << ","; + // Hack the end off BB i, making it jump to BB commonTailIndex instead. + ReplaceTailWithBranchTo(SameTails[i].second, MBB); + // BB i is no longer a predecessor of SuccBB; remove it from the worklist. + MergePotentials.erase(SameTails[i].first); + } + DOUT << "\n"; + // We leave commonTailIndex in the worklist in case there are other blocks + // that match it with a smaller number of instructions. + MadeChange = true; + } + return MadeChange; +} + +bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { + + if (!EnableTailMerge) return false; + + MadeChange = false; + + // First find blocks with no successors. + MergePotentials.clear(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + if (I->succ_empty()) + MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I)); + } + // See if we can do any tail merging on those. + if (MergePotentials.size() < TailMergeThreshold && + MergePotentials.size() >= 2) + MadeChange |= TryMergeBlocks(NULL, NULL); + + // Look at blocks (IBB) with multiple predecessors (PBB). + // We change each predecessor to a canonical form, by + // (1) temporarily removing any unconditional branch from the predecessor + // to IBB, and + // (2) alter conditional branches so they branch to the other block + // not IBB; this may require adding back an unconditional branch to IBB + // later, where there wasn't one coming in. E.g. + // Bcc IBB + // fallthrough to QBB + // here becomes + // Bncc QBB + // with a conceptual B to IBB after that, which never actually exists. + // With those changes, we see whether the predecessors' tails match, + // and merge them if so. We change things out of canonical form and + // back to the way they were later in the process. (OptimizeBranches + // would undo some of this, but we can't use it, because we'd get into + // a compile-time infinite loop repeatedly doing and undoing the same + // transformations.) + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + MachineBasicBlock *IBB = I; + MachineBasicBlock *PredBB = prior(I); + MergePotentials.clear(); + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + E2 = I->pred_end(); + P != E2; ++P) { + MachineBasicBlock* PBB = *P; + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB==IBB) + continue; + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + // Failing case: IBB is the target of a cbr, and + // we cannot reverse the branch. + SmallVector<MachineOperand, 4> NewCond(Cond); + if (!Cond.empty() && TBB==IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) + FBB = next(MachineFunction::iterator(PBB)); + } + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice + // to have a bit in the edge so we didn't have to do all this. 
+ if (IBB->isLandingPad()) { + MachineFunction::iterator IP = PBB; IP++; + MachineBasicBlock* PredNextBB = NULL; + if (IP!=MF.end()) + PredNextBB = IP; + if (TBB==NULL) { + if (IBB!=PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB!=IBB && FBB!=IBB) // cbr then ubr + continue; + } else if (Cond.empty()) { + if (TBB!=IBB) // ubr + continue; + } else { + if (TBB!=IBB && IBB!=PredNextBB) // cbr + continue; + } + } + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.empty() || FBB)) { + TII->RemoveBranch(*PBB); + if (!Cond.empty()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond); + } + MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P)); + } + } + if (MergePotentials.size() >= 2) + MadeChange |= TryMergeBlocks(I, PredBB); + // Reinsert an unconditional branch if needed. + // The 1 below can occur as a result of removing blocks in TryMergeBlocks. + PredBB = prior(I); // this may have been changed in TryMergeBlocks + if (MergePotentials.size()==1 && + MergePotentials.begin()->second != PredBB) + FixTail(MergePotentials.begin()->second, I, TII); + } + } + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Branch Optimization +//===----------------------------------------------------------------------===// + +bool BranchFolder::OptimizeBranches(MachineFunction &MF) { + MadeChange = false; + + // Make sure blocks are numbered in order + MF.RenumberBlocks(); + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + OptimizeBlock(MBB); + + // If it is dead, remove it. + if (MBB->pred_empty()) { + RemoveDeadBlock(MBB); + MadeChange = true; + ++NumDeadBlocks; + } + } + return MadeChange; +} + + +/// CanFallThrough - Return true if the specified block (with the specified +/// branch condition) can implicitly transfer control to the block after it by +/// falling off the end of it. This should return false if it can reach the +/// block after it, but it uses an explicit branch to do so (e.g. a table jump). +/// +/// True is a conservative answer. +/// +bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, + bool BranchUnAnalyzable, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) { + MachineFunction::iterator Fallthrough = CurBB; + ++Fallthrough; + // If FallthroughBlock is off the end of the function, it can't fall through. + if (Fallthrough == CurBB->getParent()->end()) + return false; + + // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible. + if (!CurBB->isSuccessor(Fallthrough)) + return false; + + // If we couldn't analyze the branch, assume it could fall through. + if (BranchUnAnalyzable) return true; + + // If there is no branch, control always falls through. + if (TBB == 0) return true; + + // If there is some explicit branch to the fallthrough block, it can obviously + // reach, even though the branch should get folded to fall through implicitly. + if (MachineFunction::iterator(TBB) == Fallthrough || + MachineFunction::iterator(FBB) == Fallthrough) + return true; + + // If it's an unconditional branch to some block not the fall through, it + // doesn't fall through. + if (Cond.empty()) return false; + + // Otherwise, if it is conditional and has no explicit false block, it falls + // through. 
+  return FBB == 0;
+}
+
+/// CanFallThrough - Return true if the specified block can implicitly transfer
+/// control to the block after it by falling off the end of it. This should
+/// return false if it can reach the block after it, but it uses an explicit
+/// branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true);
+  return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1)
+/// would result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+                                MachineBasicBlock *MBB2) {
+  // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+  // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+  // optimize branches that branch to either a return block or an assert block
+  // into a fallthrough to the return.
+  if (MBB1->empty() || MBB2->empty()) return false;
+
+  // If there is a clear successor ordering, we make sure that one block
+  // will fall through to the next.
+  if (MBB1->isSuccessor(MBB2)) return true;
+  if (MBB2->isSuccessor(MBB1)) return false;
+
+  MachineInstr *MBB1I = --MBB1->end();
+  MachineInstr *MBB2I = --MBB2->end();
+  return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+  MachineFunction::iterator FallThrough = MBB;
+  ++FallThrough;
+
+  // If this block is empty, make everyone use its fall-through, not the block
+  // explicitly. Landing pads should not do this since the landing-pad table
+  // points to this block.
+  if (MBB->empty() && !MBB->isLandingPad()) {
+    // Dead block? Leave for cleanup later.
+    if (MBB->pred_empty()) return;
+
+    if (FallThrough == MBB->getParent()->end()) {
+      // TODO: Simplify preds to not branch here if possible!
+    } else {
+      // Rewrite all predecessors of the old block to go to the fallthrough
+      // instead.
+      while (!MBB->pred_empty()) {
+        MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+        Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+      }
+
+      // If MBB was the target of a jump table, update jump tables to go to the
+      // fallthrough instead.
+      MBB->getParent()->getJumpTableInfo()->
+        ReplaceMBBInJumpTables(MBB, FallThrough);
+      MadeChange = true;
+    }
+    return;
+  }
+
+  // Check to see if we can simplify the terminator of the block before this
+  // one.
+  MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+  MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+  SmallVector<MachineOperand, 4> PriorCond;
+  bool PriorUnAnalyzable =
+    TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+  if (!PriorUnAnalyzable) {
+    // If the CFG for the prior block has extra edges, remove them.
+    MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+                                              !PriorCond.empty());
+
+    // If the previous branch is conditional and both conditions go to the same
+    // destination, remove the branch, replacing it with an unconditional one
+    // or a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) { + TII->RemoveBranch(PrevBB); + PriorCond.clear(); + if (PriorTBB != MBB) + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + + // If the previous branch *only* branches to *this* block (conditional or + // not) remove the branch. + if (PriorTBB == MBB && PriorFBB == 0) { + TII->RemoveBranch(PrevBB); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + + // If the prior block branches somewhere else on the condition and here if + // the condition is false, remove the uncond second branch. + if (PriorFBB == MBB) { + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + + // If the prior block branches here on true and somewhere else on false, and + // if the branch condition is reversible, reverse the branch to create a + // fall-through. + if (PriorTBB == MBB) { + SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + } + + // If this block doesn't fall through (e.g. it ends with an uncond branch or + // has no successors) and if the pred falls through into this block, and if + // it would otherwise fall through into the block after this, move this + // block to the end of the function. + // + // We consider it more likely that execution will stay in the function (e.g. + // due to loops) than it is to exit it. This asserts in loops etc, moving + // the assert condition out of the loop body. + if (!PriorCond.empty() && PriorFBB == 0 && + MachineFunction::iterator(PriorTBB) == FallThrough && + !CanFallThrough(MBB)) { + bool DoTransform = true; + + // We have to be careful that the succs of PredBB aren't both no-successor + // blocks. If neither have successors and if PredBB is the second from + // last block in the function, we'd just keep swapping the two blocks for + // last. Only do the swap if one is clearly better to fall through than + // the other. + if (FallThrough == --MBB->getParent()->end() && + !IsBetterFallthrough(PriorTBB, MBB)) + DoTransform = false; + + // We don't want to do this transformation if we have control flow like: + // br cond BB2 + // BB1: + // .. + // jmp BBX + // BB2: + // .. + // ret + // + // In this case, we could actually be moving the return block *into* a + // loop! + if (DoTransform && !MBB->succ_empty() && + (!CanFallThrough(PriorTBB) || PriorTBB->empty())) + DoTransform = false; + + + if (DoTransform) { + // Reverse the branch so we will fall through on the previous true cond. + SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + DOUT << "\nMoving MBB: " << *MBB; + DOUT << "To make fallthrough to: " << *PriorTBB << "\n"; + + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); + + // Move this block to the end of the function. + MBB->moveAfter(--MBB->getParent()->end()); + MadeChange = true; + ++NumBranchOpts; + return; + } + } + } + } + + // Analyze the branch in the current block. 
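
The AnalyzeBranch call that follows reports its result through the (TBB, FBB,
Cond) triple. The four shapes a successful analysis can return, written out as
a toy classifier (assumed convention; see TargetInstrInfo::AnalyzeBranch):

#include <cassert>
#include <vector>

struct Op {};        // stand-in for MachineOperand
typedef int Block;   // stand-in for MachineBasicBlock

enum BranchShape {
  FallsThrough,         // TBB == 0: no terminating branch at all
  Unconditional,        // TBB != 0, Cond empty: jmp TBB
  CondThenFallThrough,  // TBB != 0, FBB == 0, Cond set: jcc TBB, else fall
  CondThenUncond        // TBB and FBB set, Cond set: jcc TBB, else jmp FBB
};

BranchShape classify(Block *TBB, Block *FBB, const std::vector<Op> &Cond) {
  if (!TBB)
    return FallsThrough;
  if (Cond.empty())
    return Unconditional;
  return FBB ? CondThenUncond : CondThenFallThrough;
}

int main() {
  Block B1, B2;
  std::vector<Op> Empty, Taken(1);
  assert(classify(0, 0, Empty) == FallsThrough);
  assert(classify(&B1, 0, Empty) == Unconditional);
  assert(classify(&B1, 0, Taken) == CondThenFallThrough);
  assert(classify(&B1, &B2, Taken) == CondThenUncond);
  return 0;
}
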
+  MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+  SmallVector<MachineOperand, 4> CurCond;
+  bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+  if (!CurUnAnalyzable) {
+    // If the CFG for the current block has extra edges, remove them.
+    MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+    // If this is a two-way branch, and the FBB branches to this block, reverse
+    // the condition so the single-basic-block loop is faster. Instead of:
+    //    Loop: xxx; jcc Out; jmp Loop
+    // we want:
+    //    Loop: xxx; jncc Loop; jmp Out
+    if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+      SmallVector<MachineOperand, 4> NewCond(CurCond);
+      if (!TII->ReverseBranchCondition(NewCond)) {
+        TII->RemoveBranch(*MBB);
+        TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+        MadeChange = true;
+        ++NumBranchOpts;
+        return OptimizeBlock(MBB);
+      }
+    }
+
+    // If this branch is the only thing in its block, see if we can forward
+    // other blocks across it.
+    if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+        MBB->begin()->getDesc().isBranch() && CurTBB != MBB) {
+      // This block may contain just an unconditional branch. Because there can
+      // be 'non-branch terminators' in the block, try removing the branch and
+      // then seeing if the block is empty.
+      TII->RemoveBranch(*MBB);
+
+      // If this block is just an unconditional branch to CurTBB, we can
+      // usually completely eliminate the block. The only case we cannot
+      // completely eliminate the block is when the block before this one
+      // falls through into MBB and we can't understand the prior block's
+      // branch condition.
+      if (MBB->empty()) {
+        bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB);
+        if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+            !PrevBB.isSuccessor(MBB)) {
+          // If the prior block falls through into us, turn it into an
+          // explicit branch to us to make updates simpler.
+          if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+              PriorTBB != MBB && PriorFBB != MBB) {
+            if (PriorTBB == 0) {
+              assert(PriorCond.empty() && PriorFBB == 0 &&
+                     "Bad branch analysis");
+              PriorTBB = MBB;
+            } else {
+              assert(PriorFBB == 0 && "Machine CFG out of date!");
+              PriorFBB = MBB;
+            }
+            TII->RemoveBranch(PrevBB);
+            TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+          }
+
+          // Iterate through all the predecessors, revectoring each in turn.
+          size_t PI = 0;
+          bool DidChange = false;
+          bool HasBranchToSelf = false;
+          while (PI != MBB->pred_size()) {
+            MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+            if (PMBB == MBB) {
+              // If this block has an uncond branch to itself, leave it.
+              ++PI;
+              HasBranchToSelf = true;
+            } else {
+              DidChange = true;
+              PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+              // If this change resulted in PMBB ending in a conditional
+              // branch where both conditions go to the same destination,
+              // change this to an unconditional branch (and fix the CFG).
+              MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+              SmallVector<MachineOperand, 4> NewCurCond;
+              bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
+                      NewCurFBB, NewCurCond, true);
+              if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+                TII->RemoveBranch(*PMBB);
+                NewCurCond.clear();
+                TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
+                MadeChange = true;
+                ++NumBranchOpts;
+                PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false);
+              }
+            }
+          }
+
+          // Change any jumptables to go to the new MBB.
+ MBB->getParent()->getJumpTableInfo()-> + ReplaceMBBInJumpTables(MBB, CurTBB); + if (DidChange) { + ++NumBranchOpts; + MadeChange = true; + if (!HasBranchToSelf) return; + } + } + } + + // Add the branch back if the block is more than just an uncond branch. + TII->InsertBranch(*MBB, CurTBB, 0, CurCond); + } + } + + // If the prior block doesn't fall through into this block, and if this + // block doesn't fall through into some other block, see if we can find a + // place to move this block where a fall-through will happen. + if (!CanFallThrough(&PrevBB, PriorUnAnalyzable, + PriorTBB, PriorFBB, PriorCond)) { + // Now we know that there was no fall-through into this block, check to + // see if it has a fall-through into its successor. + bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, + CurCond); + + if (!MBB->isLandingPad()) { + // Check all the predecessors of this block. If one of them has no fall + // throughs, move this block right after it. + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + E = MBB->pred_end(); PI != E; ++PI) { + // Analyze the branch at the end of the pred. + MachineBasicBlock *PredBB = *PI; + MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; + if (PredBB != MBB && !CanFallThrough(PredBB) + && (!CurFallsThru || !CurTBB || !CurFBB) + && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { + // If the current block doesn't fall through, just move it. + // If the current block can fall through and does not end with a + // conditional branch, we need to append an unconditional jump to + // the (current) next block. To avoid a possible compile-time + // infinite loop, move blocks only backward in this case. + // Also, if there are already 2 branches here, we cannot add a third; + // this means we have the case + // Bcc next + // B elsewhere + // next: + if (CurFallsThru) { + MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + CurCond.clear(); + TII->InsertBranch(*MBB, NextBB, 0, CurCond); + } + MBB->moveAfter(PredBB); + MadeChange = true; + return OptimizeBlock(MBB); + } + } + } + + if (!CurFallsThru) { + // Check all successors to see if we can move this block before it. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + // Analyze the branch at the end of the block before the succ. + MachineBasicBlock *SuccBB = *SI; + MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; + std::vector<MachineOperand> SuccPrevCond; + + // If this block doesn't already fall-through to that successor, and if + // the succ doesn't already have a block that can fall through into it, + // and if the successor isn't an EH destination, we can arrange for the + // fallthrough to happen. + if (SuccBB != MBB && !CanFallThrough(SuccPrev) && + !SuccBB->isLandingPad()) { + MBB->moveBefore(SuccBB); + MadeChange = true; + return OptimizeBlock(MBB); + } + } + + // Okay, there is no really great place to put this block. If, however, + // the block before this one would be a fall-through if this block were + // removed, move this block to the end of the function. 
+      if (FallThrough != MBB->getParent()->end() &&
+          PrevBB.isSuccessor(FallThrough)) {
+        MBB->moveAfter(--MBB->getParent()->end());
+        MadeChange = true;
+        return;
+      }
+    }
+  }
+}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
new file mode 100644
index 0000000..ca4b31c
--- /dev/null
+++ b/lib/CodeGen/CMakeLists.txt
@@ -0,0 +1,62 @@
+add_llvm_library(LLVMCodeGen
+  BranchFolding.cpp
+  CodePlacementOpt.cpp
+  DeadMachineInstructionElim.cpp
+  DwarfEHPrepare.cpp
+  ELFWriter.cpp
+  GCMetadata.cpp
+  GCMetadataPrinter.cpp
+  GCStrategy.cpp
+  IfConversion.cpp
+  IntrinsicLowering.cpp
+  LLVMTargetMachine.cpp
+  LatencyPriorityQueue.cpp
+  LiveInterval.cpp
+  LiveIntervalAnalysis.cpp
+  LiveStackAnalysis.cpp
+  LiveVariables.cpp
+  LowerSubregs.cpp
+  MachOWriter.cpp
+  MachineBasicBlock.cpp
+  MachineDominators.cpp
+  MachineFunction.cpp
+  MachineInstr.cpp
+  MachineLICM.cpp
+  MachineLoopInfo.cpp
+  MachineModuleInfo.cpp
+  MachinePassRegistry.cpp
+  MachineRegisterInfo.cpp
+  MachineSink.cpp
+  MachineVerifier.cpp
+  OcamlGC.cpp
+  PBQP.cpp
+  PHIElimination.cpp
+  Passes.cpp
+  PostRASchedulerList.cpp
+  PreAllocSplitting.cpp
+  PrologEpilogInserter.cpp
+  PseudoSourceValue.cpp
+  RegAllocBigBlock.cpp
+  RegAllocLinearScan.cpp
+  RegAllocLocal.cpp
+  RegAllocPBQP.cpp
+  RegAllocSimple.cpp
+  RegisterCoalescer.cpp
+  RegisterScavenging.cpp
+  ScheduleDAG.cpp
+  ScheduleDAGEmit.cpp
+  ScheduleDAGInstrs.cpp
+  ScheduleDAGPrinter.cpp
+  ShadowStackGC.cpp
+  ShrinkWrapping.cpp
+  SimpleRegisterCoalescing.cpp
+  Spiller.cpp
+  StackProtector.cpp
+  StackSlotColoring.cpp
+  StrongPHIElimination.cpp
+  TargetInstrInfoImpl.cpp
+  TwoAddressInstructionPass.cpp
+  UnreachableBlockElim.cpp
+  VirtRegMap.cpp
+  VirtRegRewriter.cpp
+  )
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
new file mode 100644
index 0000000..383098e
--- /dev/null
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -0,0 +1,358 @@
+//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the pass that optimizes code placement and aligns loop
+// headers to a target-specific alignment boundary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "code-placement"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumHeaderAligned, "Number of loop headers aligned");
+STATISTIC(NumIntraElim, "Number of intra-loop branches eliminated");
+STATISTIC(NumIntraMoved, "Number of intra-loop branches moved");
+
+namespace {
+  class CodePlacementOpt : public MachineFunctionPass {
+    const MachineLoopInfo *MLI;
+    const TargetInstrInfo *TII;
+    const TargetLowering *TLI;
+
+    /// ChangedMBBs - BBs which are modified by OptimizeIntraLoopEdges.
+    SmallPtrSet<MachineBasicBlock*, 8> ChangedMBBs;
+
+    /// UncondJmpMBBs - A list of BBs which are in loops and end with
+    /// unconditional branches.
+    SmallVector<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 4>
+      UncondJmpMBBs;
+
+    /// LoopHeaders - A list of BBs which are loop headers.
+    SmallVector<MachineBasicBlock*, 4> LoopHeaders;
+
+  public:
+    static char ID;
+    CodePlacementOpt() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const {
+      return "Code Placement Optimizer";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    bool OptimizeIntraLoopEdges();
+    bool HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+                               SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign);
+    bool AlignLoops(MachineFunction &MF);
+  };
+
+  char CodePlacementOpt::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createCodePlacementOptPass() {
+  return new CodePlacementOpt();
+}
+
+/// OptimizeIntraLoopEdges - Place loop back edges to move unconditional
+/// branches out of the loop.
+///
+/// A:
+///     ...
+///     <fallthrough to B>
+///
+/// B:  --> loop header
+///     ...
+///     jcc <cond> C, [exit]
+///
+/// C:
+///     ...
+///     jmp B
+///
+/// ==>
+///
+/// A:
+///     ...
+///     jmp B
+///
+/// C:  --> new loop header
+///     ...
+///     <fallthrough to B>
+///
+/// B:
+///     ...
+///     jcc <cond> C, [exit]
+///
+bool CodePlacementOpt::OptimizeIntraLoopEdges() {
+  if (!TLI->shouldOptimizeCodePlacement())
+    return false;
+
+  bool Changed = false;
+  for (unsigned i = 0, e = UncondJmpMBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = UncondJmpMBBs[i].first;
+    MachineBasicBlock *SuccMBB = UncondJmpMBBs[i].second;
+    MachineLoop *L = MLI->getLoopFor(MBB);
+    assert(L && "BB is expected to be in a loop!");
+
+    if (ChangedMBBs.count(MBB)) {
+      // BB has been modified, re-analyze.
+      MachineBasicBlock *TBB = 0, *FBB = 0;
+      SmallVector<MachineOperand, 4> Cond;
+      if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
+        continue;
+      if (MLI->getLoopFor(TBB) != L || TBB->isLandingPad())
+        continue;
+      SuccMBB = TBB;
+    } else {
+      assert(MLI->getLoopFor(SuccMBB) == L &&
+             "Successor is not in the same loop!");
+    }
+
+    if (MBB->isLayoutSuccessor(SuccMBB)) {
+      // Successor is right after MBB, just eliminate the unconditional jmp.
+      // Can this happen?
+      TII->RemoveBranch(*MBB);
+      ChangedMBBs.insert(MBB);
+      ++NumIntraElim;
+      Changed = true;
+      continue;
+    }
+
+    // Now check whether SuccMBB is reached by fallthrough from some BB. If it
+    // is, that BB should be outside the loop, since the fallthrough edge will
+    // become a jmp once SuccMBB is moved.
+    bool OkToMove = true;
+    MachineBasicBlock *FtMBB = 0, *FtTBB = 0, *FtFBB = 0;
+    SmallVector<MachineOperand, 4> FtCond;
+    for (MachineBasicBlock::pred_iterator PI = SuccMBB->pred_begin(),
+           PE = SuccMBB->pred_end(); PI != PE; ++PI) {
+      MachineBasicBlock *PredMBB = *PI;
+      if (PredMBB->isLayoutSuccessor(SuccMBB)) {
+        if (TII->AnalyzeBranch(*PredMBB, FtTBB, FtFBB, FtCond)) {
+          OkToMove = false;
+          break;
+        }
+        if (!FtTBB)
+          FtTBB = SuccMBB;
+        else if (!FtFBB) {
+          assert(FtFBB != SuccMBB && "Unexpected control flow!");
+          FtFBB = SuccMBB;
+        }
+
+        // A fallthrough.
+        FtMBB = PredMBB;
+        MachineLoop *PL = MLI->getLoopFor(PredMBB);
+        if (PL && (PL == L || PL->getLoopDepth() >= L->getLoopDepth()))
+          OkToMove = false;
+
+        break;
+      }
+    }
+
+    if (!OkToMove)
+      continue;
+
+    // Is it profitable? If SuccMBB itself can fall through, moving it will
+    // turn that fallthrough into a jmp.
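+    // Each successor edge that would turn into an explicit jmp after the
+    // move costs one; each edge that becomes the new fallthrough saves one.
+    // The move is performed only when the net cost is zero.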
+    MachineBasicBlock *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (TII->AnalyzeBranch(*SuccMBB, TBB, FBB, Cond))
+      continue;
+    if (!TBB && Cond.empty())
+      TBB = next(MachineFunction::iterator(SuccMBB));
+    else if (!FBB && !Cond.empty())
+      FBB = next(MachineFunction::iterator(SuccMBB));
+
+    // This calculates the cost of the transformation. It also finds the
+    // *only* intra-loop edge if there is one.
+    int Cost = 0;
+    bool HasOneIntraSucc = true;
+    MachineBasicBlock *IntraSucc = 0;
+    for (MachineBasicBlock::succ_iterator SI = SuccMBB->succ_begin(),
+           SE = SuccMBB->succ_end(); SI != SE; ++SI) {
+      MachineBasicBlock *SSMBB = *SI;
+      if (MLI->getLoopFor(SSMBB) == L) {
+        if (!IntraSucc)
+          IntraSucc = SSMBB;
+        else
+          HasOneIntraSucc = false;
+      }
+
+      if (SuccMBB->isLayoutSuccessor(SSMBB))
+        // This will become a jmp.
+        ++Cost;
+      else if (MBB->isLayoutSuccessor(SSMBB)) {
+        // One of the successors will become the new fallthrough.
+        if (SSMBB == FBB) {
+          FBB = 0;
+          --Cost;
+        } else if (!FBB && SSMBB == TBB && Cond.empty()) {
+          TBB = 0;
+          --Cost;
+        } else if (!Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
+          assert(SSMBB == TBB);
+          TBB = FBB;
+          FBB = 0;
+          --Cost;
+        }
+      }
+    }
+    if (Cost)
+      continue;
+
+    // Now, let's move the successor to below MBB to eliminate the jmp.
+    SuccMBB->moveAfter(MBB);
+    TII->RemoveBranch(*MBB);
+    TII->RemoveBranch(*SuccMBB);
+    if (TBB)
+      TII->InsertBranch(*SuccMBB, TBB, FBB, Cond);
+    ChangedMBBs.insert(MBB);
+    ChangedMBBs.insert(SuccMBB);
+    if (FtMBB) {
+      TII->RemoveBranch(*FtMBB);
+      TII->InsertBranch(*FtMBB, FtTBB, FtFBB, FtCond);
+      ChangedMBBs.insert(FtMBB);
+    }
+    Changed = true;
+    ++NumIntraMoved;
+
+    // If BB is the loop latch, we may have a new loop header.
+    if (MBB == L->getLoopLatch()) {
+      assert(MLI->isLoopHeader(SuccMBB) &&
+             "Only succ of loop latch is not the header?");
+      if (HasOneIntraSucc && IntraSucc)
+        std::replace(LoopHeaders.begin(), LoopHeaders.end(),
+                     SuccMBB, IntraSucc);
+    }
+  }
+
+  return Changed;
+}
+
+/// HeaderShouldBeAligned - Return true if the specified loop header block
+/// should be aligned. For now, we will not align it if all the predecessors
+/// (i.e. loop back edges) are laid out above the header. FIXME: Do not
+/// align small loops.
+bool
+CodePlacementOpt::HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
+                               SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign) {
+  if (DoNotAlign.count(MBB))
+    return false;
+
+  bool BackEdgeBelow = false;
+  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredMBB = *PI;
+    if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) {
+      BackEdgeBelow = true;
+      break;
+    }
+  }
+
+  if (!BackEdgeBelow)
+    return false;
+
+  // Ok, we are going to align this loop header. If it's an inner loop,
+  // do not align its outer loop.
+  MachineBasicBlock *PreHeader = L->getLoopPreheader();
+  if (PreHeader) {
+    MachineLoop *L = MLI->getLoopFor(PreHeader);
+    if (L) {
+      MachineBasicBlock *HeaderBlock = L->getHeader();
+      HeaderBlock->setAlignment(0);
+      DoNotAlign.insert(HeaderBlock);
+    }
+  }
+  return true;
+}
+
+/// AlignLoops - Align loop headers to target preferred alignments.
+///
+bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  if (F->hasFnAttr(Attribute::OptimizeForSize))
+    return false;
+
+  unsigned Align = TLI->getPrefLoopAlignment();
+  if (!Align)
+    return false;  // Don't care about loop alignment.
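+
+  // HeaderShouldBeAligned compares block numbers to decide whether a back
+  // edge lies below the header, so the numbering must match layout order.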
+  // Make sure blocks are numbered in order.
+  MF.RenumberBlocks();
+
+  bool Changed = false;
+  SmallPtrSet<MachineBasicBlock*, 4> DoNotAlign;
+  for (unsigned i = 0, e = LoopHeaders.size(); i != e; ++i) {
+    MachineBasicBlock *HeaderMBB = LoopHeaders[i];
+    MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(HeaderMBB));
+    MachineLoop *L = MLI->getLoopFor(HeaderMBB);
+    if (L == MLI->getLoopFor(PredMBB))
+      // If the previous BB is in the same loop, don't align this BB. We
+      // want to avoid adding nops inside a loop.
+      continue;
+    if (HeaderShouldBeAligned(HeaderMBB, L, DoNotAlign)) {
+      HeaderMBB->setAlignment(Align);
+      Changed = true;
+      ++NumHeaderAligned;
+    }
+  }
+
+  return Changed;
+}
+
+bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
+  MLI = &getAnalysis<MachineLoopInfo>();
+  if (MLI->empty())
+    return false;  // No loops.
+
+  TLI = MF.getTarget().getTargetLowering();
+  TII = MF.getTarget().getInstrInfo();
+
+  // Analyze the BBs first and keep track of loop headers and BBs that
+  // end with an unconditional jmp to another block in the same loop.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I;
+    if (MBB->isLandingPad())
+      continue;
+    MachineLoop *L = MLI->getLoopFor(MBB);
+    if (!L)
+      continue;
+    if (MLI->isLoopHeader(MBB))
+      LoopHeaders.push_back(MBB);
+
+    MachineBasicBlock *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty())
+      continue;
+    if (MLI->getLoopFor(TBB) == L && !TBB->isLandingPad())
+      UncondJmpMBBs.push_back(std::make_pair(MBB, TBB));
+  }
+
+  bool Changed = OptimizeIntraLoopEdges();
+
+  Changed |= AlignLoops(MF);
+
+  ChangedMBBs.clear();
+  UncondJmpMBBs.clear();
+  LoopHeaders.clear();
+
+  return Changed;
+}
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 0000000..4832a5e
--- /dev/null
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,161 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
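+// It deletes instructions whose results are unused and which have no side
+// effects, visiting blocks bottom-up so that chains of dead instructions
+// are cleaned up together.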
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN DeadMachineInstructionElim : + public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &MF); + + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + BitVector LivePhysRegs; + + public: + static char ID; // Pass identification, replacement for typeid + DeadMachineInstructionElim() : MachineFunctionPass(&ID) {} + + private: + bool isDead(MachineInstr *MI) const; + }; +} +char DeadMachineInstructionElim::ID = 0; + +static RegisterPass<DeadMachineInstructionElim> +Y("dead-mi-elimination", + "Remove dead machine instructions"); + +FunctionPass *llvm::createDeadMachineInstructionElimPass() { + return new DeadMachineInstructionElim(); +} + +bool DeadMachineInstructionElim::isDead(MachineInstr *MI) const { + // Don't delete instructions with side effects. + bool SawStore = false; + if (!MI->isSafeToMove(TII, SawStore)) + return false; + + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg) ? + LivePhysRegs[Reg] : !MRI->use_empty(Reg)) { + // This def has a use. Don't delete the instruction! + return false; + } + } + } + + // If there are no defs with uses, the instruction is dead. + return true; +} + +bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { + bool AnyChanges = false; + MRI = &MF.getRegInfo(); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + + // Compute a bitvector to represent all non-allocatable physregs. + BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF); + NonAllocatableRegs.flip(); + + // Loop over all instructions in all blocks, from bottom to top, so that it's + // more likely that chains of dependent but ultimately dead instructions will + // be cleaned up. + for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend(); + I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + // Start out assuming that all non-allocatable registers are live + // out of this block. + LivePhysRegs = NonAllocatableRegs; + + // Also add any explicit live-out physregs for this block. + if (!MBB->empty() && MBB->back().getDesc().isReturn()) + for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), + LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { + unsigned Reg = *LOI; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + LivePhysRegs.set(Reg); + } + + // Now scan the instructions and delete dead ones, tracking physreg + // liveness as we go. + for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), + MIE = MBB->rend(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // If the instruction is dead, delete it! + if (isDead(MI)) { + DOUT << "DeadMachineInstructionElim: DELETING: " << *MI; + AnyChanges = true; + MI->eraseFromParent(); + MIE = MBB->rend(); + // MII is now pointing to the next instruction to process, + // so don't increment it. + continue; + } + + // Record the physreg defs. 
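+      // A def ends the live range of the register and of its sub-registers,
+      // so their bits are cleared; wider registers containing Reg may remain
+      // partially live, which is why the full alias set is left alone.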
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.reset(Reg); + // Check the subreg set, not the alias set, because a def + // of a super-register may still be partially live after + // this def. + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) + LivePhysRegs.reset(*SubRegs); + } + } + } + // Record the physreg uses, after the defs, in case a physreg is + // both defined and used in the same instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.set(Reg); + for (const unsigned *AliasSet = TRI->getAliasSet(Reg); + *AliasSet; ++AliasSet) + LivePhysRegs.set(*AliasSet); + } + } + } + + // We didn't delete the current instruction, so increment MII to + // the next one. + ++MII; + } + } + + LivePhysRegs.clear(); + return AnyChanges; +} diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp new file mode 100644 index 0000000..720e3d1 --- /dev/null +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -0,0 +1,397 @@ +//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass mulches exception handling code into a form adapted to code +// generation. Required if using dwarf exception handling. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfehprepare" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +using namespace llvm; + +STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); +STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); +STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); +STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced"); + +namespace { + class VISIBILITY_HIDDEN DwarfEHPrepare : public FunctionPass { + const TargetLowering *TLI; + bool CompileFast; + + // The eh.exception intrinsic. + Function *ExceptionValueIntrinsic; + + // _Unwind_Resume or the target equivalent. + Constant *RewindFunction; + + // Dominator info is used when turning stack temporaries into registers. + DominatorTree *DT; + DominanceFrontier *DF; + + // The function we are running on. + Function *F; + + // The landing pads for this function. + typedef SmallPtrSet<BasicBlock*, 8> BBSet; + BBSet LandingPads; + + // Stack temporary used to hold eh.exception values. 
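+    // The variable is created lazily by CreateValueLoad and, when dominator
+    // information is available, promoted back to a register by
+    // PromoteStackTemporaries.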
+ AllocaInst *ExceptionValueVar; + + bool NormalizeLandingPads(); + bool LowerUnwinds(); + bool MoveExceptionValueCalls(); + bool FinishStackTemporaries(); + bool PromoteStackTemporaries(); + + Instruction *CreateExceptionValueCall(BasicBlock *BB); + Instruction *CreateValueLoad(BasicBlock *BB); + + /// CreateReadOfExceptionValue - Return the result of the eh.exception + /// intrinsic by calling the intrinsic if in a landing pad, or loading + /// it from the exception value variable otherwise. + Instruction *CreateReadOfExceptionValue(BasicBlock *BB) { + return LandingPads.count(BB) ? + CreateExceptionValueCall(BB) : CreateValueLoad(BB); + } + + public: + static char ID; // Pass identification, replacement for typeid. + DwarfEHPrepare(const TargetLowering *tli, bool fast) : + FunctionPass(&ID), TLI(tli), CompileFast(fast), + ExceptionValueIntrinsic(0), RewindFunction(0) {} + + virtual bool runOnFunction(Function &Fn); + + // getAnalysisUsage - We need dominance frontiers for memory promotion. + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + if (!CompileFast) + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + if (!CompileFast) + AU.addRequired<DominanceFrontier>(); + AU.addPreserved<DominanceFrontier>(); + } + + const char *getPassName() const { + return "Exception handling preparation"; + } + + }; +} // end anonymous namespace + +char DwarfEHPrepare::ID = 0; + +FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { + return new DwarfEHPrepare(tli, fast); +} + +/// NormalizeLandingPads - Normalize and discover landing pads, noting them +/// in the LandingPads set. A landing pad is normal if the only CFG edges +/// that end at it are unwind edges from invoke instructions. +/// Abnormal landing pads are fixed up by redirecting all unwind edges to +/// a new basic block which falls through to the original. +bool DwarfEHPrepare::NormalizeLandingPads() { + bool Changed = false; + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (!isa<InvokeInst>(TI)) + continue; + BasicBlock *LPad = TI->getSuccessor(1); + // Skip landing pads that have already been normalized. + if (LandingPads.count(LPad)) + continue; + + // Check that only invoke unwind edges end at the landing pad. + bool OnlyUnwoundTo = true; + for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); + PI != PE; ++PI) { + TerminatorInst *PT = (*PI)->getTerminator(); + if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) { + OnlyUnwoundTo = false; + break; + } + } + if (OnlyUnwoundTo) { + // Only unwind edges lead to the landing pad. Remember the landing pad. + LandingPads.insert(LPad); + continue; + } + + // At least one normal edge ends at the landing pad. Redirect the unwind + // edges to a new basic block which falls through into this one. + + // Create the new basic block. + BasicBlock *NewBB = BasicBlock::Create(LPad->getName() + "_unwind_edge"); + + // Insert it into the function right before the original landing pad. + LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); + + // Redirect unwind edges from the original landing pad to NewBB. + for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { + TerminatorInst *PT = (*PI++)->getTerminator(); + if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad) + // Unwind to the new block. 
+ PT->setSuccessor(1, NewBB); + } + + // If there are any PHI nodes in LPad, we need to update them so that they + // merge incoming values from NewBB instead. + for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) { + PHINode *PN = cast<PHINode>(II); + pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); + + // Check to see if all of the values coming in via unwind edges are the + // same. If so, we don't need to create a new PHI node. + Value *InVal = PN->getIncomingValueForBlock(*PB); + for (pred_iterator PI = PB; PI != PE; ++PI) { + if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { + InVal = 0; + break; + } + } + + if (InVal == 0) { + // Different unwind edges have different values. Create a new PHI node + // in NewBB. + PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind", + NewBB); + // Add an entry for each unwind edge, using the value from the old PHI. + for (pred_iterator PI = PB; PI != PE; ++PI) + NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); + + // Now use this new PHI as the common incoming value for NewBB in PN. + InVal = NewPN; + } + + // Revector exactly one entry in the PHI node to come from NewBB + // and delete all other entries that come from unwind edges. If + // there are both normal and unwind edges from the same predecessor, + // this leaves an entry for the normal edge. + for (pred_iterator PI = PB; PI != PE; ++PI) + PN->removeIncomingValue(*PI); + PN->addIncoming(InVal, NewBB); + } + + // Add a fallthrough from NewBB to the original landing pad. + BranchInst::Create(LPad, NewBB); + + // Now update DominatorTree and DominanceFrontier analysis information. + if (DT) + DT->splitBlock(NewBB); + if (DF) + DF->splitBlock(NewBB); + + // Remember the newly constructed landing pad. The original landing pad + // LPad is no longer a landing pad now that all unwind edges have been + // revectored to NewBB. + LandingPads.insert(NewBB); + ++NumLandingPadsSplit; + Changed = true; + } + + return Changed; +} + +/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume, +/// rethrowing any previously caught exception. This will crash horribly +/// at runtime if there is no such exception: using unwind to throw a new +/// exception is currently not supported. +bool DwarfEHPrepare::LowerUnwinds() { + bool Changed = false; + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (!isa<UnwindInst>(TI)) + continue; + + // Replace the unwind instruction with a call to _Unwind_Resume (or the + // appropriate target equivalent) followed by an UnreachableInst. + + // Find the rewind function if we didn't already. + if (!RewindFunction) { + std::vector<const Type*> Params(1, PointerType::getUnqual(Type::Int8Ty)); + FunctionType *FTy = FunctionType::get(Type::VoidTy, Params, false); + const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + } + + // Create the call... + CallInst::Create(RewindFunction, CreateReadOfExceptionValue(I), "", TI); + // ...followed by an UnreachableInst. + new UnreachableInst(TI); + + // Nuke the unwind instruction. + TI->eraseFromParent(); + ++NumUnwindsLowered; + Changed = true; + } + + return Changed; +} + +/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from +/// landing pads by replacing calls outside of landing pads with loads from a +/// stack temporary. 
Move eh.exception calls inside landing pads to the start +/// of the landing pad (optional, but may make things simpler for later passes). +bool DwarfEHPrepare::MoveExceptionValueCalls() { + // If the eh.exception intrinsic is not declared in the module then there is + // nothing to do. Speed up compilation by checking for this common case. + if (!ExceptionValueIntrinsic && + !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception))) + return false; + + bool Changed = false; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (CI->getIntrinsicID() == Intrinsic::eh_exception) { + if (!CI->use_empty()) { + Value *ExceptionValue = CreateReadOfExceptionValue(BB); + if (CI == ExceptionValue) { + // The call was at the start of a landing pad - leave it alone. + assert(LandingPads.count(BB) && + "Created eh.exception call outside landing pad!"); + continue; + } + CI->replaceAllUsesWith(ExceptionValue); + } + CI->eraseFromParent(); + ++NumExceptionValuesMoved; + Changed = true; + } + } + + return Changed; +} + +/// FinishStackTemporaries - If we introduced a stack variable to hold the +/// exception value then initialize it in each landing pad. +bool DwarfEHPrepare::FinishStackTemporaries() { + if (!ExceptionValueVar) + // Nothing to do. + return false; + + bool Changed = false; + + // Make sure that there is a store of the exception value at the start of + // each landing pad. + for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); + LI != LE; ++LI) { + Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI); + Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar); + Store->insertAfter(ExceptionValue); + Changed = true; + } + + return Changed; +} + +/// PromoteStackTemporaries - Turn any stack temporaries we introduced into +/// registers if possible. +bool DwarfEHPrepare::PromoteStackTemporaries() { + if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) { + // Turn the exception temporary into registers and phi nodes if possible. + std::vector<AllocaInst*> Allocas(1, ExceptionValueVar); + PromoteMemToReg(Allocas, *DT, *DF); + return true; + } + return false; +} + +/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at +/// the start of the basic block (unless there already is one, in which case +/// the existing call is returned). +Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { + Instruction *Start = BB->getFirstNonPHI(); + // Is this a call to eh.exception? + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start)) + if (CI->getIntrinsicID() == Intrinsic::eh_exception) + // Reuse the existing call. + return Start; + + // Find the eh.exception intrinsic if we didn't already. + if (!ExceptionValueIntrinsic) + ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::eh_exception); + + // Create the call. + return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); +} + +/// CreateValueLoad - Insert a load of the exception value stack variable +/// (creating it if necessary) at the start of the basic block (unless +/// there already is a load, in which case the existing load is returned). +Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) { + Instruction *Start = BB->getFirstNonPHI(); + // Is this a load of the exception temporary? 
+ if (ExceptionValueVar) + if (LoadInst* LI = dyn_cast<LoadInst>(Start)) + if (LI->getPointerOperand() == ExceptionValueVar) + // Reuse the existing load. + return Start; + + // Create the temporary if we didn't already. + if (!ExceptionValueVar) { + ExceptionValueVar = new AllocaInst(PointerType::getUnqual(Type::Int8Ty), + "eh.value", F->begin()->begin()); + ++NumStackTempsIntroduced; + } + + // Load the value. + return new LoadInst(ExceptionValueVar, "eh.value.load", Start); +} + +bool DwarfEHPrepare::runOnFunction(Function &Fn) { + bool Changed = false; + + // Initialize internal state. + DT = getAnalysisIfAvailable<DominatorTree>(); + DF = getAnalysisIfAvailable<DominanceFrontier>(); + ExceptionValueVar = 0; + F = &Fn; + + // Ensure that only unwind edges end at landing pads (a landing pad is a + // basic block where an invoke unwind edge ends). + Changed |= NormalizeLandingPads(); + + // Turn unwind instructions into libcalls. + Changed |= LowerUnwinds(); + + // TODO: Move eh.selector calls to landing pads and combine them. + + // Move eh.exception calls to landing pads. + Changed |= MoveExceptionValueCalls(); + + // Initialize any stack temporaries we introduced. + Changed |= FinishStackTemporaries(); + + // Turn any stack temporaries into registers if possible. + if (!CompileFast) + Changed |= PromoteStackTemporaries(); + + LandingPads.clear(); + + return Changed; +} diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp new file mode 100644 index 0000000..7cc1162 --- /dev/null +++ b/lib/CodeGen/ELFWriter.cpp @@ -0,0 +1,575 @@ +//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the target-independent ELF writer. This file writes out +// the ELF file in the following order: +// +// #1. ELF Header +// #2. '.text' section +// #3. '.data' section +// #4. '.bss' section (conceptual position in file) +// ... +// #X. '.shstrtab' section +// #Y. Section Table +// +// The entries in the section table are laid out as: +// #0. Null entry [required] +// #1. ".text" entry - the program code +// #2. ".data" entry - global variables with initializers. [ if needed ] +// #3. ".bss" entry - global variables without initializers. [ if needed ] +// ... +// #N. ".shstrtab" entry - String table for the section names. +// +// NOTE: This code should eventually be extended to support 64-bit ELF (this +// won't be hard), but we haven't done so yet! +// +//===----------------------------------------------------------------------===// + +#include "ELFWriter.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/FileWriters.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include <list> +using namespace llvm; + +char ELFWriter::ID = 0; +/// AddELFWriter - Concrete function to add the ELF writer to the function pass +/// manager. 
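+/// The returned MachineCodeEmitter is owned by the ELFWriter pass.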
+MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
+                                       raw_ostream &O,
+                                       TargetMachine &TM) {
+  ELFWriter *EW = new ELFWriter(O, TM);
+  PM.add(EW);
+  return &EW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+//                        ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+  /// ELFCodeEmitter - This class is used by the ELFWriter to emit the code for
+  /// functions to the ELF file.
+  class ELFCodeEmitter : public MachineCodeEmitter {
+    ELFWriter &EW;
+    TargetMachine &TM;
+    ELFWriter::ELFSection *ES;  // Section to write to.
+    std::vector<unsigned char> *OutBuffer;
+    size_t FnStart;
+  public:
+    explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {}
+
+    void startFunction(MachineFunction &F);
+    bool finishFunction(MachineFunction &F);
+
+    void addRelocation(const MachineRelocation &MR) {
+      assert(0 && "Relocations not handled yet!");
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+    }
+
+    virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+      assert(0 && "CP not implemented yet!");
+      return 0;
+    }
+    virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+      assert(0 && "JT not implemented yet!");
+      return 0;
+    }
+
+    virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+      assert(0 && "MBB addresses not implemented yet!");
+      return 0;
+    }
+
+    virtual uintptr_t getLabelAddress(uint64_t Label) const {
+      assert(0 && "Label addresses not implemented yet!");
+      abort();
+      return 0;
+    }
+
+    virtual void emitLabel(uint64_t LabelID) {
+      assert(0 && "emitLabel not implemented yet!");
+      abort();
+    }
+
+    virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+    /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+    void startGVStub(const GlobalValue* F, unsigned StubSize,
+                     unsigned Alignment = 1) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    void startGVStub(const GlobalValue* F, void *Buffer, unsigned StubSize) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    void *finishGVStub(const GlobalValue *F) {
+      assert(0 && "JIT specific function called!");
+      abort();
+      return 0;
+    }
+  };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &F) {
+  // Align the output buffer to the appropriate alignment.
+  unsigned Align = 16;   // FIXME: GENERICIZE!!
+  // Get the ELF Section that this function belongs in.
+  ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS,
+                      ELFWriter::ELFSection::SHF_EXECINSTR |
+                      ELFWriter::ELFSection::SHF_ALLOC);
+  OutBuffer = &ES->SectionData;
+  cerr << "FIXME: This code needs to be updated for changes in the "
+       << "CodeEmitter interfaces.  In particular, this should set "
+       << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!";
+  abort();
+
+  // Upgrade the section alignment if required.
+  if (ES->Align < Align) ES->Align = Align;
+
+  // Add padding zeros to the end of the buffer to make sure that the
+  // function will start on the correct byte alignment within the section.
+  OutputBuffer OB(*OutBuffer,
+                  TM.getTargetData()->getPointerSizeInBits() == 64,
+                  TM.getTargetData()->isLittleEndian());
+  OB.align(Align);
+  FnStart = OutBuffer->size();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
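+/// It records the function's size and adds a symbol for it to the symbol
+/// table.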
+bool ELFCodeEmitter::finishFunction(MachineFunction &F) { + // We now know the size of the function, add a symbol to represent it. + ELFWriter::ELFSym FnSym(F.getFunction()); + + // Figure out the binding (linkage) of the symbol. + switch (F.getFunction()->getLinkage()) { + default: + // appending linkage is illegal for functions. + assert(0 && "Unknown linkage type!"); + case GlobalValue::ExternalLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL); + break; + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK); + break; + case GlobalValue::PrivateLinkage: + assert (0 && "PrivateLinkage should not be in the symbol table."); + case GlobalValue::InternalLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL); + break; + } + + ES->Size = OutBuffer->size(); + + FnSym.SetType(ELFWriter::ELFSym::STT_FUNC); + FnSym.SectionIdx = ES->SectionIdx; + FnSym.Value = FnStart; // Value = Offset from start of Section. + FnSym.Size = OutBuffer->size()-FnStart; + + // Finally, add it to the symtab. + EW.SymbolTable.push_back(FnSym); + return false; +} + +//===----------------------------------------------------------------------===// +// ELFWriter Implementation +//===----------------------------------------------------------------------===// + +ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) + : MachineFunctionPass(&ID), O(o), TM(tm) { + e_flags = 0; // e_flags defaults to 0, no flags. + + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + + // Create the machine code emitter object for this target. + MCE = new ELFCodeEmitter(*this); + NumSections = 0; +} + +ELFWriter::~ELFWriter() { + delete MCE; +} + +// doInitialization - Emit the file header and all of the global variables for +// the module to the ELF file. +bool ELFWriter::doInitialization(Module &M) { + Mang = new Mangler(M); + + // Local alias to shortenify coming code. + std::vector<unsigned char> &FH = FileHeader; + OutputBuffer FHOut(FH, is64Bit, isLittleEndian); + + FHOut.outbyte(0x7F); // EI_MAG0 + FHOut.outbyte('E'); // EI_MAG1 + FHOut.outbyte('L'); // EI_MAG2 + FHOut.outbyte('F'); // EI_MAG3 + FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS + FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA + FHOut.outbyte(1); // EI_VERSION + FH.resize(16); // EI_PAD up to 16 bytes. + + // This should change for shared objects. + FHOut.outhalf(1); // e_type = ET_REL + FHOut.outhalf(TM.getELFWriterInfo()->getEMachine()); // target-defined + FHOut.outword(1); // e_version = 1 + FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file + FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o + + ELFHeader_e_shoff_Offset = FH.size(); + FHOut.outaddr(0); // e_shoff + FHOut.outword(e_flags); // e_flags = whatever the target wants + + FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size + FHOut.outhalf(0); // e_phentsize = prog header entry size + FHOut.outhalf(0); // e_phnum = # prog header entries = 0 + FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size + + + ELFHeader_e_shnum_Offset = FH.size(); + FHOut.outhalf(0); // e_shnum = # of section header ents + ELFHeader_e_shstrndx_Offset = FH.size(); + FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab' + + // Add the null section, which is required to be first in the file. + getSection("", 0, 0); + + // Start up the symbol table. 
The first entry in the symtab is the null + // entry. + SymbolTable.push_back(ELFSym(0)); + + return false; +} + +void ELFWriter::EmitGlobal(GlobalVariable *GV) { + // If this is an external global, emit it now. TODO: Note that it would be + // better to ignore the symbol here and only add it to the symbol table if + // referenced. + if (!GV->hasInitializer()) { + ELFSym ExternalSym(GV); + ExternalSym.SetBind(ELFSym::STB_GLOBAL); + ExternalSym.SetType(ELFSym::STT_NOTYPE); + ExternalSym.SectionIdx = ELFSection::SHN_UNDEF; + SymbolTable.push_back(ExternalSym); + return; + } + + unsigned Align = TM.getTargetData()->getPreferredAlignment(GV); + unsigned Size = + TM.getTargetData()->getTypeAllocSize(GV->getType()->getElementType()); + + // If this global has a zero initializer, it is part of the .bss or common + // section. + if (GV->getInitializer()->isNullValue()) { + // If this global is part of the common block, add it now. Variables are + // part of the common block if they are zero initialized and allowed to be + // merged with other symbols. + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasCommonLinkage()) { + ELFSym CommonSym(GV); + // Value for common symbols is the alignment required. + CommonSym.Value = Align; + CommonSym.Size = Size; + CommonSym.SetBind(ELFSym::STB_GLOBAL); + CommonSym.SetType(ELFSym::STT_OBJECT); + // TODO SOMEDAY: add ELF visibility. + CommonSym.SectionIdx = ELFSection::SHN_COMMON; + SymbolTable.push_back(CommonSym); + return; + } + + // Otherwise, this symbol is part of the .bss section. Emit it now. + + // Handle alignment. Ensure section is aligned at least as much as required + // by this symbol. + ELFSection &BSSSection = getBSSSection(); + BSSSection.Align = std::max(BSSSection.Align, Align); + + // Within the section, emit enough virtual padding to get us to an alignment + // boundary. + if (Align) + BSSSection.Size = (BSSSection.Size + Align - 1) & ~(Align-1); + + ELFSym BSSSym(GV); + BSSSym.Value = BSSSection.Size; + BSSSym.Size = Size; + BSSSym.SetType(ELFSym::STT_OBJECT); + + switch (GV->getLinkage()) { + default: // weak/linkonce/common handled above + assert(0 && "Unexpected linkage type!"); + case GlobalValue::AppendingLinkage: // FIXME: This should be improved! + case GlobalValue::ExternalLinkage: + BSSSym.SetBind(ELFSym::STB_GLOBAL); + break; + case GlobalValue::InternalLinkage: + BSSSym.SetBind(ELFSym::STB_LOCAL); + break; + } + + // Set the idx of the .bss section + BSSSym.SectionIdx = BSSSection.SectionIdx; + if (!GV->hasPrivateLinkage()) + SymbolTable.push_back(BSSSym); + + // Reserve space in the .bss section for this symbol. + BSSSection.Size += Size; + return; + } + + // FIXME: handle .rodata + //assert(!GV->isConstant() && "unimp"); + + // FIXME: handle .data + //assert(0 && "unimp"); +} + + +bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { + // Nothing to do here, this is all done through the MCE object above. + return false; +} + +/// doFinalization - Now that the module has been completely processed, emit +/// the ELF file to 'O'. +bool ELFWriter::doFinalization(Module &M) { + // Okay, the ELF header and .text sections have been completed, build the + // .data, .bss, and "common" sections next. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobal(I); + + // Emit the symbol table now, if non-empty. + EmitSymbolTable(); + + // FIXME: Emit the relocations now. + + // Emit the string table for the sections in the ELF file we have. 
+  EmitSectionTableStringTable();
+
+  // Emit the sections to the .o file, and emit the section table for the file.
+  OutputSectionsAndSectionTable();
+
+  // We are done with the abstract symbols.
+  SectionList.clear();
+  NumSections = 0;
+
+  // Release the name mangler object.
+  delete Mang; Mang = 0;
+  return false;
+}
+
+/// EmitSymbolTable - If the current symbol table is non-empty, emit the string
+/// table for it and then the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+  if (SymbolTable.size() == 1) return;  // Only the null entry.
+
+  // FIXME: compact all local symbols to the start of the symtab.
+  unsigned FirstNonLocalSymbol = 1;
+
+  ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0);
+  StrTab.Align = 1;
+
+  DataBuffer &StrTabBuf = StrTab.SectionData;
+  OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian);
+
+  // Set the zeroth symbol to a null byte, as required.
+  StrTabOut.outbyte(0);
+  SymbolTable[0].NameIdx = 0;
+  unsigned Index = 1;
+  for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) {
+    // Use the name mangler to uniquify the LLVM symbol.
+    std::string Name = Mang->getValueName(SymbolTable[i].GV);
+
+    if (Name.empty()) {
+      SymbolTable[i].NameIdx = 0;
+    } else {
+      SymbolTable[i].NameIdx = Index;
+
+      // Add the name to the output buffer, including the null terminator.
+      StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end());
+
+      // Add a null terminator.
+      StrTabBuf.push_back(0);
+
+      // Keep track of the number of bytes emitted to this section.
+      Index += Name.size()+1;
+    }
+  }
+  assert(Index == StrTabBuf.size());
+  StrTab.Size = Index;
+
+  // Now that we have emitted the string table and know the offset into the
+  // string table of each symbol, emit the symbol table itself.
+  ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+  SymTab.Align = is64Bit ? 8 : 4;
+  SymTab.Link = StrTab.SectionIdx;      // Section Index of .strtab.
+  SymTab.Info = FirstNonLocalSymbol;    // First non-STB_LOCAL symbol.
+  SymTab.EntSize = 16;  // Size of each symtab entry.  FIXME: wrong for ELF64
+  DataBuffer &SymTabBuf = SymTab.SectionData;
+  OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian);
+
+  if (!is64Bit) {  // 32-bit and 64-bit formats are shuffled a bit.
+    for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+      ELFSym &Sym = SymbolTable[i];
+      SymTabOut.outword(Sym.NameIdx);
+      SymTabOut.outaddr32(Sym.Value);
+      SymTabOut.outword(Sym.Size);
+      SymTabOut.outbyte(Sym.Info);
+      SymTabOut.outbyte(Sym.Other);
+      SymTabOut.outhalf(Sym.SectionIdx);
+    }
+  } else {
+    for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+      ELFSym &Sym = SymbolTable[i];
+      SymTabOut.outword(Sym.NameIdx);
+      SymTabOut.outbyte(Sym.Info);
+      SymTabOut.outbyte(Sym.Other);
+      SymTabOut.outhalf(Sym.SectionIdx);
+      SymTabOut.outaddr64(Sym.Value);
+      SymTabOut.outxword(Sym.Size);
+    }
+  }
+
+  SymTab.Size = SymTabBuf.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+  // First step: add the section for the string table to the list of sections:
+  ELFSection &SHStrTab = getSection(".shstrtab", ELFSection::SHT_STRTAB, 0);
+
+  // Now that we know which section number is the .shstrtab section, update the
+  // e_shstrndx entry in the ELF header.
+  OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+  FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset);
+
+  // Set the NameIdx of each section in the string table and emit the bytes for
+  // the string table.
+  unsigned Index = 0;
+  DataBuffer &Buf = SHStrTab.SectionData;
+
+  for (std::list<ELFSection>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I) {
+    // Set the index into the table.  Note if we have lots of entries with
+    // common suffixes, we could memoize them here if we cared.
+    I->NameIdx = Index;
+
+    // Add the name to the output buffer, including the null terminator.
+    Buf.insert(Buf.end(), I->Name.begin(), I->Name.end());
+
+    // Add a null terminator.
+    Buf.push_back(0);
+
+    // Keep track of the number of bytes emitted to this section.
+    Index += I->Name.size()+1;
+  }
+
+  // Set the size of .shstrtab now that we know what it is.
+  assert(Index == Buf.size());
+  SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+  // Pass #1: Compute the file offset for each section.
+  size_t FileOff = FileHeader.size();   // File header first.
+
+  // Emit all of the section data in order.
+  for (std::list<ELFSection>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I) {
+    // Align FileOff to whatever the alignment restrictions of the section are.
+    if (I->Align)
+      FileOff = (FileOff+I->Align-1) & ~(I->Align-1);
+    I->Offset = FileOff;
+    FileOff += I->SectionData.size();
+  }
+
+  // Align Section Header.
+  unsigned TableAlign = is64Bit ? 8 : 4;
+  FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+  // Now that we know where all of the sections will be emitted, set the e_shnum
+  // entry in the ELF header.
+  OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+  FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset);
+
+  // Now that we know the offset in the file of the section table, update the
+  // e_shoff address in the ELF header.
+  FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset);
+
+  // Now that we know all of the data in the file header, emit it and all of the
+  // sections!
+  O.write((char*)&FileHeader[0], FileHeader.size());
+  FileOff = FileHeader.size();
+  DataBuffer().swap(FileHeader);
+
+  DataBuffer Table;
+  OutputBuffer TableOut(Table, is64Bit, isLittleEndian);
+
+  // Emit all of the section data and build the section table itself.
+  while (!SectionList.empty()) {
+    const ELFSection &S = *SectionList.begin();
+
+    // Align FileOff to whatever the alignment restrictions of the section are.
+    if (S.Align)
+      for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+           FileOff != NewFileOff; ++FileOff)
+        O << (char)0xAB;
+    O.write((char*)&S.SectionData[0], S.SectionData.size());
+    FileOff += S.SectionData.size();
+
+    TableOut.outword(S.NameIdx);  // sh_name - Symbol table name idx
+    TableOut.outword(S.Type);     // sh_type - Section contents & semantics
+    TableOut.outword(S.Flags);    // sh_flags - Section flags.
+    TableOut.outaddr(S.Addr);     // sh_addr - The mem addr this section is in.
+    TableOut.outaddr(S.Offset);   // sh_offset - Offset from the file start.
+    TableOut.outword(S.Size);     // sh_size - The section size.
+    TableOut.outword(S.Link);     // sh_link - Section header table index link.
+    TableOut.outword(S.Info);     // sh_info - Auxiliary information.
+    TableOut.outword(S.Align);    // sh_addralign - Alignment of section.
+ TableOut.outword(S.EntSize); // sh_entsize - Size of entries in the section + + SectionList.pop_front(); + } + + // Align output for the section table. + for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); + FileOff != NewFileOff; ++FileOff) + O << (char)0xAB; + + // Emit the section table itself. + O.write((char*)&Table[0], Table.size()); +} diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h new file mode 100644 index 0000000..31aa05a --- /dev/null +++ b/lib/CodeGen/ELFWriter.h @@ -0,0 +1,230 @@ +//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ELFWriter class. +// +//===----------------------------------------------------------------------===// + +#ifndef ELFWRITER_H +#define ELFWRITER_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include <list> +#include <map> + +namespace llvm { + class GlobalVariable; + class Mangler; + class MachineCodeEmitter; + class ELFCodeEmitter; + class raw_ostream; + + /// ELFWriter - This class implements the common target-independent code for + /// writing ELF files. Targets should derive a class from this to + /// parameterize the output format. + /// + class ELFWriter : public MachineFunctionPass { + friend class ELFCodeEmitter; + public: + static char ID; + + MachineCodeEmitter &getMachineCodeEmitter() const { + return *(MachineCodeEmitter*)MCE; + } + + ELFWriter(raw_ostream &O, TargetMachine &TM); + ~ELFWriter(); + + typedef std::vector<unsigned char> DataBuffer; + + protected: + /// Output stream to send the resultant object file to. + /// + raw_ostream &O; + + /// Target machine description. + /// + TargetMachine &TM; + + /// Mang - The object used to perform name mangling for this module. + /// + Mangler *Mang; + + /// MCE - The MachineCodeEmitter object that we are exposing to emit machine + /// code for functions to the .o file. + ELFCodeEmitter *MCE; + + //===------------------------------------------------------------------===// + // Properties to be set by the derived class ctor, used to configure the + // ELFWriter. + + // e_machine - This field is the target specific value to emit as the + // e_machine member of the ELF header. + unsigned short e_machine; + + // e_flags - The machine flags for the target. This defaults to zero. + unsigned e_flags; + + //===------------------------------------------------------------------===// + // Properties inferred automatically from the target machine. + // + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating whether to emit a 32- or 64-bit ELF file. + bool is64Bit, isLittleEndian; + + /// doInitialization - Emit the file header and all of the global variables + /// for the module to the ELF file. + bool doInitialization(Module &M); + + bool runOnMachineFunction(MachineFunction &MF); + + + /// doFinalization - Now that the module has been completely processed, emit + /// the ELF file to 'O'. + bool doFinalization(Module &M); + + private: + // The buffer we accumulate the file header into. Note that this should be + // changed into something much more efficient later (and the bitcode writer + // as well!). 
+ DataBuffer FileHeader; + + /// ELFSection - This struct contains information about each section that is + /// emitted to the file. This is eventually turned into the section header + /// table at the end of the file. + struct ELFSection { + std::string Name; // Name of the section. + unsigned NameIdx; // Index in .shstrtab of name, once emitted. + unsigned Type; + unsigned Flags; + uint64_t Addr; + unsigned Offset; + unsigned Size; + unsigned Link; + unsigned Info; + unsigned Align; + unsigned EntSize; + + /// SectionIdx - The number of the section in the Section Table. + /// + unsigned short SectionIdx; + + /// SectionData - The actual data for this section which we are building + /// up for emission to the file. + DataBuffer SectionData; + + enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3, + SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7, + SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 }; + enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 }; + enum { // SHF - ELF Section Header Flags + SHF_WRITE = 1 << 0, // Writable + SHF_ALLOC = 1 << 1, // Mapped into the process addr space + SHF_EXECINSTR = 1 << 2, // Executable + SHF_MERGE = 1 << 4, // Might be merged if equal + SHF_STRINGS = 1 << 5, // Contains null-terminated strings + SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index + SHF_LINK_ORDER = 1 << 7, // Preserve order after combining + SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required + SHF_GROUP = 1 << 9, // Section is a member of a group + SHF_TLS = 1 << 10 // Section holds thread-local data + }; + + ELFSection(const std::string &name) + : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0), + Link(0), Info(0), Align(0), EntSize(0) { + } + }; + + /// SectionList - This is the list of sections that we have emitted to the + /// file. Once the file has been completely built, the section header table + /// is constructed from this info. + std::list<ELFSection> SectionList; + unsigned NumSections; // Always = SectionList.size() + + /// SectionLookup - This is a mapping from section name to section number in + /// the SectionList. + std::map<std::string, ELFSection*> SectionLookup; + + /// getSection - Return the section with the specified name, creating a new + /// section if one does not already exist. + ELFSection &getSection(const std::string &Name, + unsigned Type, unsigned Flags = 0) { + ELFSection *&SN = SectionLookup[Name]; + if (SN) return *SN; + + SectionList.push_back(Name); + SN = &SectionList.back(); + SN->SectionIdx = NumSections++; + SN->Type = Type; + SN->Flags = Flags; + return *SN; + } + + ELFSection &getDataSection() { + return getSection(".data", ELFSection::SHT_PROGBITS, + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); + } + ELFSection &getBSSSection() { + return getSection(".bss", ELFSection::SHT_NOBITS, + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); + } + + /// ELFSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct ELFSym { + const GlobalValue *GV; // The global value this corresponds to. + unsigned NameIdx; // Index in .strtab of name, once emitted. 
+ uint64_t Value; + unsigned Size; + unsigned char Info; + unsigned char Other; + unsigned short SectionIdx; + + enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 }; + enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3, + STT_FILE = 4 }; + ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0), + Other(0), SectionIdx(0) {} + + void SetBind(unsigned X) { + assert(X == (X & 0xF) && "Bind value out of range!"); + Info = (Info & 0x0F) | (X << 4); + } + void SetType(unsigned X) { + assert(X == (X & 0xF) && "Type value out of range!"); + Info = (Info & 0xF0) | X; + } + }; + + /// SymbolTable - This is the list of symbols we have emitted to the file. + /// This actually gets rearranged before emission to the file (to put the + /// local symbols first in the list). + std::vector<ELFSym> SymbolTable; + + // As we complete the ELF file, we need to update fields in the ELF header + // (e.g. the location of the section table). These members keep track of + // the offset in ELFHeader of these various pieces to update and other + // locations in the file. + unsigned ELFHeader_e_shoff_Offset; // e_shoff in ELF header. + unsigned ELFHeader_e_shstrndx_Offset; // e_shstrndx in ELF header. + unsigned ELFHeader_e_shnum_Offset; // e_shnum in ELF header. + private: + void EmitGlobal(GlobalVariable *GV); + + void EmitSymbolTable(); + + void EmitSectionTableStringTable(); + void OutputSectionsAndSectionTable(); + }; +} + +#endif diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp new file mode 100644 index 0000000..cf2ebb3 --- /dev/null +++ b/lib/CodeGen/GCMetadata.cpp @@ -0,0 +1,212 @@ +//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the GCFunctionInfo class and GCModuleInfo pass. 
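+// GCModuleInfo caches one GCStrategy per collector name and hands out the
+// per-function GCFunctionInfo records via getFunctionInfo.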
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/Support/Compiler.h" + +using namespace llvm; + +namespace { + + class VISIBILITY_HIDDEN Printer : public FunctionPass { + static char ID; + std::ostream &OS; + + public: + explicit Printer(std::ostream &OS = *cerr); + + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnFunction(Function &F); + }; + + class VISIBILITY_HIDDEN Deleter : public FunctionPass { + static char ID; + + public: + Deleter(); + + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnFunction(Function &F); + bool doFinalization(Module &M); + }; + +} + +static RegisterPass<GCModuleInfo> +X("collector-metadata", "Create Garbage Collector Module Metadata"); + +// ----------------------------------------------------------------------------- + +GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) + : F(F), S(S), FrameSize(~0LL) {} + +GCFunctionInfo::~GCFunctionInfo() {} + +// ----------------------------------------------------------------------------- + +char GCModuleInfo::ID = 0; + +GCModuleInfo::GCModuleInfo() + : ImmutablePass(&ID) {} + +GCModuleInfo::~GCModuleInfo() { + clear(); +} + +GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, + const std::string &Name) { + const char *Start = Name.c_str(); + + strategy_map_type::iterator NMI = + StrategyMap.find(Start, Start + Name.size()); + if (NMI != StrategyMap.end()) + return NMI->getValue(); + + for (GCRegistry::iterator I = GCRegistry::begin(), + E = GCRegistry::end(); I != E; ++I) { + if (strcmp(Start, I->getName()) == 0) { + GCStrategy *S = I->instantiate(); + S->M = M; + S->Name = Name; + StrategyMap.GetOrCreateValue(Start, Start + Name.size()).setValue(S); + StrategyList.push_back(S); + return S; + } + } + + cerr << "unsupported GC: " << Name << "\n"; + abort(); +} + +GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { + assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!"); + assert(F.hasGC()); + + finfo_map_type::iterator I = FInfoMap.find(&F); + if (I != FInfoMap.end()) + return *I->second; + + GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC()); + GCFunctionInfo *GFI = S->insertFunctionInfo(F); + FInfoMap[&F] = GFI; + return *GFI; +} + +void GCModuleInfo::clear() { + FInfoMap.clear(); + StrategyMap.clear(); + + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; + StrategyList.clear(); +} + +// ----------------------------------------------------------------------------- + +char Printer::ID = 0; + +FunctionPass *llvm::createGCInfoPrinter(std::ostream &OS) { + return new Printer(OS); +} + +Printer::Printer(std::ostream &OS) + : FunctionPass(&ID), OS(OS) {} + +const char *Printer::getPassName() const { + return "Print Garbage Collector Information"; +} + +void Printer::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<GCModuleInfo>(); +} + +static const char *DescKind(GC::PointKind Kind) { + switch (Kind) { + default: assert(0 && "Unknown GC point kind"); + case GC::Loop: return "loop"; + case GC::Return: return "return"; + case GC::PreCall: return "pre-call"; + case GC::PostCall: return "post-call"; + } +} + +bool 
Printer::runOnFunction(Function &F) {
+  // Only functions that use GC have metadata to print; getFunctionInfo
+  // asserts on functions without GC.
+  if (F.hasGC()) {
+    GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+    OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n";
+    for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+                                        RE = FD->roots_end(); RI != RE; ++RI)
+      OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+    OS << "GC safe points for " << FD->getFunction().getNameStart() << ":\n";
+    for (GCFunctionInfo::iterator PI = FD->begin(),
+                                  PE = FD->end(); PI != PE; ++PI) {
+
+      OS << "\tlabel " << PI->Num << ": " << DescKind(PI->Kind) << ", live = {";
+
+      // Guard against an empty live set, which would otherwise be
+      // dereferenced unconditionally.
+      for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+                                         RE = FD->live_end(PI); RI != RE;) {
+        OS << " " << RI->Num;
+        if (++RI == RE)
+          break;
+        OS << ",";
+      }
+
+      OS << " }\n";
+    }
+  }
+
+  return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+  return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(&ID) {}
+
+const char *Deleter::getPassName() const {
+  return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &F) {
+  return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+  GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(GMI && "Deleter didn't require GCModuleInfo?!");
+  GMI->clear();
+  return false;
+}
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 0000000..5a5ef84
--- /dev/null
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,30 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP,
+                                      const TargetAsmInfo &TAI) {
+  // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
+                                       const TargetAsmInfo &TAI) {
+  // Default is no action.
+}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 0000000..ad7421a
--- /dev/null
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,392 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
+// are identified in SelectionDAGISel.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" + +using namespace llvm; + +namespace { + + /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or + /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as + /// directed by the GCStrategy. It also performs automatic root initialization + /// and custom intrinsic lowering. + class VISIBILITY_HIDDEN LowerIntrinsics : public FunctionPass { + static bool NeedsDefaultLoweringPass(const GCStrategy &C); + static bool NeedsCustomLoweringPass(const GCStrategy &C); + static bool CouldBecomeSafePoint(Instruction *I); + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + static bool InsertRootInitializers(Function &F, + AllocaInst **Roots, unsigned Count); + + public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + }; + + + /// MachineCodeAnalysis - This is a target-independent pass over the machine + /// function representation to identify safe points for the garbage collector + /// in the machine code. It inserts labels at safe points and populates a + /// GCMetadata record for each function. 
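+  ///
+  /// For example (an illustrative sketch; the label numbers are invented):
+  /// when the strategy requests PreCall and PostCall safe points, a call site
+  ///
+  ///   CALL @foo
+  ///
+  /// is bracketed as
+  ///
+  ///   GC_LABEL 7      ; PreCall safe point
+  ///   CALL @foo
+  ///   GC_LABEL 8      ; PostCall safe point
+  ///
+  /// and (7, PreCall) / (8, PostCall) are recorded in the function's
+  /// GCFunctionInfo (see VisitCallPoint and InsertLabel below).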
+ class VISIBILITY_HIDDEN MachineCodeAnalysis : public MachineFunctionPass { + const TargetMachine *TM; + GCFunctionInfo *FI; + MachineModuleInfo *MMI; + const TargetInstrInfo *TII; + + void FindSafePoints(MachineFunction &MF); + void VisitCallPoint(MachineBasicBlock::iterator MI); + unsigned InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + void FindStackOffsets(MachineFunction &MF); + + public: + static char ID; + + MachineCodeAnalysis(); + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnMachineFunction(MachineFunction &MF); + }; + +} + +// ----------------------------------------------------------------------------- + +GCStrategy::GCStrategy() : + NeededSafePoints(0), + CustomReadBarriers(false), + CustomWriteBarriers(false), + CustomRoots(false), + InitRoots(true), + UsesMetadata(false) +{} + +GCStrategy::~GCStrategy() { + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; + + Functions.clear(); +} + +bool GCStrategy::initializeCustomLowering(Module &M) { return false; } + +bool GCStrategy::performCustomLowering(Function &F) { + cerr << "gc " << getName() << " must override performCustomLowering.\n"; + abort(); + return 0; +} + +GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { + GCFunctionInfo *FI = new GCFunctionInfo(F, *this); + Functions.push_back(FI); + return FI; +} + +// ----------------------------------------------------------------------------- + +FunctionPass *llvm::createGCLoweringPass() { + return new LowerIntrinsics(); +} + +char LowerIntrinsics::ID = 0; + +LowerIntrinsics::LowerIntrinsics() + : FunctionPass(&ID) {} + +const char *LowerIntrinsics::getPassName() const { + return "Lower Garbage Collection Instructions"; +} + +void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<GCModuleInfo>(); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. +bool LowerIntrinsics::doInitialization(Module &M) { + // FIXME: This is rather antisocial in the context of a JIT since it performs + // work against the entire module. But this cannot be done at + // runFunction time (initializeCustomLowering likely needs to change + // the module). + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && I->hasGC()) + MI->getFunctionInfo(*I); // Instantiate the GC strategy. + + bool MadeChange = false; + for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + if (NeedsCustomLoweringPass(**I)) + if ((*I)->initializeCustomLowering(M)) + MadeChange = true; + + return MadeChange; +} + +bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, + unsigned Count) { + // Scroll past alloca instructions. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + while (isa<AllocaInst>(IP)) ++IP; + + // Search for initializers in the initial BB. + SmallPtrSet<AllocaInst*,16> InitedRoots; + for (; !CouldBecomeSafePoint(IP); ++IP) + if (StoreInst *SI = dyn_cast<StoreInst>(IP)) + if (AllocaInst *AI = + dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) + InitedRoots.insert(AI); + + // Add root initializers. 
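+  //
+  // For example (an illustrative IR sketch; the names are invented): given
+  //
+  //   %root = alloca i8*
+  //   call void @llvm.gcroot(i8** %root, i8* null)
+  //
+  // with no store to %root before the first potential safe point, the loop
+  // below inserts
+  //
+  //   store i8* null, i8** %root
+  //
+  // so the collector never scans an uninitialized stack slot.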
+  bool MadeChange = false;
+
+  for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+    if (!InitedRoots.count(*I)) {
+      new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+                      cast<PointerType>((*I)->getType())->getElementType())),
+                    *I, IP);
+      MadeChange = true;
+    }
+
+  return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+  // Default lowering is necessary only if read or write barriers have a
+  // default action, or if roots require their default (null) initialization.
+  return !C.customWriteBarrier()
+      || !C.customReadBarrier()
+      || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+  // Custom lowering is only necessary if enabled for some action.
+  return C.customWriteBarrier()
+      || C.customReadBarrier()
+      || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+  // The natural candidates for instructions which could introduce safe points
+  // are:
+  //
+  //   - call, invoke (AfterCall, BeforeCall)
+  //   - phis (Loops)
+  //   - invoke, ret, unwind (Exit)
+  //
+  // However, instructions as seemingly innocuous as arithmetic can become
+  // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+  // it is necessary to take a conservative approach.
+
+  if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+      isa<StoreInst>(I) || isa<LoadInst>(I))
+    return false;
+
+  // llvm.gcroot is safe because it doesn't do anything at runtime.
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    if (Function *F = CI->getCalledFunction())
+      if (unsigned IID = F->getIntrinsicID())
+        if (IID == Intrinsic::gcroot)
+          return false;
+
+  return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+  // Quick exit for functions that do not use GC.
+  if (!F.hasGC())
+    return false;
+
+  GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+  GCStrategy &S = FI.getStrategy();
+
+  bool MadeChange = false;
+
+  if (NeedsDefaultLoweringPass(S))
+    MadeChange |= PerformDefaultLowering(F, S);
+
+  if (NeedsCustomLoweringPass(S))
+    MadeChange |= S.performCustomLowering(F);
+
+  return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+  bool LowerWr = !S.customWriteBarrier();
+  bool LowerRd = !S.customReadBarrier();
+  bool InitRoots = S.initializeRoots();
+
+  SmallVector<AllocaInst*,32> Roots;
+
+  bool MadeChange = false;
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+        Function *F = CI->getCalledFunction();
+        switch (F->getIntrinsicID()) {
+        case Intrinsic::gcwrite:
+          if (LowerWr) {
+            // Replace a write barrier with a simple store.
+            Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI);
+            CI->replaceAllUsesWith(St);
+            CI->eraseFromParent();
+          }
+          break;
+        case Intrinsic::gcread:
+          if (LowerRd) {
+            // Replace a read barrier with a simple load.
+            Value *Ld = new LoadInst(CI->getOperand(2), "", CI);
+            Ld->takeName(CI);
+            CI->replaceAllUsesWith(Ld);
+            CI->eraseFromParent();
+          }
+          break;
+        case Intrinsic::gcroot:
+          if (InitRoots) {
+            // Initialize the GC root, but do not delete the intrinsic.
The + // backend needs the intrinsic to flag the stack slot. + Roots.push_back(cast<AllocaInst>( + CI->getOperand(1)->stripPointerCasts())); + } + break; + default: + continue; + } + + MadeChange = true; + } + } + } + + if (Roots.size()) + MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + + return MadeChange; +} + +// ----------------------------------------------------------------------------- + +FunctionPass *llvm::createGCMachineCodeAnalysisPass() { + return new MachineCodeAnalysis(); +} + +char MachineCodeAnalysis::ID = 0; + +MachineCodeAnalysis::MachineCodeAnalysis() + : MachineFunctionPass(&ID) {} + +const char *MachineCodeAnalysis::getPassName() const { + return "Analyze Machine Code For Garbage Collection"; +} + +void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); +} + +unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + unsigned Label = MMI->NextLabelID(); + // N.B. we assume that MI is *not* equal to the "end()" iterator. + BuildMI(MBB, MI, MI->getDebugLoc(), + TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label); + return Label; +} + +void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { + // Find the return address (next instruction), too, so as to bracket the call + // instruction. + MachineBasicBlock::iterator RAI = CI; + ++RAI; + + if (FI->getStrategy().needsSafePoint(GC::PreCall)) + FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI)); + + if (FI->getStrategy().needsSafePoint(GC::PostCall)) + FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI)); +} + +void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), + BBE = MF.end(); BBI != BBE; ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), + ME = BBI->end(); MI != ME; ++MI) + if (MI->getDesc().isCall()) + VisitCallPoint(MI); +} + +void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { + const TargetRegisterInfo *TRI = TM->getRegisterInfo(); + assert(TRI && "TargetRegisterInfo not available!"); + + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), + RE = FI->roots_end(); RI != RE; ++RI) + RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num); +} + +bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { + // Quick exit for functions that do not use GC. + if (!MF.getFunction()->hasGC()) + return false; + + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); + if (!FI->getStrategy().needsSafePoints()) + return false; + + TM = &MF.getTarget(); + MMI = &getAnalysis<MachineModuleInfo>(); + TII = TM->getInstrInfo(); + + // Find the size of the stack frame. + FI->setFrameSize(MF.getFrameInfo()->getStackSize()); + + // Find all safe points. + FindSafePoints(MF); + + // Find the stack offsets for all roots. + FindStackOffsets(MF); + + return false; +} diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp new file mode 100644 index 0000000..1d0887f --- /dev/null +++ b/lib/CodeGen/IfConversion.cpp @@ -0,0 +1,1229 @@ +//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// Hidden options to help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+                                   cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+                                    cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+                                     cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+                                      cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+                                      cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+                                       cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+                                    cl::init(false), cl::Hidden);
+
+STATISTIC(NumSimple,       "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse,  "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle,     "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev,  "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds,     "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs,    "Number of if-converted blocks");
+STATISTIC(NumDupBBs,       "Number of duplicated blocks");
+
+namespace {
+  class VISIBILITY_HIDDEN IfConverter : public MachineFunctionPass {
+    enum IfcvtKind {
+      ICNotClassified, // BB data valid, but not classified.
+      ICSimpleFalse,   // Same as ICSimple, but on the false path.
+      ICSimple,        // BB is entry of a one-split, no-rejoin sub-CFG.
+      ICTriangleFRev,  // Same as ICTriangleFalse, but false path rev condition.
+      ICTriangleRev,   // Same as ICTriangle, but true path rev condition.
+      ICTriangleFalse, // Same as ICTriangle, but on the false path.
+      ICTriangle,      // BB is entry of a triangle sub-CFG.
+      ICDiamond        // BB is entry of a diamond sub-CFG.
+    };
+
+    /// BBInfo - One per MachineBasicBlock, this is used to cache the results
+    /// of the if-conversion feasibility analysis. This includes the results
+    /// from TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), the
+    /// block's classification, the common tail block of its successors (if
+    /// it's a diamond shape), its size, whether it's predicable, and whether
+    /// any instruction can clobber the 'would-be' predicate.
+    ///
+    /// IsDone          - True if BB is not to be considered for ifcvt.
+    /// IsBeingAnalyzed - True if BB is currently being analyzed.
+    /// IsAnalyzed      - True if BB has been analyzed (info is still valid).
+    /// IsEnqueued      - True if BB has been enqueued to be ifcvt'ed.
+    /// IsBrAnalyzable  - True if AnalyzeBranch() returns false, i.e. the
+    ///                   block's terminators could be analyzed.
+    /// HasFallThrough  - True if BB may fallthrough to the following BB.
+    /// IsUnpredicable  - True if BB is known to be unpredicable.
+    /// ClobbersPred    - True if BB could modify predicates (e.g. has
+    ///                   cmp, call, etc.)
+    /// NonPredSize     - Number of non-predicated instructions.
+    /// BB              - Corresponding MachineBasicBlock.
+    /// TrueBB / FalseBB- See AnalyzeBranch().
+    /// BrCond          - Conditions for end of block conditional branches.
+    /// Predicate       - Predicate used in the BB.
+    struct BBInfo {
+      bool IsDone          : 1;
+      bool IsBeingAnalyzed : 1;
+      bool IsAnalyzed      : 1;
+      bool IsEnqueued      : 1;
+      bool IsBrAnalyzable  : 1;
+      bool HasFallThrough  : 1;
+      bool IsUnpredicable  : 1;
+      bool CannotBeCopied  : 1;
+      bool ClobbersPred    : 1;
+      unsigned NonPredSize;
+      MachineBasicBlock *BB;
+      MachineBasicBlock *TrueBB;
+      MachineBasicBlock *FalseBB;
+      SmallVector<MachineOperand, 4> BrCond;
+      SmallVector<MachineOperand, 4> Predicate;
+      BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+                 IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+                 HasFallThrough(false), IsUnpredicable(false),
+                 CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+                 BB(0), TrueBB(0), FalseBB(0) {}
+    };
+
+    /// IfcvtToken - Record information about pending if-conversions to attempt:
+    /// BBI             - Corresponding BBInfo.
+    /// Kind            - Type of block. See IfcvtKind.
+    /// NeedSubsumption - True if the to-be-predicated BB has already been
+    ///                   predicated.
+    /// NumDups         - Number of instructions that would be duplicated due
+    ///                   to this if-conversion. (For diamonds, the number of
+    ///                   identical instructions at the beginnings of both
+    ///                   paths).
+    /// NumDups2        - For diamonds, the number of identical instructions
+    ///                   at the ends of both paths.
+    struct IfcvtToken {
+      BBInfo &BBI;
+      IfcvtKind Kind;
+      bool NeedSubsumption;
+      unsigned NumDups;
+      unsigned NumDups2;
+      IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+        : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+    };
+
+    /// Roots - Basic blocks that do not have successors. These are the
+    /// starting points of the graph traversal.
+    std::vector<MachineBasicBlock*> Roots;
+
+    /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+    /// basic block number.
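+    ///
+    /// For example, the cached analysis for a block is fetched as
+    ///   BBInfo &BBI = BBAnalysis[BB->getNumber()];
+    /// which is why runOnMachineFunction below calls MF.RenumberBlocks()
+    /// before resizing this vector.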
+ std::vector<BBInfo> BBAnalysis; + + const TargetLowering *TLI; + const TargetInstrInfo *TII; + bool MadeChange; + public: + static char ID; + IfConverter() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "If Converter"; } + + private: + bool ReverseBranchCondition(BBInfo &BBI); + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const; + bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups) const; + bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const; + void ScanInstructions(BBInfo &BBI); + BBInfo &AnalyzeBlock(MachineBasicBlock *BB, + std::vector<IfcvtToken*> &Tokens); + bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, + bool isTriangle = false, bool RevBranch = false); + bool AnalyzeBlocks(MachineFunction &MF, + std::vector<IfcvtToken*> &Tokens); + void InvalidatePreds(MachineBasicBlock *BB); + void RemoveExtraEdges(BBInfo &BBI); + bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2); + void PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl<MachineOperand> &Cond); + void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl<MachineOperand> &Cond, + bool IgnoreBr = false); + void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI); + + bool MeetIfcvtSizeLimit(unsigned Size) const { + return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit(); + } + + // blockAlwaysFallThrough - Block ends without a terminator. + bool blockAlwaysFallThrough(BBInfo &BBI) const { + return BBI.IsBrAnalyzable && BBI.TrueBB == NULL; + } + + // IfcvtTokenCmp - Used to sort if-conversion candidates. + static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) { + int Incr1 = (C1->Kind == ICDiamond) + ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups; + int Incr2 = (C2->Kind == ICDiamond) + ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups; + if (Incr1 > Incr2) + return true; + else if (Incr1 == Incr2) { + // Favors subsumption. + if (C1->NeedSubsumption == false && C2->NeedSubsumption == true) + return true; + else if (C1->NeedSubsumption == C2->NeedSubsumption) { + // Favors diamond over triangle, etc. + if ((unsigned)C1->Kind < (unsigned)C2->Kind) + return true; + else if (C1->Kind == C2->Kind) + return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber(); + } + } + return false; + } + }; + + char IfConverter::ID = 0; +} + +static RegisterPass<IfConverter> +X("if-converter", "If Converter"); + +FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } + +bool IfConverter::runOnMachineFunction(MachineFunction &MF) { + TLI = MF.getTarget().getTargetLowering(); + TII = MF.getTarget().getInstrInfo(); + if (!TII) return false; + + static int FnNum = -1; + DOUT << "\nIfcvt: function (" << ++FnNum << ") \'" + << MF.getFunction()->getName() << "\'"; + + if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { + DOUT << " skipped\n"; + return false; + } + DOUT << "\n"; + + MF.RenumberBlocks(); + BBAnalysis.resize(MF.getNumBlockIDs()); + + // Look for root nodes, i.e. blocks without successors. 
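+  // (For example, blocks ending in a return have no successors and become
+  // roots; AnalyzeBlocks then walks the CFG from these roots with an inverse
+  // depth-first traversal, so successors are analyzed before predecessors.)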
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + if (I->succ_empty()) + Roots.push_back(I); + + std::vector<IfcvtToken*> Tokens; + MadeChange = false; + unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + + NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) { + // Do an initial analysis for each basic block and find all the potential + // candidates to perform if-conversion. + bool Change = AnalyzeBlocks(MF, Tokens); + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + BBInfo &BBI = Token->BBI; + IfcvtKind Kind = Token->Kind; + unsigned NumDups = Token->NumDups; + unsigned NumDups2 = Token->NumDups2; + + delete Token; + + // If the block has been evicted out of the queue or it has already been + // marked dead (due to it being predicated), then skip it. + if (BBI.IsDone) + BBI.IsEnqueued = false; + if (!BBI.IsEnqueued) + continue; + + BBI.IsEnqueued = false; + + bool RetVal = false; + switch (Kind) { + default: assert(false && "Unexpected!"); + break; + case ICSimple: + case ICSimpleFalse: { + bool isFalse = Kind == ICSimpleFalse; + if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; + DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + << "): BB#" << BBI.BB->getNumber() << " (" + << ((Kind == ICSimpleFalse) + ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) << ") "; + RetVal = IfConvertSimple(BBI, Kind); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) { + if (isFalse) NumSimpleFalse++; + else NumSimple++; + } + break; + } + case ICTriangle: + case ICTriangleRev: + case ICTriangleFalse: + case ICTriangleFRev: { + bool isFalse = Kind == ICTriangleFalse; + bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev); + if (DisableTriangle && !isFalse && !isRev) break; + if (DisableTriangleR && !isFalse && isRev) break; + if (DisableTriangleF && isFalse && !isRev) break; + if (DisableTriangleFR && isFalse && isRev) break; + DOUT << "Ifcvt (Triangle"; + if (isFalse) + DOUT << " false"; + if (isRev) + DOUT << " rev"; + DOUT << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "; + RetVal = IfConvertTriangle(BBI, Kind); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) { + if (isFalse) { + if (isRev) NumTriangleFRev++; + else NumTriangleFalse++; + } else { + if (isRev) NumTriangleRev++; + else NumTriangle++; + } + } + break; + } + case ICDiamond: { + if (DisableDiamond) break; + DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "; + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) NumDiamonds++; + break; + } + } + + Change |= RetVal; + + NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit) + break; + } + + if (!Change) + break; + MadeChange |= Change; + } + + // Delete tokens in case of early exit. + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + delete Token; + } + + Tokens.clear(); + Roots.clear(); + BBAnalysis.clear(); + + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given +/// its 'true' successor. 
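+/// For example (illustrative pseudo-machine-code):
+///   BB:      bcc TrueBB
+///            ; fallthrough to NextBB
+/// Here findFalseBlock(BB, TrueBB) returns NextBB, the unique successor that
+/// is not TrueBB.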
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+                                         MachineBasicBlock *TrueBB) {
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+         E = BB->succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    if (SuccBB != TrueBB)
+      return SuccBB;
+  }
+  return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the end of the block
+/// branch. Swap the block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+  if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+    TII->RemoveBranch(*BBI.BB);
+    TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond);
+    std::swap(BBI.TrueBB, BBI.FalseBB);
+    return true;
+  }
+  return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+  MachineFunction::iterator I = BB;
+  MachineFunction::iterator E = BB->getParent()->end();
+  if (++I == E)
+    return NULL;
+  return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// Dups, the number of instructions that the ifcvt would need to duplicate
+/// if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+  Dups = 0;
+  if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+    return false;
+
+  if (TrueBBI.IsBrAnalyzable)
+    return false;
+
+  if (TrueBBI.BB->pred_size() > 1) {
+    if (TrueBBI.CannotBeCopied ||
+        TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit())
+      return false;
+    Dups = TrueBBI.NonPredSize;
+  }
+
+  return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks whether the 'true' block's false
+/// branch branches to the 'false' block rather than the other way around.
+/// It also returns, in 'Dups', the number of instructions that the ifcvt
+/// would need to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+                                bool FalseBranch, unsigned &Dups) const {
+  Dups = 0;
+  if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+    return false;
+
+  if (TrueBBI.BB->pred_size() > 1) {
+    if (TrueBBI.CannotBeCopied)
+      return false;
+
+    unsigned Size = TrueBBI.NonPredSize;
+    if (TrueBBI.IsBrAnalyzable) {
+      if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+        // Ends with an unconditional branch. It will be removed.
+        --Size;
+      else {
+        MachineBasicBlock *FExit = FalseBranch
+          ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+        if (FExit)
+          // Require a conditional branch
+          ++Size;
+      }
+    }
+    if (Size > TLI->getIfCvtDupBlockSizeLimit())
+      return false;
+    Dups = Size;
+  }
+
+  MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+  if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+    MachineFunction::iterator I = TrueBBI.BB;
+    if (++I == TrueBBI.BB->getParent()->end())
+      return false;
+    TExit = I;
+  }
+  return TExit && TExit == FalseBBI.BB;
+}
+
+static
+MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
+                                               const TargetInstrInfo *TII) {
+  MachineBasicBlock::iterator I = BB->end();
+  while (I != BB->begin()) {
+    --I;
+    if (!I->getDesc().isBranch())
+      break;
+  }
+  return I;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
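+/// 'Dups1' receives the number of identical instructions shared at the
+/// beginnings of the two blocks and 'Dups2' the number shared at their ends
+/// (before the branches). For example (illustrative opcodes), with
+///   TBB: a; b; x; t        FBB: a; b; y; t
+/// Dups1 is 2 (a, b) and Dups2 is 1 (t); only x and y remain to be
+/// predicated separately.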
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + + if (!TT && blockAlwaysFallThrough(TrueBBI)) + TT = getNextBlock(TrueBBI.BB); + if (!FT && blockAlwaysFallThrough(FalseBBI)) + FT = getNextBlock(FalseBBI.BB); + if (TT != FT) + return false; + if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? + if (TrueBBI.FalseBB || FalseBBI.FalseBB || + (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + return false; + + MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); + while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) { + if (!TI->isIdenticalTo(FI)) + break; + ++Dups1; + ++TI; + ++FI; + } + + TI = firstNonBranchInst(TrueBBI.BB, TII); + FI = firstNonBranchInst(FalseBBI.BB, TII); + while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) { + if (!TI->isIdenticalTo(FI)) + break; + ++Dups2; + --TI; + --FI; + } + + return true; +} + +/// ScanInstructions - Scan all the instructions in the block to determine if +/// the block is predicable. In most cases, that means all the instructions +/// in the block are isPredicable(). Also checks if the block contains any +/// instruction which can clobber a predicate (e.g. condition code register). +/// If so, the block is not predicable unless it's the last instruction. +void IfConverter::ScanInstructions(BBInfo &BBI) { + if (BBI.IsDone) + return; + + bool AlreadyPredicated = BBI.Predicate.size() > 0; + // First analyze the end of BB branches. + BBI.TrueBB = BBI.FalseBB = NULL; + BBI.BrCond.clear(); + BBI.IsBrAnalyzable = + !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL; + + if (BBI.BrCond.size()) { + // No false branch. This BB must end with a conditional branch and a + // fallthrough. + if (!BBI.FalseBB) + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + assert(BBI.FalseBB && "Expected to find the fallthrough block!"); + } + + // Then scan all the instructions. + BBI.NonPredSize = 0; + BBI.ClobbersPred = false; + for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); + I != E; ++I) { + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isNotDuplicable()) + BBI.CannotBeCopied = true; + + bool isPredicated = TII->isPredicated(I); + bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); + + if (!isCondBr) { + if (!isPredicated) + BBI.NonPredSize++; + else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; + } + } + + if (BBI.ClobbersPred && !isPredicated) { + // Predicate modification instruction should end the block (except for + // already predicated instructions and end of block branches). + if (isCondBr) { + // A conditional branch is not predicable, but it may be eliminated. + continue; + } + + // Predicate may have been modified, the subsequent (currently) + // unpredicated instructions cannot be correctly predicated. 
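+      // For example (ARM-like pseudo-code, for illustration only):
+      //   cmp r0, #0       ; redefines the condition codes
+      //   add r1, r2, r3   ; not yet predicated
+      // Predicating the add on the original branch condition would read
+      // the predicate the cmp just clobbered.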
+ BBI.IsUnpredicable = true; + return; + } + + // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are + // still potentially predicable. + std::vector<MachineOperand> PredDefs; + if (TII->DefinesPredicate(I, PredDefs)) + BBI.ClobbersPred = true; + + if (!TID.isPredicable()) { + BBI.IsUnpredicable = true; + return; + } + } +} + +/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be +/// predicated by the specified predicate. +bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, + SmallVectorImpl<MachineOperand> &Pred, + bool isTriangle, bool RevBranch) { + // If the block is dead or unpredicable, then it cannot be predicated. + if (BBI.IsDone || BBI.IsUnpredicable) + return false; + + // If it is already predicated, check if its predicate subsumes the new + // predicate. + if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + return false; + + if (BBI.BrCond.size()) { + if (!isTriangle) + return false; + + // Test predicate subsumption. + SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end()); + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (RevBranch) { + if (TII->ReverseBranchCondition(Cond)) + return false; + } + if (TII->ReverseBranchCondition(RevPred) || + !TII->SubsumesPredicate(Cond, RevPred)) + return false; + } + + return true; +} + +/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from +/// the specified block. Record its successors and whether it looks like an +/// if-conversion candidate. +IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, + std::vector<IfcvtToken*> &Tokens) { + BBInfo &BBI = BBAnalysis[BB->getNumber()]; + + if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) + return BBI; + + BBI.BB = BB; + BBI.IsBeingAnalyzed = true; + + ScanInstructions(BBI); + + // Unanalyzable or ends with fallthrough or unconditional branch. + if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + // Do not ifcvt if either path is a back edge to the entry block. + if (BBI.TrueBB == BB || BBI.FalseBB == BB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens); + BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens); + + if (TrueBBI.IsDone && FalseBBI.IsDone) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + bool CanRevCond = !TII->ReverseBranchCondition(RevCond); + + unsigned Dups = 0; + unsigned Dups2 = 0; + bool TNeedSub = TrueBBI.Predicate.size() > 0; + bool FNeedSub = FalseBBI.Predicate.size() > 0; + bool Enqueued = false; + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. 
+    Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+                                    Dups2));
+    Enqueued = true;
+  }
+
+  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
+      MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+      FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+    // Triangle:
+    //   EBB
+    //   | \_
+    //   |  |
+    //   | TBB
+    //   |  /
+    //   FBB
+    Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+    Enqueued = true;
+  }
+
+  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
+      MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+      FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+    Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+    Enqueued = true;
+  }
+
+  if (ValidSimple(TrueBBI, Dups) &&
+      MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+      FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+    // Simple (split, no rejoin):
+    //   EBB
+    //   | \_
+    //   |  |
+    //   | TBB---> exit
+    //   |
+    //   FBB
+    Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+    Enqueued = true;
+  }
+
+  if (CanRevCond) {
+    // Try the other path...
+    if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
+        MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+        FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+      Enqueued = true;
+    }
+
+    if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
+        MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+        FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+      Enqueued = true;
+    }
+
+    if (ValidSimple(FalseBBI, Dups) &&
+        MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+        FeasibilityAnalysis(FalseBBI, RevCond)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+      Enqueued = true;
+    }
+  }
+
+  BBI.IsEnqueued = Enqueued;
+  BBI.IsBeingAnalyzed = false;
+  BBI.IsAnalyzed = true;
+  return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates. It returns true if any CFG restructuring is done to expose more
+/// if-conversion opportunities.
+bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+                                std::vector<IfcvtToken*> &Tokens) {
+  bool Change = false;
+  std::set<MachineBasicBlock*> Visited;
+  for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
+    for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
+           E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
+      MachineBasicBlock *BB = *I;
+      AnalyzeBlock(BB, Tokens);
+    }
+  }
+
+  // Sort to favor more complex ifcvt schemes.
+  std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+
+  return Change;
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to
+/// its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+  MachineFunction::iterator I = BB;
+  MachineFunction::iterator TI = ToBB;
+  MachineFunction::iterator E = BB->getParent()->end();
+  while (++I != TI)
+    if (I == E || !I->empty())
+      return false;
+  return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed
+/// to determine if it can be if-converted. If predecessor is already enqueued,
+/// dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+  for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+         E = BB->pred_end(); PI != E; ++PI) {
+    BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+    if (PBBI.IsDone || PBBI.BB == BB)
+      continue;
+    PBBI.IsAnalyzed = false;
+    PBBI.IsEnqueued = false;
+  }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+                               const TargetInstrInfo *TII) {
+  SmallVector<MachineOperand, 0> NoCond;
+  TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+  MachineBasicBlock *TBB = NULL, *FBB = NULL;
+  SmallVector<MachineOperand, 4> Cond;
+  if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+    BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  BBInfo *CvtBBI = &TrueBBI;
+  BBInfo *NextBBI = &FalseBBI;
+
+  SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+  if (Kind == ICSimpleFalse)
+    std::swap(CvtBBI, NextBBI);
+
+  if (CvtBBI->IsDone ||
+      (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+    // Something has changed. It's no longer safe to predicate this block.
+    BBI.IsAnalyzed = false;
+    CvtBBI->IsAnalyzed = false;
+    return false;
+  }
+
+  if (Kind == ICSimpleFalse)
+    if (TII->ReverseBranchCondition(Cond))
+      assert(false && "Unable to reverse branch condition!");
+
+  if (CvtBBI->BB->pred_size() > 1) {
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    // Copy instructions in the true block, predicate them, and add them to
+    // the entry block.
+    CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+  } else {
+    PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+    // Merge converted block into entry block.
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    MergeBlocks(BBI, *CvtBBI);
+  }
+
+  bool IterIfcvt = true;
+  if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+    InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+    BBI.HasFallThrough = false;
+    // Now ifcvt'd block will look like this:
+    // BB:
+    // ...
+    // t, f = cmp
+    // if t op
+    // b BBf
+    //
+    // We cannot further ifcvt this block because the unconditional branch
+    // will have to be predicated on the new condition, which will not be
+    // available if the cmp executes.
+    IterIfcvt = false;
+  }
+
+  RemoveExtraEdges(BBI);
+
+  // Update block info. BB can be iteratively if-converted.
+  if (!IterIfcvt)
+    BBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+  CvtBBI->IsDone = true;
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+  BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  BBInfo *CvtBBI = &TrueBBI;
+  BBInfo *NextBBI = &FalseBBI;
+
+  SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+  if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+    std::swap(CvtBBI, NextBBI);
+
+  if (CvtBBI->IsDone ||
+      (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+    // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false; + CvtBBI->IsAnalyzed = false; + return false; + } + + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + if (TII->ReverseBranchCondition(Cond)) + assert(false && "Unable to reverse branch condition!"); + + if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { + if (ReverseBranchCondition(*CvtBBI)) { + // BB has been changed, modify its predecessors (except for this + // one) so they don't get ifcvt'ed based on bad intel. + for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(), + E = CvtBBI->BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PBB = *PI; + if (PBB == BBI.BB) + continue; + BBInfo &PBBI = BBAnalysis[PBB->getNumber()]; + if (PBBI.IsEnqueued) { + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } + } + } + } + + bool HasEarlyExit = CvtBBI->FalseBB != NULL; + bool DupBB = CvtBBI->BB->pred_size() > 1; + if (DupBB) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Copy instructions in the true block, predicate them, and add them to + // the entry block. + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); + } else { + // Predicate the 'true' block after removing its branch. + CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + + // Now merge the entry of the triangle with the true block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI); + } + + // If 'true' block has a 'false' successor, add an exit branch to it. + if (HasEarlyExit) { + SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), + CvtBBI->BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + assert(false && "Unable to reverse branch condition!"); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond); + BBI.BB->addSuccessor(CvtBBI->FalseBB); + } + + // Merge in the 'false' block if the 'false' block has no other + // predecessors. Otherwise, add an unconditional branch to 'false'. + bool FalseBBDead = false; + bool IterIfcvt = true; + bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB); + if (!isFallThrough) { + // Only merge them if the true block does not fallthrough to the false + // block. By not merging them, we make it possible to iteratively + // ifcvt the blocks. + if (!HasEarlyExit && + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) { + MergeBlocks(BBI, *NextBBI); + FalseBBDead = true; + } else { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + } + // Mixed predicated and unpredicated code. This cannot be iteratively + // predicated. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + if (FalseBBDead) + NextBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertDiamond - If convert a diamond sub-CFG. +/// +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + // True block must fall through or end with an unanalyzable terminator. 
+  if (!TailBB) {
+    if (blockAlwaysFallThrough(TrueBBI))
+      TailBB = FalseBBI.TrueBB;
+    assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+  }
+
+  if (TrueBBI.IsDone || FalseBBI.IsDone ||
+      TrueBBI.BB->pred_size() > 1 ||
+      FalseBBI.BB->pred_size() > 1) {
+    // Something has changed. It's no longer safe to predicate these blocks.
+    BBI.IsAnalyzed = false;
+    TrueBBI.IsAnalyzed = false;
+    FalseBBI.IsAnalyzed = false;
+    return false;
+  }
+
+  // Merge the 'true' and 'false' blocks by copying the instructions
+  // from the 'false' block to the 'true' block. That is, unless the true
+  // block would clobber the predicate, in which case we do the opposite.
+  BBInfo *BBI1 = &TrueBBI;
+  BBInfo *BBI2 = &FalseBBI;
+  SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+  if (TII->ReverseBranchCondition(RevCond))
+    assert(false && "Unable to reverse branch condition!");
+  SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+  SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+  // Figure out the more profitable ordering.
+  bool DoSwap = false;
+  if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+    DoSwap = true;
+  else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+    if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+      DoSwap = true;
+  }
+  if (DoSwap) {
+    std::swap(BBI1, BBI2);
+    std::swap(Cond1, Cond2);
+  }
+
+  // Remove the conditional branch from entry to the blocks.
+  BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+  // Remove the duplicated instructions at the beginnings of both paths.
+  MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+  MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+  BBI1->NonPredSize -= NumDups1;
+  BBI2->NonPredSize -= NumDups1;
+  while (NumDups1 != 0) {
+    ++DI1;
+    ++DI2;
+    --NumDups1;
+  }
+  BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+  BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+  // Predicate the 'true' block after removing its branch.
+  BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+  DI1 = BBI1->BB->end();
+  for (unsigned i = 0; i != NumDups2; ++i)
+    --DI1;
+  BBI1->BB->erase(DI1, BBI1->BB->end());
+  PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+
+  // Predicate the 'false' block.
+  BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+  DI2 = BBI2->BB->end();
+  while (NumDups2 != 0) {
+    --DI2;
+    --NumDups2;
+  }
+  PredicateBlock(*BBI2, DI2, *Cond2);
+
+  // Merge the true block into the entry of the diamond.
+  MergeBlocks(BBI, *BBI1);
+  MergeBlocks(BBI, *BBI2);
+
+  // If the if-converted block falls through or unconditionally branches into
+  // the tail block, and the tail block does not have other predecessors, then
+  // fold the tail block in as well. Otherwise, unless it falls through to the
+  // tail, add an unconditional branch to it.
+  if (TailBB) {
+    BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+    if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
+      BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+      MergeBlocks(BBI, TailBBI);
+      TailBBI.IsDone = true;
+    } else {
+      InsertUncondBranch(BBI.BB, TailBB, TII);
+      BBI.HasFallThrough = false;
+    }
+  }
+
+  RemoveExtraEdges(BBI);
+
+  // Update block info.
+  BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
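+/// For example (ARM-like pseudo-code, for illustration only), predicating on
+/// an "eq" condition rewrites
+///   add r0, r1, r2   ->   addeq r0, r1, r2
+/// for every not-yet-predicated instruction before E; an instruction that
+/// cannot be predicated triggers the abort below.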
+void IfConverter::PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl<MachineOperand> &Cond) { + for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { + if (TII->isPredicated(I)) + continue; + if (!TII->PredicateInstruction(I, Cond)) { + cerr << "Unable to predicate " << *I << "!\n"; + abort(); + } + } + + std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); + + BBI.IsAnalyzed = false; + BBI.NonPredSize = 0; + + NumIfConvBBs++; +} + +/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to +/// the destination block. Skip end of block branches if IgnoreBr is true. +void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl<MachineOperand> &Cond, + bool IgnoreBr) { + MachineFunction &MF = *ToBBI.BB->getParent(); + + for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), + E = FromBBI.BB->end(); I != E; ++I) { + const TargetInstrDesc &TID = I->getDesc(); + bool isPredicated = TII->isPredicated(I); + // Do not copy the end of the block branches. + if (IgnoreBr && !isPredicated && TID.isBranch()) + break; + + MachineInstr *MI = MF.CloneMachineInstr(I); + ToBBI.BB->insert(ToBBI.BB->end(), MI); + ToBBI.NonPredSize++; + + if (!isPredicated) + if (!TII->PredicateInstruction(MI, Cond)) { + cerr << "Unable to predicate " << *MI << "!\n"; + abort(); + } + } + + std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + ToBBI.BB->addSuccessor(Succ); + } + + std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), + std::back_inserter(ToBBI.Predicate)); + std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); + + ToBBI.ClobbersPred |= FromBBI.ClobbersPred; + ToBBI.IsAnalyzed = false; + + NumDupBBs++; +} + +/// MergeBlocks - Move all instructions from FromBB to the end of ToBB. +/// +void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) { + ToBBI.BB->splice(ToBBI.BB->end(), + FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); + + // Redirect all branches to FromBB to ToBB. + std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(), + FromBBI.BB->pred_end()); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + MachineBasicBlock *Pred = Preds[i]; + if (Pred == ToBBI.BB) + continue; + Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB); + } + + std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + FromBBI.BB->removeSuccessor(Succ); + ToBBI.BB->addSuccessor(Succ); + } + + // Now FromBBI always falls through to the next block! 
+  if (NBB && !FromBBI.BB->isSuccessor(NBB))
+    FromBBI.BB->addSuccessor(NBB);
+
+  std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+            std::back_inserter(ToBBI.Predicate));
+  FromBBI.Predicate.clear();
+
+  ToBBI.NonPredSize += FromBBI.NonPredSize;
+  FromBBI.NonPredSize = 0;
+
+  ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+  ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+  ToBBI.IsAnalyzed = false;
+  FromBBI.IsAnalyzed = false;
+}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..e6912b8
--- /dev/null
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,892 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+                                 ArgIt ArgBegin, ArgIt ArgEnd,
+                                 const Type *RetTy) {
+  // Insert a correctly-typed definition now.
+  std::vector<const Type *> ParamTys;
+  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+    ParamTys.push_back(I->getType());
+  M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+                                    const char *FName,
+                                    const char *DName, const char *LDName) {
+  // Insert definitions for all the floating point types.
+  switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+  case Type::FloatTyID:
+    EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+                         Type::FloatTy);
+    break;
+  case Type::DoubleTyID:
+    EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+                         Type::DoubleTy);
+    break;
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+    EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+                         Fn->arg_begin()->getType());
+    break;
+  }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, but that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+                                 ArgIt ArgBegin, ArgIt ArgEnd,
+                                 const Type *RetTy, Constant *&FCache) {
+  if (!FCache) {
+    // If we haven't already looked up this function, check to see if the
+    // program already contains a function with this name.
+    Module *M = CI->getParent()->getParent()->getParent();
+    // Get or insert the definition now.
+ std::vector<const Type *> ParamTys; + for (ArgIt I = ArgBegin; I != ArgEnd; ++I) + ParamTys.push_back((*I)->getType()); + FCache = M->getOrInsertFunction(NewFn, + FunctionType::get(RetTy, ParamTys, false)); + } + + IRBuilder<> Builder(CI->getParent(), CI); + SmallVector<Value *, 8> Args(ArgBegin, ArgEnd); + CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end()); + NewCI->setName(CI->getName()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewCI); + return NewCI; +} + +void IntrinsicLowering::AddPrototypes(Module &M) { + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->isDeclaration() && !I->use_empty()) + switch (I->getIntrinsicID()) { + default: break; + case Intrinsic::setjmp: + EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), + Type::Int32Ty); + break; + case Intrinsic::longjmp: + EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), + Type::VoidTy); + break; + case Intrinsic::siglongjmp: + EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), + Type::VoidTy); + break; + case Intrinsic::memcpy: + M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + TD.getIntPtrType(), (Type *)0); + break; + case Intrinsic::memmove: + M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + TD.getIntPtrType(), (Type *)0); + break; + case Intrinsic::memset: + M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + Type::Int32Ty, + TD.getIntPtrType(), (Type *)0); + break; + case Intrinsic::sqrt: + EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); + break; + case Intrinsic::sin: + EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl"); + break; + case Intrinsic::cos: + EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl"); + break; + case Intrinsic::pow: + EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl"); + break; + case Intrinsic::log: + EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl"); + break; + case Intrinsic::log2: + EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l"); + break; + case Intrinsic::log10: + EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l"); + break; + case Intrinsic::exp: + EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl"); + break; + case Intrinsic::exp2: + EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l"); + break; + } +} + +/// LowerBSWAP - Emit the code to lower bswap of V before the specified +/// instruction IP. 
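+/// For example, for a 16-bit value the expansion is (V << 8) | (V lshr 8),
+/// so byte-swapping 0xABCD yields 0xCDAB; the wider cases shift each byte
+/// into place and mask it before OR'ing the pieces back together.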
+static Value *LowerBSWAP(Value *V, Instruction *IP) { + assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + + IRBuilder<> Builder(IP->getParent(), IP); + + switch(BitSize) { + default: assert(0 && "Unhandled type size of value to byteswap!"); + case 16: { + Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.1"); + V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16"); + break; + } + case 32: { + Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.4"); + Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.3"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + "bswap.1"); + Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00), + "bswap.and2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); + V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32"); + break; + } + case 64: { + Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56), + "bswap.8"); + Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40), + "bswap.7"); + Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.6"); + Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.5"); + Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.4"); + Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + "bswap.3"); + Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40), + "bswap.2"); + Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56), + "bswap.1"); + Tmp7 = Builder.CreateAnd(Tmp7, + ConstantInt::get(Type::Int64Ty, + 0xFF000000000000ULL), + "bswap.and7"); + Tmp6 = Builder.CreateAnd(Tmp6, + ConstantInt::get(Type::Int64Ty, + 0xFF0000000000ULL), + "bswap.and6"); + Tmp5 = Builder.CreateAnd(Tmp5, + ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL), + "bswap.and5"); + Tmp4 = Builder.CreateAnd(Tmp4, + ConstantInt::get(Type::Int64Ty, 0xFF000000ULL), + "bswap.and4"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::Int64Ty, 0xFF0000ULL), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::Int64Ty, 0xFF00ULL), + "bswap.and2"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); + Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6"); + V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64"); + break; + } + } + return V; +} + +/// LowerCTPOP - Emit the code to lower ctpop of V before the specified +/// instruction IP. 
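+/// The expansion is the classic parallel ("SWAR") reduction: step ct splits
+/// the value with MaskValues[ct] and adds the two halves, so for an i32 the
+/// first step computes (V & 0x55555555) + ((V lshr 1) & 0x55555555), leaving
+/// 2-bit partial counts, then 4-bit counts, and so on.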
+static Value *LowerCTPOP(Value *V, Instruction *IP) {
+  assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
+
+  static const uint64_t MaskValues[6] = {
+    0x5555555555555555ULL, 0x3333333333333333ULL,
+    0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+    0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+  };
+
+  IRBuilder<> Builder(IP->getParent(), IP);
+
+  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+  unsigned WordSize = (BitSize + 63) / 64;
+  Value *Count = ConstantInt::get(V->getType(), 0);
+
+  for (unsigned n = 0; n < WordSize; ++n) {
+    Value *PartValue = V;
+    for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+         i <<= 1, ++ct) {
+      Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+      Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+      Value *VShift = Builder.CreateLShr(PartValue,
+                                         ConstantInt::get(V->getType(), i),
+                                         "ctpop.sh");
+      Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+      PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+    }
+    Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+    if (BitSize > 64) {
+      V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+                             "ctpop.part.sh");
+      BitSize -= 64;
+    }
+  }
+
+  return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(Value *V, Instruction *IP) {
+
+  IRBuilder<> Builder(IP->getParent(), IP);
+
+  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+  for (unsigned i = 1; i < BitSize; i <<= 1) {
+    Value *ShVal = ConstantInt::get(V->getType(), i);
+    ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+    V = Builder.CreateOr(V, ShVal, "ctlz.step");
+  }
+
+  V = Builder.CreateNot(V);
+  return LowerCTPOP(V, IP);
+}
+
+/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
+/// three integer arguments. The first argument is the Value from which the
+/// bits will be selected. It may be of any bit width. The second and third
+/// arguments specify a range of bits to select with the second argument
+/// specifying the low bit and the third argument specifying the high bit. Both
+/// must be type i32. The result is the corresponding selected bits from the
+/// Value in the same width as the Value (first argument). If the low bit index
+/// is higher than the high bit index then the inverse selection is done and
+/// the bits are returned in inverse order.
+/// @brief Lowering of llvm.part.select intrinsic.
+static Instruction *LowerPartSelect(CallInst *CI) {
+  IRBuilder<> Builder;
+
+  // Make sure we're dealing with a part select intrinsic here
+  Function *F = CI->getCalledFunction();
+  const FunctionType *FT = F->getFunctionType();
+  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+      FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
+      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
+    return CI;
+
+  // Get the intrinsic implementation function by converting all the . to _
+  // in the intrinsic's function name and then reconstructing the function
+  // declaration.
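+  // e.g. "llvm.part.select.i32.i32" becomes "llvm_part_select_i32_i32".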
+ std::string Name(F->getName()); + for (unsigned i = 4; i < Name.length(); ++i) + if (Name[i] == '.') + Name[i] = '_'; + Module* M = F->getParent(); + F = cast<Function>(M->getOrInsertFunction(Name, FT)); + F->setLinkage(GlobalValue::WeakAnyLinkage); + + // If we haven't defined the impl function yet, do so now + if (F->isDeclaration()) { + + // Get the arguments to the function + Function::arg_iterator args = F->arg_begin(); + Value* Val = args++; Val->setName("Val"); + Value* Lo = args++; Lo->setName("Lo"); + Value* Hi = args++; Hi->setName("High"); + + // We want to select a range of bits here such that [Hi, Lo] is shifted + // down to the low bits. However, it is quite possible that Hi is smaller + // than Lo in which case the bits have to be reversed. + + // Create the blocks we will need for the two cases (forward, reverse) + BasicBlock* CurBB = BasicBlock::Create("entry", F); + BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent()); + BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent()); + BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent()); + BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent()); + BasicBlock *RsltBlk = BasicBlock::Create("result", CurBB->getParent()); + + Builder.SetInsertPoint(CurBB); + + // Cast Hi and Lo to the size of Val so the widths are all the same + if (Hi->getType() != Val->getType()) + Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false, + "tmp"); + if (Lo->getType() != Val->getType()) + Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false, + "tmp"); + + // Compute a few things that both cases will need, up front. + Constant* Zero = ConstantInt::get(Val->getType(), 0); + Constant* One = ConstantInt::get(Val->getType(), 1); + Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType()); + + // Compare the Hi and Lo bit positions. This is used to determine + // which case we have (forward or reverse) + Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less"); + Builder.CreateCondBr(Cmp, RevSize, FwdSize); + + // First, compute the number of bits in the forward case. + Builder.SetInsertPoint(FwdSize); + Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits"); + Builder.CreateBr(Compute); + + // Second, compute the number of bits in the reverse case. + Builder.SetInsertPoint(RevSize); + Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits"); + Builder.CreateBr(Compute); + + // Now, compute the bit range. Start by getting the bitsize and the shift + // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for + // the number of bits we want in the range. We shift the bits down to the + // least significant bits, apply the mask to zero out unwanted high bits, + // and we have computed the "forward" result. It may still need to be + // reversed. + Builder.SetInsertPoint(Compute); + + // Get the BitSize from one of the two subtractions + PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits"); + BitSize->reserveOperandSpace(2); + BitSize->addIncoming(FBitSize, FwdSize); + BitSize->addIncoming(RBitSize, RevSize); + + // Get the ShiftAmount as the smaller of Hi/Lo + PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt"); + ShiftAmt->reserveOperandSpace(2); + ShiftAmt->addIncoming(Lo, FwdSize); + ShiftAmt->addIncoming(Hi, RevSize); + + // Increment the bit size + Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits"); + + // Create a Mask to zero out the high order bits. 
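+    // e.g. for BitSizePlusOne == 8, (AllOnes << 8) clears the low eight
+    // bits, so its complement keeps exactly those eight bits.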
+    Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask");
+    Mask = Builder.CreateNot(Mask, "mask");
+
+    // Shift the bits down and apply the mask
+    Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres");
+    FRes = Builder.CreateAnd(FRes, Mask, "fres");
+    Builder.CreateCondBr(Cmp, Reverse, RsltBlk);
+
+    // In the Reverse block we have the masked value already in FRes, but we
+    // must reverse it by shifting FRes bits out to the right and putting them
+    // into RRes by shifting them in from the left.
+    Builder.SetInsertPoint(Reverse);
+
+    // First set up our loop counters
+    PHINode *Count = Builder.CreatePHI(Val->getType(), "count");
+    Count->reserveOperandSpace(2);
+    Count->addIncoming(BitSizePlusOne, Compute);
+
+    // Next, get the value that we are shifting.
+    PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
+    BitsToShift->reserveOperandSpace(2);
+    BitsToShift->addIncoming(FRes, Compute);
+
+    // Finally, get the result of the last computation
+    PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
+    RRes->reserveOperandSpace(2);
+    RRes->addIncoming(Zero, Compute);
+
+    // Decrement the counter
+    Value *Decr = Builder.CreateSub(Count, One, "decr");
+    Count->addIncoming(Decr, Reverse);
+
+    // Compute the Bit that we want to move
+    Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit");
+
+    // Compute the new value for next iteration.
+    Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift");
+    BitsToShift->addIncoming(NewVal, Reverse);
+
+    // Shift the bit into the low bits of the result.
+    Value *NewRes = Builder.CreateShl(RRes, One, "lshift");
+    NewRes = Builder.CreateOr(NewRes, Bit, "addbit");
+    RRes->addIncoming(NewRes, Reverse);
+
+    // Terminate loop if we've moved all the bits.
+    Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond");
+    Builder.CreateCondBr(Cond, RsltBlk, Reverse);
+
+    // Finally, in the result block, select one of the two results with a PHI
+    // node and return the result.
+    Builder.SetInsertPoint(RsltBlk);
+    PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select");
+    BitSelect->reserveOperandSpace(2);
+    BitSelect->addIncoming(FRes, Compute);
+    BitSelect->addIncoming(NewRes, Reverse);
+    Builder.CreateRet(BitSelect);
+  }
+
+  // Return a call to the implementation function
+  Builder.SetInsertPoint(CI->getParent(), CI);
+  CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1),
+                                        CI->getOperand(2), CI->getOperand(3));
+  NewCI->setName(CI->getName());
+  return NewCI;
+}
+
+/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
+/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low,
+/// i32 %High). The first two arguments can be any bit width. The result is
+/// the same width as %Value. The operation replaces bits between %Low and
+/// %High with the value in %Replacement. If %Replacement is not the same
+/// width, it is truncated or zero extended as appropriate to fit the bits
+/// being replaced. If %Low is greater than %High then the inverse set of
+/// bits are replaced.
+/// @brief Lowering of llvm.part.set intrinsic.
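+/// For example, part.set(i32 0xFFFFFFFF, i8 0, 8, 15) replaces bits 8
+/// through 15 with zero, producing 0xFFFF00FF.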
+static Instruction *LowerPartSet(CallInst *CI) {
+  IRBuilder<> Builder;
+
+  // Make sure we're dealing with a part set intrinsic here
+  Function *F = CI->getCalledFunction();
+  const FunctionType *FT = F->getFunctionType();
+  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+      FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
+      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
+      !FT->getParamType(3)->isInteger())
+    return CI;
+
+  // Get the intrinsic implementation function by converting all the . to _
+  // in the intrinsic's function name and then reconstructing the function
+  // declaration.
+  std::string Name(F->getName());
+  for (unsigned i = 4; i < Name.length(); ++i)
+    if (Name[i] == '.')
+      Name[i] = '_';
+  Module* M = F->getParent();
+  F = cast<Function>(M->getOrInsertFunction(Name, FT));
+  F->setLinkage(GlobalValue::WeakAnyLinkage);
+
+  // If we haven't defined the impl function yet, do so now
+  if (F->isDeclaration()) {
+    // Get the arguments for the function.
+    Function::arg_iterator args = F->arg_begin();
+    Value* Val = args++; Val->setName("Val");
+    Value* Rep = args++; Rep->setName("Rep");
+    Value* Lo = args++; Lo->setName("Lo");
+    Value* Hi = args++; Hi->setName("Hi");
+
+    // Get some types we need
+    const IntegerType* ValTy = cast<IntegerType>(Val->getType());
+    const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
+    uint32_t RepBits = RepTy->getBitWidth();
+
+    // Constant Definitions
+    ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
+    ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
+    ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
+    ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
+    ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
+    ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
+    ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
+
+    // Basic blocks we fill in below.
+    BasicBlock* entry = BasicBlock::Create("entry", F, 0);
+    BasicBlock* large = BasicBlock::Create("large", F, 0);
+    BasicBlock* small = BasicBlock::Create("small", F, 0);
+    BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
+    BasicBlock* result = BasicBlock::Create("result", F, 0);
+
+    // BASIC BLOCK: entry
+    Builder.SetInsertPoint(entry);
+    // First, get the number of bits that we're placing as an i32
+    Value* is_forward = Builder.CreateICmpULT(Lo, Hi);
+    Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo);
+    Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi);
+    Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn);
+    NumBits = Builder.CreateAdd(NumBits, One);
+    // Now, convert Lo and Hi to ValTy bit width
+    Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false);
+    // Determine if the replacement bits are larger than the number of bits we
+    // are replacing and deal with it.
+ Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth); + Builder.CreateCondBr(is_large, large, small); + + // BASIC BLOCK: large + Builder.SetInsertPoint(large); + Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits); + MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(), + /* isSigned */ false); + Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits); + Value* Rep2 = Builder.CreateAnd(Mask1, Rep); + Builder.CreateBr(small); + + // BASIC BLOCK: small + Builder.SetInsertPoint(small); + PHINode* Rep3 = Builder.CreatePHI(RepTy); + Rep3->reserveOperandSpace(2); + Rep3->addIncoming(Rep2, large); + Rep3->addIncoming(Rep, entry); + Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false); + Builder.CreateCondBr(is_forward, result, reverse); + + // BASIC BLOCK: reverse (reverses the bits of the replacement) + Builder.SetInsertPoint(reverse); + // Set up our loop counter as a PHI so we can decrement on each iteration. + // We will loop for the number of bits in the replacement value. + PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count"); + Count->reserveOperandSpace(2); + Count->addIncoming(NumBits, small); + + // Get the value that we are shifting bits out of as a PHI because + // we'll change this with each iteration. + PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); + BitsToShift->reserveOperandSpace(2); + BitsToShift->addIncoming(Rep4, small); + + // Get the result of the last computation or zero on first iteration + PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); + RRes->reserveOperandSpace(2); + RRes->addIncoming(ValZero, small); + + // Decrement the loop counter by one + Value *Decr = Builder.CreateSub(Count, One); + Count->addIncoming(Decr, reverse); + + // Get the bit that we want to move into the result + Value *Bit = Builder.CreateAnd(BitsToShift, ValOne); + + // Compute the new value of the bits to shift for the next iteration. + Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne); + BitsToShift->addIncoming(NewVal, reverse); + + // Shift the bit we extracted into the low bit of the result. + Value *NewRes = Builder.CreateShl(RRes, ValOne); + NewRes = Builder.CreateOr(NewRes, Bit); + RRes->addIncoming(NewRes, reverse); + + // Terminate loop if we've moved all the bits. 
+ Value *Cond = Builder.CreateICmpEQ(Decr, Zero); + Builder.CreateCondBr(Cond, result, reverse); + + // BASIC BLOCK: result + Builder.SetInsertPoint(result); + PHINode *Rplcmnt = Builder.CreatePHI(Val->getType()); + Rplcmnt->reserveOperandSpace(2); + Rplcmnt->addIncoming(NewRes, reverse); + Rplcmnt->addIncoming(Rep4, small); + Value* t0 = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false); + Value* t1 = Builder.CreateShl(ValMask, Lo); + Value* t2 = Builder.CreateNot(t1); + Value* t3 = Builder.CreateShl(t1, t0); + Value* t4 = Builder.CreateOr(t2, t3); + Value* t5 = Builder.CreateAnd(t4, Val); + Value* t6 = Builder.CreateShl(Rplcmnt, Lo); + Value* Rslt = Builder.CreateOr(t5, t6, "part_set"); + Builder.CreateRet(Rslt); + } + + // Return a call to the implementation function + Builder.SetInsertPoint(CI->getParent(), CI); + CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1), + CI->getOperand(2), CI->getOperand(3), + CI->getOperand(4)); + NewCI->setName(CI->getName()); + return NewCI; +} + +static void ReplaceFPIntrinsicWithCall(CallInst *CI, Constant *FCache, + Constant *DCache, Constant *LDCache, + const char *Fname, const char *Dname, + const char *LDname) { + switch (CI->getOperand(1)->getType()->getTypeID()) { + default: assert(0 && "Invalid type in intrinsic"); abort(); + case Type::FloatTyID: + ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(), + Type::FloatTy, FCache); + break; + case Type::DoubleTyID: + ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(), + Type::DoubleTy, DCache); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(), + CI->getOperand(1)->getType(), LDCache); + break; + } +} + +void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { + IRBuilder<> Builder(CI->getParent(), CI); + + Function *Callee = CI->getCalledFunction(); + assert(Callee && "Cannot lower an indirect call!"); + + switch (Callee->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + cerr << "Cannot lower a call to a non-intrinsic function '" + << Callee->getName() << "'!\n"; + abort(); + default: + cerr << "Error: Code generator does not support intrinsic function '" + << Callee->getName() << "'!\n"; + abort(); + + // The setjmp/longjmp intrinsics should only exist in the code if it was + // never optimized (ie, right out of the CFE), or if it has been hacked on + // by the lowerinvoke pass. In both cases, the right thing to do is to + // convert the call to an explicit setjmp or longjmp call. 
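+  // For example, a call to llvm.setjmp(buf) simply becomes a direct call to
+  // the C library's setjmp(buf), and llvm.longjmp becomes a call to longjmp.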
+ case Intrinsic::setjmp: { + static Constant *SetjmpFCache = 0; + Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), + Type::Int32Ty, SetjmpFCache); + if (CI->getType() != Type::VoidTy) + CI->replaceAllUsesWith(V); + break; + } + case Intrinsic::sigsetjmp: + if (CI->getType() != Type::VoidTy) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::longjmp: { + static Constant *LongjmpFCache = 0; + ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(), + Type::VoidTy, LongjmpFCache); + break; + } + + case Intrinsic::siglongjmp: { + // Insert the call to abort + static Constant *AbortFCache = 0; + ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), + Type::VoidTy, AbortFCache); + break; + } + case Intrinsic::ctpop: + CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI)); + break; + + case Intrinsic::bswap: + CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI)); + break; + + case Intrinsic::ctlz: + CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI)); + break; + + case Intrinsic::cttz: { + // cttz(x) -> ctpop(~X & (X-1)) + Value *Src = CI->getOperand(1); + Value *NotSrc = Builder.CreateNot(Src); + NotSrc->setName(Src->getName() + ".not"); + Value *SrcM1 = ConstantInt::get(Src->getType(), 1); + SrcM1 = Builder.CreateSub(Src, SrcM1); + Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI); + CI->replaceAllUsesWith(Src); + break; + } + + case Intrinsic::part_select: + CI->replaceAllUsesWith(LowerPartSelect(CI)); + break; + + case Intrinsic::part_set: + CI->replaceAllUsesWith(LowerPartSet(CI)); + break; + + case Intrinsic::stacksave: + case Intrinsic::stackrestore: { + static bool Warned = false; + if (!Warned) + cerr << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + "save" : "restore") << " intrinsic.\n"; + Warned = true; + if (Callee->getIntrinsicID() == Intrinsic::stacksave) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + } + + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + cerr << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + "return" : "frame") << "address intrinsic.\n"; + CI->replaceAllUsesWith(ConstantPointerNull::get( + cast<PointerType>(CI->getType()))); + break; + + case Intrinsic::prefetch: + break; // Simply strip out prefetches on unsupported architectures + + case Intrinsic::pcmarker: + break; // Simply strip out pcmarker on unsupported architectures + case Intrinsic::readcyclecounter: { + cerr << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0)); + break; + } + + case Intrinsic::dbg_stoppoint: + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + case Intrinsic::dbg_declare: + break; // Simply strip out debugging intrinsics + + case Intrinsic::eh_exception: + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::eh_typeid_for_i32: + case Intrinsic::eh_typeid_for_i64: + // Return something different to eh_selector. 
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + + case Intrinsic::var_annotation: + break; // Strip out annotate intrinsic + + case Intrinsic::memcpy: { + static Constant *MemcpyFCache = 0; + const IntegerType *IntPtr = TD.getIntPtrType(); + Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + Ops[1] = CI->getOperand(2); + Ops[2] = Size; + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemcpyFCache); + break; + } + case Intrinsic::memmove: { + static Constant *MemmoveFCache = 0; + const IntegerType *IntPtr = TD.getIntPtrType(); + Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + Ops[1] = CI->getOperand(2); + Ops[2] = Size; + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemmoveFCache); + break; + } + case Intrinsic::memset: { + static Constant *MemsetFCache = 0; + const IntegerType *IntPtr = TD.getIntPtrType(); + Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + // Extend the amount to i32. + Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty, + /* isSigned */ false); + Ops[2] = Size; + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemsetFCache); + break; + } + case Intrinsic::sqrt: { + static Constant *sqrtFCache = 0; + static Constant *sqrtDCache = 0; + static Constant *sqrtLDCache = 0; + ReplaceFPIntrinsicWithCall(CI, sqrtFCache, sqrtDCache, sqrtLDCache, + "sqrtf", "sqrt", "sqrtl"); + break; + } + case Intrinsic::log: { + static Constant *logFCache = 0; + static Constant *logDCache = 0; + static Constant *logLDCache = 0; + ReplaceFPIntrinsicWithCall(CI, logFCache, logDCache, logLDCache, + "logf", "log", "logl"); + break; + } + case Intrinsic::log2: { + static Constant *log2FCache = 0; + static Constant *log2DCache = 0; + static Constant *log2LDCache = 0; + ReplaceFPIntrinsicWithCall(CI, log2FCache, log2DCache, log2LDCache, + "log2f", "log2", "log2l"); + break; + } + case Intrinsic::log10: { + static Constant *log10FCache = 0; + static Constant *log10DCache = 0; + static Constant *log10LDCache = 0; + ReplaceFPIntrinsicWithCall(CI, log10FCache, log10DCache, log10LDCache, + "log10f", "log10", "log10l"); + break; + } + case Intrinsic::exp: { + static Constant *expFCache = 0; + static Constant *expDCache = 0; + static Constant *expLDCache = 0; + ReplaceFPIntrinsicWithCall(CI, expFCache, expDCache, expLDCache, + "expf", "exp", "expl"); + break; + } + case Intrinsic::exp2: { + static Constant *exp2FCache = 0; + static Constant *exp2DCache = 0; + static Constant *exp2LDCache = 0; + ReplaceFPIntrinsicWithCall(CI, exp2FCache, exp2DCache, exp2LDCache, + "exp2f", "exp2", "exp2l"); + break; + } + case Intrinsic::pow: { + static Constant *powFCache = 0; + static Constant *powDCache = 0; + static Constant *powLDCache = 0; + ReplaceFPIntrinsicWithCall(CI, powFCache, powDCache, powLDCache, + "powf", "pow", "powl"); + break; + } + case Intrinsic::flt_rounds: + // Lower to "round to the nearest" + if (CI->getType() != Type::VoidTy) + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); +} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp new file mode 100644 
index 0000000..b3c60e6 --- /dev/null +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -0,0 +1,289 @@ +//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVMTargetMachine class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetMachine.h" +#include "llvm/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace llvm { + bool EnableFastISel; +} + +static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, + cl::desc("Print LLVM IR produced by the loop-reduce pass")); +static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, + cl::desc("Print LLVM IR input to isel pass")); +static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden, + cl::desc("Dump emitter generated instructions as assembly")); +static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, + cl::desc("Dump garbage collector data")); +static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, + cl::desc("Verify generated machine code"), + cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); + +// When this works it will be on by default. +static cl::opt<bool> +DisablePostRAScheduler("disable-post-RA-scheduler", + cl::desc("Disable scheduling after register allocation"), + cl::init(true)); + +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -fast. +static cl::opt<cl::boolOrDefault> +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the experimental \"fast\" instruction selector")); + +FileModel::Model +LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel) { + // Add common CodeGen passes. + if (addCommonCodeGenPasses(PM, OptLevel)) + return FileModel::Error; + + // Fold redundant debug labels. + PM.add(createDebugLabelFoldingPass()); + + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(cerr)); + + if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(cerr)); + + if (OptLevel != CodeGenOpt::None) + PM.add(createCodePlacementOptPass()); + + switch (FileType) { + default: + break; + case TargetMachine::AssemblyFile: + if (addAssemblyEmitter(PM, OptLevel, getAsmVerbosityDefault(), Out)) + return FileModel::Error; + return FileModel::AsmFile; + case TargetMachine::ObjectFile: + if (getMachOWriterInfo()) + return FileModel::MachOFile; + else if (getELFWriterInfo()) + return FileModel::ElfFile; + } + + return FileModel::Error; +} + +/// addPassesToEmitFileFinish - If the passes to emit the specified file had to +/// be split up (e.g., to add an object writer pass), this method can be used to +/// finish up adding passes to emit the file, if necessary. 
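+/// A typical client first calls addPassesToEmitFile, then constructs a code
+/// emitter suited to the FileModel it returned (if one is required), and
+/// finally calls this method to schedule the emitter and cleanup passes.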
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
+                                                  MachineCodeEmitter *MCE,
+                                                  CodeGenOpt::Level OptLevel) {
+  if (MCE)
+    addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *MCE);
+
+  PM.add(createGCInfoDeleter());
+
+  // Delete machine code for this function
+  PM.add(createMachineCodeDeleter());
+
+  return false; // success!
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM,
+                                                  JITCodeEmitter *JCE,
+                                                  CodeGenOpt::Level OptLevel) {
+  if (JCE)
+    addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *JCE);
+
+  PM.add(createGCInfoDeleter());
+
+  // Delete machine code for this function
+  PM.add(createMachineCodeDeleter());
+
+  return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a MachineCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should return true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+                                                   MachineCodeEmitter &MCE,
+                                                   CodeGenOpt::Level OptLevel) {
+  // Add common CodeGen passes.
+  if (addCommonCodeGenPasses(PM, OptLevel))
+    return true;
+
+  if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  addCodeEmitter(PM, OptLevel, PrintEmittedAsm, MCE);
+
+  PM.add(createGCInfoDeleter());
+
+  // Delete machine code for this function
+  PM.add(createMachineCodeDeleter());
+
+  return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a JITCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should return true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+                                                   JITCodeEmitter &JCE,
+                                                   CodeGenOpt::Level OptLevel) {
+  // Add common CodeGen passes.
+  if (addCommonCodeGenPasses(PM, OptLevel))
+    return true;
+
+  if (addPreEmitPass(PM, OptLevel) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  addCodeEmitter(PM, OptLevel, PrintEmittedAsm, JCE);
+
+  PM.add(createGCInfoDeleter());
+
+  // Delete machine code for this function
+  PM.add(createMachineCodeDeleter());
+
+  return false; // success!
+}
+
+static void printAndVerify(PassManagerBase &PM,
+                           bool allowDoubleDefs = false) {
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  if (VerifyMachineCode)
+    PM.add(createMachineVerifierPass(allowDoubleDefs));
+}
+
+/// addCommonCodeGenPasses - Add standard LLVM codegen passes used both for
+/// emitting to assembly files and for machine code output.
+///
+bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+                                               CodeGenOpt::Level OptLevel) {
+  // Standard LLVM-Level Passes.
+
+  // Run loop strength reduction before anything else.
+  if (OptLevel != CodeGenOpt::None) {
+    PM.add(createLoopStrengthReducePass(getTargetLowering()));
+    if (PrintLSR)
+      PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs()));
+  }
+
+  // Turn exception handling constructs into something the code generators can
+  // handle.
+ if (!getTargetAsmInfo()->doesSupportExceptionHandling()) + PM.add(createLowerInvokePass(getTargetLowering())); + else + PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + + if (OptLevel != CodeGenOpt::None) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + if (PrintISelInput) + PM.add(createPrintFunctionPass("\n\n" + "*** Final LLVM Code input to ISel ***\n", + &errs())); + + // Standard Lower-Level Passes. + + // Enable FastISel with -fast, but allow that to be overridden. + if (EnableFastISelOption == cl::BOU_TRUE || + (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) + EnableFastISel = true; + + // Ask the target for an isel. + if (addInstSelector(PM, OptLevel)) + return true; + + // Print the instruction selected machine code... + printAndVerify(PM, /* allowDoubleDefs= */ true); + + if (OptLevel != CodeGenOpt::None) { + PM.add(createMachineLICMPass()); + PM.add(createMachineSinkingPass()); + printAndVerify(PM, /* allowDoubleDefs= */ true); + } + + // Run pre-ra passes. + if (addPreRegAlloc(PM, OptLevel)) + printAndVerify(PM); + + // Perform register allocation. + PM.add(createRegisterAllocator()); + + // Perform stack slot coloring. + if (OptLevel != CodeGenOpt::None) + PM.add(createStackSlotColoringPass(OptLevel >= CodeGenOpt::Aggressive)); + + printAndVerify(PM); // Print the register-allocated code + + // Run post-ra passes. + if (addPostRegAlloc(PM, OptLevel)) + printAndVerify(PM); + + PM.add(createLowerSubregsPass()); + printAndVerify(PM); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + PM.add(createPrologEpilogCodeInserter()); + printAndVerify(PM); + + // Second pass scheduler. + if (OptLevel != CodeGenOpt::None && !DisablePostRAScheduler) { + PM.add(createPostRAScheduler()); + printAndVerify(PM); + } + + // Branch folding must be run after regalloc and prolog/epilog insertion. + if (OptLevel != CodeGenOpt::None) { + PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); + printAndVerify(PM); + } + + PM.add(createGCMachineCodeAnalysisPass()); + printAndVerify(PM); + + if (PrintGCInfo) + PM.add(createGCInfoPrinter(*cerr)); + + return false; +} diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp new file mode 100644 index 0000000..2e7b89c --- /dev/null +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -0,0 +1,114 @@ +//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LatencyPriorityQueue class, which is a +// SchedulingPriorityQueue that schedules using latency information to +// reduce the length of the critical path through the basic block. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + + return OnlyAvailablePred; +} + +void LatencyPriorityQueue::push_impl(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + + Queue.push(SU); +} + + +// ScheduledNode - As nodes are scheduled, we look to see if there are any +// successor nodes that have a single unscheduled predecessor. If so, that +// single predecessor has a higher priority, since scheduling it will make +// the node available. +void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + AdjustPriorityOfUnscheduledPreds(I->getSUnit()); +} + +/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. 
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+  if (SU->isAvailable) return;  // All preds scheduled.
+
+  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+  if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+  // Okay, we found a single predecessor that is available, but not scheduled.
+  // Since it is available, it must be in the priority queue. First remove it.
+  remove(OnlyAvailablePred);
+
+  // Reinsert the node into the priority queue, which recomputes its
+  // NumNodesSolelyBlocking value.
+  push(OnlyAvailablePred);
+}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..67120b8
--- /dev/null
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,853 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said
+// to be a live interval for register v if there is no instruction with number
+// j' > j such that v is live at j' and there is no instruction with number
+// i' < i such that v is live at i'. In this implementation intervals can have
+// holes, i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <ostream>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(unsigned I) const {
+  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+  if (r == ranges.begin())
+    return false;
+
+  --r;
+  return r->contains(I);
+}
+
+// liveBeforeAndAt - Check if the interval is live at the index and the index
+// just before it. If index is liveAt, check if it starts a new live range.
+// If it does, then check if the previous live range ends at index-1.
+bool LiveInterval::liveBeforeAndAt(unsigned I) const {
+  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+  if (r == ranges.begin())
+    return false;
+
+  --r;
+  if (!r->contains(I))
+    return false;
+  if (I != r->start)
+    return true;
+  // I is the start of a live range. Check if the previous live range ends
+  // at I-1.
+  if (r == ranges.begin())
+    return false;
+  return (--r)->end == I;
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A +// +// The live intervals should look like: +// +// A = [3, 11) +// B = [7, x) +// C = [11, y) +// +// A->overlaps(C) should return false since we want to be able to join +// A and C. +// +bool LiveInterval::overlapsFrom(const LiveInterval& other, + const_iterator StartPos) const { + const_iterator i = begin(); + const_iterator ie = end(); + const_iterator j = StartPos; + const_iterator je = other.end(); + + assert((StartPos->start <= i->start || StartPos == other.begin()) && + StartPos != other.end() && "Bogus start position hint!"); + + if (i->start < j->start) { + i = std::upper_bound(i, ie, j->start); + if (i != ranges.begin()) --i; + } else if (j->start < i->start) { + ++StartPos; + if (StartPos != other.end() && StartPos->start <= i->start) { + assert(StartPos < other.end() && i < end()); + j = std::upper_bound(j, je, i->start); + if (j != other.ranges.begin()) --j; + } + } else { + return true; + } + + if (j == je) return false; + + while (i != ie) { + if (i->start > j->start) { + std::swap(i, j); + std::swap(ie, je); + } + + if (i->end > j->start) + return true; + ++i; + } + + return false; +} + +/// overlaps - Return true if the live interval overlaps a range specified +/// by [Start, End). +bool LiveInterval::overlaps(unsigned Start, unsigned End) const { + assert(Start < End && "Invalid range"); + const_iterator I = begin(); + const_iterator E = end(); + const_iterator si = std::upper_bound(I, E, Start); + const_iterator ei = std::upper_bound(I, E, End); + if (si != ei) + return true; + if (si == I) + return false; + --si; + return si->contains(Start); +} + +/// extendIntervalEndTo - This method is used when we want to extend the range +/// specified by I to end at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. The iterator is +/// not invalidated. +void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { + assert(I != ranges.end() && "Not a valid interval!"); + VNInfo *ValNo = I->valno; + unsigned OldEnd = I->end; + + // Search for the first interval that we can't merge with. + Ranges::iterator MergeTo = next(I); + for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + } + + // If NewEnd was in the middle of an interval, make sure to get its endpoint. + I->end = std::max(NewEnd, prior(MergeTo)->end); + + // Erase any dead ranges. + ranges.erase(next(I), MergeTo); + + // Update kill info. + removeKills(ValNo, OldEnd, I->end-1); + + // If the newly formed range now touches the range after it and if they have + // the same value number, merge the two ranges into one range. + Ranges::iterator Next = next(I); + if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) { + I->end = Next->end; + ranges.erase(Next); + } +} + + +/// extendIntervalStartTo - This method is used when we want to extend the range +/// specified by I to start at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. +LiveInterval::Ranges::iterator +LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { + assert(I != ranges.end() && "Not a valid interval!"); + VNInfo *ValNo = I->valno; + + // Search for the first interval that we can't merge with. 
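+  // Walk backwards from I: extending the start of I leftwards can swallow
+  // earlier ranges that carry the same value number.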
+  Ranges::iterator MergeTo = I;
+  do {
+    if (MergeTo == ranges.begin()) {
+      I->start = NewStart;
+      ranges.erase(MergeTo, I);
+      return I;
+    }
+    assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+    --MergeTo;
+  } while (NewStart <= MergeTo->start);
+
+  // If we start in the middle of another interval, just delete a range and
+  // extend that interval.
+  if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+    MergeTo->end = I->end;
+  } else {
+    // Otherwise, extend the interval right after.
+    ++MergeTo;
+    MergeTo->start = NewStart;
+    MergeTo->end = I->end;
+  }
+
+  ranges.erase(next(MergeTo), next(I));
+  return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+  unsigned Start = LR.start, End = LR.end;
+  iterator it = std::upper_bound(From, ranges.end(), Start);
+
+  // If the inserted interval starts in the middle or right at the end of
+  // another interval, just extend that interval to contain the range of LR.
+  if (it != ranges.begin()) {
+    iterator B = prior(it);
+    if (LR.valno == B->valno) {
+      if (B->start <= Start && B->end >= Start) {
+        extendIntervalEndTo(B, End);
+        return B;
+      }
+    } else {
+      // Check to make sure that we are not overlapping two live ranges with
+      // different valno's.
+      assert(B->end <= Start &&
+             "Cannot overlap two LiveRanges with differing ValID's"
+             " (did you def the same reg twice in a MachineInstr?)");
+    }
+  }
+
+  // Otherwise, if this range ends in the middle of, or right next to, another
+  // interval, merge it into that interval.
+  if (it != ranges.end()) {
+    if (LR.valno == it->valno) {
+      if (it->start <= End) {
+        it = extendIntervalStartTo(it, Start);
+
+        // If LR is a complete superset of an interval, we may need to grow its
+        // endpoint as well.
+        if (End > it->end)
+          extendIntervalEndTo(it, End);
+        else if (End < it->end)
+          // Overlapping intervals, there might have been a kill here.
+          removeKill(it->valno, End);
+        return it;
+      }
+    } else {
+      // Check to make sure that we are not overlapping two live ranges with
+      // different valno's.
+      assert(it->start >= End &&
+             "Cannot overlap two LiveRanges with differing ValID's");
+    }
+  }
+
+  // Otherwise, this is just a new range that doesn't interact with anything.
+  // Insert it.
+  return ranges.insert(it, LR);
+}
+
+/// isInOneLiveRange - Return true if the range specified is entirely in
+/// a single LiveRange of the live interval.
+bool LiveInterval::isInOneLiveRange(unsigned Start, unsigned End) {
+  Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+  if (I == ranges.begin())
+    return false;
+  --I;
+  return I->contains(Start) && I->contains(End-1);
+}
+
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
+void LiveInterval::removeRange(unsigned Start, unsigned End,
+                               bool RemoveDeadValNo) {
+  // Find the LiveRange containing this span.
+  Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+  assert(I != ranges.begin() && "Range is not in interval!");
+  --I;
+  assert(I->contains(Start) && I->contains(End-1) &&
+         "Range is not entirely in interval!");
+
+  // If the span we are removing is at the start of the LiveRange, adjust it.
+  VNInfo *ValNo = I->valno;
+  if (I->start == Start) {
+    if (I->end == End) {
+      removeKills(I->valno, Start, End);
+      if (RemoveDeadValNo) {
+        // Check if val# is dead.
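+        // (i.e. no other range in this interval still refers to ValNo)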
+        bool isDead = true;
+        for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+          if (II != I && II->valno == ValNo) {
+            isDead = false;
+            break;
+          }
+        if (isDead) {
+          // Now that ValNo is dead, remove it. If it is the largest value
+          // number, just nuke it (and any other deleted values neighboring it),
+          // otherwise mark it as ~1U so it can be nuked later.
+          if (ValNo->id == getNumValNums()-1) {
+            do {
+              VNInfo *VNI = valnos.back();
+              valnos.pop_back();
+              VNI->~VNInfo();
+            } while (!valnos.empty() && valnos.back()->def == ~1U);
+          } else {
+            ValNo->def = ~1U;
+          }
+        }
+      }
+
+      ranges.erase(I);  // Removed the whole LiveRange.
+    } else
+      I->start = End;
+    return;
+  }
+
+  // Otherwise if the span we are removing is at the end of the LiveRange,
+  // adjust the other way.
+  if (I->end == End) {
+    removeKills(ValNo, Start, End);
+    I->end = Start;
+    return;
+  }
+
+  // Otherwise, we are splitting the LiveRange into two pieces.
+  unsigned OldEnd = I->end;
+  I->end = Start;   // Trim the old interval.
+
+  // Insert the new one.
+  ranges.insert(next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+  if (empty()) return;
+  Ranges::iterator I = ranges.end();
+  Ranges::iterator E = ranges.begin();
+  do {
+    --I;
+    if (I->valno == ValNo)
+      ranges.erase(I);
+  } while (I != E);
+  // Now that ValNo is dead, remove it. If it is the largest value
+  // number, just nuke it (and any other deleted values neighboring it),
+  // otherwise mark it as ~1U so it can be nuked later.
+  if (ValNo->id == getNumValNums()-1) {
+    do {
+      VNInfo *VNI = valnos.back();
+      valnos.pop_back();
+      VNI->~VNInfo();
+    } while (!valnos.empty() && valnos.back()->def == ~1U);
+  } else {
+    ValNo->def = ~1U;
+  }
+}
+
+/// scaleNumbering - Renumber VNI and ranges to provide gaps for new
+/// instructions.
+void LiveInterval::scaleNumbering(unsigned factor) {
+  // Scale ranges.
+  for (iterator RI = begin(), RE = end(); RI != RE; ++RI) {
+    RI->start = InstrSlots::scale(RI->start, factor);
+    RI->end = InstrSlots::scale(RI->end, factor);
+  }
+
+  // Scale VNI info.
+  for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) {
+    VNInfo *vni = *VNI;
+    if (vni->def != ~0U && vni->def != ~1U) {
+      vni->def = InstrSlots::scale(vni->def, factor);
+    }
+
+    for (unsigned i = 0; i < vni->kills.size(); ++i) {
+      if (vni->kills[i] != 0)
+        vni->kills[i] = InstrSlots::scale(vni->kills[i], factor);
+    }
+  }
+}
+
+/// FindLiveRangeContaining - Return the live range that contains the
+/// specified index, or end() if there is none.
+LiveInterval::const_iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) const {
+  const_iterator It = std::upper_bound(begin(), end(), Idx);
+  if (It != ranges.begin()) {
+    --It;
+    if (It->contains(Idx))
+      return It;
+  }
+
+  return end();
+}
+
+LiveInterval::iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) {
+  iterator It = std::upper_bound(begin(), end(), Idx);
+  if (It != begin()) {
+    --It;
+    if (It->contains(Idx))
+      return It;
+  }
+
+  return end();
+}
+
+/// findDefinedVNInfo - Find the VNInfo that's defined at the specified index
+/// (register interval) or defined by the specified register (stack interval).
+VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const {
+  VNInfo *VNI = NULL;
+  for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+       i != e; ++i)
+    if ((*i)->def == DefIdxOrReg) {
+      VNI = *i;
+      break;
+    }
+  return VNI;
+}
+
+
+/// join - Join two live intervals (this, and other) together.  This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified.  If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
+                        const int *RHSValNoAssignments,
+                        SmallVector<VNInfo*, 16> &NewVNInfo) {
+  // Determine if any of our live range values are mapped.  This is uncommon, so
+  // we want to avoid the interval scan if not.
+  bool MustMapCurValNos = false;
+  unsigned NumVals = getNumValNums();
+  unsigned NumNewVals = NewVNInfo.size();
+  for (unsigned i = 0; i != NumVals; ++i) {
+    unsigned LHSValID = LHSValNoAssignments[i];
+    if (i != LHSValID ||
+        (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+      MustMapCurValNos = true;
+  }
+
+  // If we have to apply a mapping to our base interval assignment, rewrite it
+  // now.
+  if (MustMapCurValNos) {
+    // Map the first live range.
+    iterator OutIt = begin();
+    OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+    ++OutIt;
+    for (iterator I = OutIt, E = end(); I != E; ++I) {
+      OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+      // If this live range has the same value # as its immediate predecessor,
+      // and if they are neighbors, remove one LiveRange.  This happens when we
+      // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+      if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+        (OutIt-1)->end = OutIt->end;
+      } else {
+        if (I != OutIt) {
+          OutIt->start = I->start;
+          OutIt->end = I->end;
+        }
+
+        // Didn't merge, on to the next one.
+        ++OutIt;
+      }
+    }
+
+    // If we merge some live ranges, chop off the end.
+    ranges.erase(OutIt, end());
+  }
+
+  // Remember assignments because val# ids are changing.
+  SmallVector<unsigned, 16> OtherAssignments;
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+    OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+  // Update val# info.  Renumber them and make sure they all belong to this
+  // LiveInterval now.  Also remove dead val#'s.
+  unsigned NumValNos = 0;
+  for (unsigned i = 0; i < NumNewVals; ++i) {
+    VNInfo *VNI = NewVNInfo[i];
+    if (VNI) {
+      if (NumValNos >= NumVals)
+        valnos.push_back(VNI);
+      else
+        valnos[NumValNos] = VNI;
+      VNI->id = NumValNos++;  // Renumber val#.
+    }
+  }
+  if (NumNewVals < NumVals)
+    valnos.resize(NumNewVals);  // shrinkify
+
+  // Okay, now insert the RHS live ranges into the LHS.
+  iterator InsertPos = begin();
+  unsigned RangeNo = 0;
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+    // Map the valno in the other live range to the current live range.
+    I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+    assert(I->valno && "Adding a dead range?");
+    InsertPos = addRangeFrom(*I, InsertPos);
+  }
+
+  weight += Other.weight;
+  if (Other.preference && !preference)
+    preference = Other.preference;
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number.  The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
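+///
+/// Example (illustrative): if RHS contains [0,4) and [8,12), both spans are
+/// added to this interval carrying LHSValNo, coalescing with any adjacent
+/// ranges that already have that value number.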
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+                                        VNInfo *LHSValNo) {
+  // TODO: Make this more efficient.
+  iterator InsertPos = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    // Map the valno in the other live range to the current live range.
+    LiveRange Tmp = *I;
+    Tmp.valno = LHSValNo;
+    InsertPos = addRangeFrom(Tmp, InsertPos);
+  }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; the value numbers of the overlapped live ranges are
+/// replaced with the specified value number.
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+                                       const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+  SmallVector<VNInfo*, 4> ReplacedValNos;
+  iterator IP = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    if (I->valno != RHSValNo)
+      continue;
+    unsigned Start = I->start, End = I->end;
+    IP = std::upper_bound(IP, end(), Start);
+    // If the start of this range overlaps with an existing liverange, trim it.
+    if (IP != begin() && IP[-1].end > Start) {
+      if (IP[-1].valno != LHSValNo) {
+        ReplacedValNos.push_back(IP[-1].valno);
+        IP[-1].valno = LHSValNo; // Update val#.
+      }
+      Start = IP[-1].end;
+      // Trimmed away the whole range?
+      if (Start >= End) continue;
+    }
+    // If the end of this range overlaps with an existing liverange, trim it.
+    if (IP != end() && End > IP->start) {
+      if (IP->valno != LHSValNo) {
+        ReplacedValNos.push_back(IP->valno);
+        IP->valno = LHSValNo;  // Update val#.
+      }
+      End = IP->start;
+      // If this trimmed away the whole range, ignore it.
+      if (Start == End) continue;
+    }
+
+    // Map the valno in the other live range to the current live range.
+    IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
+  }
+
+
+  SmallSet<VNInfo*, 4> Seen;
+  for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) {
+    VNInfo *V1 = ReplacedValNos[i];
+    if (Seen.insert(V1)) {
+      bool isDead = true;
+      for (const_iterator I = begin(), E = end(); I != E; ++I)
+        if (I->valno == V1) {
+          isDead = false;
+          break;
+        }
+      if (isDead) {
+        // Now that V1 is dead, remove it.  If it is the largest value number,
+        // just nuke it (and any other deleted values neighboring it), otherwise
+        // mark it as ~1U so it can be nuked later.
+        if (V1->id == getNumValNums()-1) {
+          do {
+            VNInfo *VNI = valnos.back();
+            valnos.pop_back();
+            VNI->~VNInfo();
+          } while (!valnos.empty() && valnos.back()->def == ~1U);
+        } else {
+          V1->def = ~1U;
+        }
+      }
+    }
+  }
+}
+
+
+/// MergeInClobberRanges - For any live ranges that are not defined in the
+/// current interval, but are defined in the Clobbers interval, mark them
+/// used with an unknown definition value.
+void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
+                                        BumpPtrAllocator &VNInfoAllocator) {
+  if (Clobbers.empty()) return;
+
+  DenseMap<VNInfo*, VNInfo*> ValNoMaps;
+  VNInfo *UnusedValNo = 0;
+  iterator IP = begin();
+  for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+    // For every val# in the Clobbers interval, create a new "unknown" val#.
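+    // (Illustrative note: the "unknown" val# below is created with a def
+    // index of ~0U, matching the sentinel convention used in this file:
+    // ~0U means unknown def, ~1U means dead val#.)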
+    VNInfo *ClobberValNo = 0;
+    DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
+    if (VI != ValNoMaps.end())
+      ClobberValNo = VI->second;
+    else if (UnusedValNo)
+      ClobberValNo = UnusedValNo;
+    else {
+      UnusedValNo = ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+      ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
+    }
+
+    bool Done = false;
+    unsigned Start = I->start, End = I->end;
+    // If a clobber range starts before an existing range and ends after
+    // it, the clobber range will need to be split into multiple ranges.
+    // Loop until the entire clobber range is handled.
+    while (!Done) {
+      Done = true;
+      IP = std::upper_bound(IP, end(), Start);
+      unsigned SubRangeStart = Start;
+      unsigned SubRangeEnd = End;
+
+      // If the start of this range overlaps with an existing liverange, trim it.
+      if (IP != begin() && IP[-1].end > SubRangeStart) {
+        SubRangeStart = IP[-1].end;
+        // Trimmed away the whole range?
+        if (SubRangeStart >= SubRangeEnd) continue;
+      }
+      // If the end of this range overlaps with an existing liverange, trim it.
+      if (IP != end() && SubRangeEnd > IP->start) {
+        // If the clobber live range extends beyond the existing live range,
+        // it'll need at least another live range, so set the flag to keep
+        // iterating.
+        if (SubRangeEnd > IP->end) {
+          Start = IP->end;
+          Done = false;
+        }
+        SubRangeEnd = IP->start;
+        // If this trimmed away the whole range, ignore it.
+        if (SubRangeStart == SubRangeEnd) continue;
+      }
+
+      // Insert the clobber interval.
+      IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
+                        IP);
+      UnusedValNo = 0;
+    }
+  }
+
+  if (UnusedValNo) {
+    // Delete the last unused val#.
+    valnos.pop_back();
+    UnusedValNo->~VNInfo();
+  }
+}
+
+void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End,
+                                       BumpPtrAllocator &VNInfoAllocator) {
+  // Create a new "unknown" value # to use for the clobber range.
+  VNInfo *ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+
+  iterator IP = begin();
+  IP = std::upper_bound(IP, end(), Start);
+
+  // If the start of this range overlaps with an existing liverange, trim it.
+  if (IP != begin() && IP[-1].end > Start) {
+    Start = IP[-1].end;
+    // Trimmed away the whole range?
+    if (Start >= End) return;
+  }
+  // If the end of this range overlaps with an existing liverange, trim it.
+  if (IP != end() && End > IP->start) {
+    End = IP->start;
+    // If this trimmed away the whole range, ignore it.
+    if (Start == End) return;
+  }
+
+  // Insert the clobber interval.
+  addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent.  This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number.  This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+  assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+  // This code actually merges the (numerically) larger value number into the
+  // smaller value number, which is likely to allow us to compactify the value
+  // space.  The only thing we have to be careful of is to preserve the
+  // instruction that defines the result value.
+
+  // Make sure V2 is smaller than V1.
+  if (V1->id < V2->id) {
+    copyValNumInfo(V1, V2);
+    std::swap(V1, V2);
+  }
+
+  // Merge V1 live ranges into V2.
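+  // Worked example (illustrative): with ranges [0,4):1 [4,8):0 and V1 = #1,
+  // V2 = #0, the loop below rewrites [0,4) to val#0 and merges it with the
+  // touching [4,8) range, leaving the single range [0,8):0.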
+ for (iterator I = begin(); I != end(); ) { + iterator LR = I++; + if (LR->valno != V1) continue; // Not a V1 LiveRange. + + // Okay, we found a V1 live range. If it had a previous, touching, V2 live + // range, extend it. + if (LR != begin()) { + iterator Prev = LR-1; + if (Prev->valno == V2 && Prev->end == LR->start) { + Prev->end = LR->end; + + // Erase this live-range. + ranges.erase(LR); + I = Prev+1; + LR = Prev; + } + } + + // Okay, now we have a V1 or V2 live range that is maximally merged forward. + // Ensure that it is a V2 live-range. + LR->valno = V2; + + // If we can merge it into later V2 live ranges, do so now. We ignore any + // following V1 live ranges, as they will be merged in subsequent iterations + // of the loop. + if (I != end()) { + if (I->start == LR->end && I->valno == V2) { + LR->end = I->end; + ranges.erase(I); + I = LR+1; + } + } + } + + // Now that V1 is dead, remove it. If it is the largest value number, just + // nuke it (and any other deleted values neighboring it), otherwise mark it as + // ~1U so it can be nuked later. + if (V1->id == getNumValNums()-1) { + do { + VNInfo *VNI = valnos.back(); + valnos.pop_back(); + VNI->~VNInfo(); + } while (valnos.back()->def == ~1U); + } else { + V1->def = ~1U; + } + + return V2; +} + +void LiveInterval::Copy(const LiveInterval &RHS, + BumpPtrAllocator &VNInfoAllocator) { + ranges.clear(); + valnos.clear(); + preference = RHS.preference; + weight = RHS.weight; + for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) { + const VNInfo *VNI = RHS.getValNumInfo(i); + VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator); + copyValNumInfo(NewVNI, VNI); + } + for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) { + const LiveRange &LR = RHS.ranges[i]; + addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id))); + } +} + +unsigned LiveInterval::getSize() const { + unsigned Sum = 0; + for (const_iterator I = begin(), E = end(); I != E; ++I) + Sum += I->end - I->start; + return Sum; +} + +std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) { + return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; +} + +void LiveRange::dump() const { + cerr << *this << "\n"; +} + +void LiveInterval::print(std::ostream &OS, + const TargetRegisterInfo *TRI) const { + if (isStackSlot()) + OS << "SS#" << getStackSlotIndex(); + else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg)) + OS << TRI->getName(reg); + else + OS << "%reg" << reg; + + OS << ',' << weight; + + if (empty()) + OS << " EMPTY"; + else { + OS << " = "; + for (LiveInterval::Ranges::const_iterator I = ranges.begin(), + E = ranges.end(); I != E; ++I) + OS << *I; + } + + // Print value number info. 
+ if (getNumValNums()) { + OS << " "; + unsigned vnum = 0; + for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e; + ++i, ++vnum) { + const VNInfo *vni = *i; + if (vnum) OS << " "; + OS << vnum << "@"; + if (vni->def == ~1U) { + OS << "x"; + } else { + if (vni->def == ~0U) + OS << "?"; + else + OS << vni->def; + unsigned ee = vni->kills.size(); + if (ee || vni->hasPHIKill) { + OS << "-("; + for (unsigned j = 0; j != ee; ++j) { + OS << vni->kills[j]; + if (j != ee-1) + OS << " "; + } + if (vni->hasPHIKill) { + if (ee) + OS << " "; + OS << "phi"; + } + OS << ")"; + } + } + } + } +} + +void LiveInterval::dump() const { + cerr << *this << "\n"; +} + + +void LiveRange::print(std::ostream &os) const { + os << *this; +} diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp new file mode 100644 index 0000000..cf0a648 --- /dev/null +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -0,0 +1,2298 @@ +//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveInterval analysis pass which is used +// by the Linear Scan Register allocator. This pass linearizes the +// basic blocks of the function in DFS order and uses the +// LiveVariables pass to conservatively compute live intervals for +// each virtual and physical register. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "liveintervals" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "VirtRegMap.h" +#include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <limits> +#include <cmath> +using namespace llvm; + +// Hidden options for help debugging. 
+static cl::opt<bool> DisableReMat("disable-rematerialization",
+                                  cl::init(false), cl::Hidden);
+
+static cl::opt<bool> SplitAtBB("split-intervals-at-bb",
+                               cl::init(true), cl::Hidden);
+static cl::opt<int> SplitLimit("split-limit",
+                               cl::init(-1), cl::Hidden);
+
+static cl::opt<bool> EnableAggressiveRemat("aggressive-remat", cl::Hidden);
+
+static cl::opt<bool> EnableFastSpilling("fast-spill",
+                                        cl::init(false), cl::Hidden);
+
+STATISTIC(numIntervals, "Number of original intervals");
+STATISTIC(numFolds    , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits   , "Number of intervals split");
+
+char LiveIntervals::ID = 0;
+static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addPreserved<LiveVariables>();
+  AU.addRequired<LiveVariables>();
+  AU.addPreservedID(MachineLoopInfoID);
+  AU.addPreservedID(MachineDominatorsID);
+
+  if (!StrongPHIElim) {
+    AU.addPreservedID(PHIEliminationID);
+    AU.addRequiredID(PHIEliminationID);
+  }
+
+  AU.addRequiredID(TwoAddressInstructionPassID);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+  // Free the live intervals themselves.
+  for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
+       E = r2iMap_.end(); I != E; ++I)
+    delete I->second;
+
+  MBB2IdxMap.clear();
+  Idx2MBBMap.clear();
+  mi2iMap_.clear();
+  i2miMap_.clear();
+  r2iMap_.clear();
+  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+  VNInfoAllocator.Reset();
+  while (!ClonedMIs.empty()) {
+    MachineInstr *MI = ClonedMIs.back();
+    ClonedMIs.pop_back();
+    mf_->DeleteMachineInstr(MI);
+  }
+}
+
+void LiveIntervals::computeNumbering() {
+  Index2MiMap OldI2MI = i2miMap_;
+  std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap;
+
+  Idx2MBBMap.clear();
+  MBB2IdxMap.clear();
+  mi2iMap_.clear();
+  i2miMap_.clear();
+
+  FunctionSize = 0;
+
+  // Number MachineInstrs and MachineBasicBlocks.
+  // Initialize MBB indexes to a sentinel.
+  MBB2IdxMap.resize(mf_->getNumBlockIDs(), std::make_pair(~0U,~0U));
+
+  unsigned MIIndex = 0;
+  for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
+       MBB != E; ++MBB) {
+    unsigned StartIdx = MIIndex;
+
+    // Insert an empty slot at the beginning of each block.
+    MIIndex += InstrSlots::NUM;
+    i2miMap_.push_back(0);
+
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
+      assert(inserted && "multiple MachineInstr -> index mappings");
+      inserted = true;
+      i2miMap_.push_back(I);
+      MIIndex += InstrSlots::NUM;
+      FunctionSize++;
+
+      // Insert max(1, numdefs) empty slots after every instruction.
+      unsigned Slots = I->getDesc().getNumDefs();
+      if (Slots == 0)
+        Slots = 1;
+      MIIndex += InstrSlots::NUM * Slots;
+      while (Slots--)
+        i2miMap_.push_back(0);
+    }
+
+    // Set the MBB2IdxMap entry for this MBB.
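+    // (Illustrative note: the inclusive pair recorded below means a block
+    // whose instructions were numbered [StartIdx, MIIndex) is looked up as
+    // (StartIdx, MIIndex - 1) by getMBBStartIdx/getMBBEndIdx.)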
+ MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, MIIndex - 1); + Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB)); + } + std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare()); + + if (!OldI2MI.empty()) + for (iterator OI = begin(), OE = end(); OI != OE; ++OI) { + for (LiveInterval::iterator LI = OI->second->begin(), + LE = OI->second->end(); LI != LE; ++LI) { + + // Remap the start index of the live range to the corresponding new + // number, or our best guess at what it _should_ correspond to if the + // original instruction has been erased. This is either the following + // instruction or its predecessor. + unsigned index = LI->start / InstrSlots::NUM; + unsigned offset = LI->start % InstrSlots::NUM; + if (offset == InstrSlots::LOAD) { + std::vector<IdxMBBPair>::const_iterator I = + std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start); + // Take the pair containing the index + std::vector<IdxMBBPair>::const_iterator J = + (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I; + + LI->start = getMBBStartIdx(J->second); + } else { + LI->start = mi2iMap_[OldI2MI[index]] + offset; + } + + // Remap the ending index in the same way that we remapped the start, + // except for the final step where we always map to the immediately + // following instruction. + index = (LI->end - 1) / InstrSlots::NUM; + offset = LI->end % InstrSlots::NUM; + if (offset == InstrSlots::LOAD) { + // VReg dies at end of block. + std::vector<IdxMBBPair>::const_iterator I = + std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end); + --I; + + LI->end = getMBBEndIdx(I->second) + 1; + } else { + unsigned idx = index; + while (index < OldI2MI.size() && !OldI2MI[index]) ++index; + + if (index != OldI2MI.size()) + LI->end = mi2iMap_[OldI2MI[index]] + (idx == index ? offset : 0); + else + LI->end = InstrSlots::NUM * i2miMap_.size(); + } + } + + for (LiveInterval::vni_iterator VNI = OI->second->vni_begin(), + VNE = OI->second->vni_end(); VNI != VNE; ++VNI) { + VNInfo* vni = *VNI; + + // Remap the VNInfo def index, which works the same as the + // start indices above. VN's with special sentinel defs + // don't need to be remapped. + if (vni->def != ~0U && vni->def != ~1U) { + unsigned index = vni->def / InstrSlots::NUM; + unsigned offset = vni->def % InstrSlots::NUM; + if (offset == InstrSlots::LOAD) { + std::vector<IdxMBBPair>::const_iterator I = + std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def); + // Take the pair containing the index + std::vector<IdxMBBPair>::const_iterator J = + (I == OldI2MBB.end() && OldI2MBB.size()>0) ? (I-1): I; + + vni->def = getMBBStartIdx(J->second); + } else { + vni->def = mi2iMap_[OldI2MI[index]] + offset; + } + } + + // Remap the VNInfo kill indices, which works the same as + // the end indices above. + for (size_t i = 0; i < vni->kills.size(); ++i) { + // PHI kills don't need to be remapped. + if (!vni->kills[i]) continue; + + unsigned index = (vni->kills[i]-1) / InstrSlots::NUM; + unsigned offset = vni->kills[i] % InstrSlots::NUM; + if (offset == InstrSlots::LOAD) { + std::vector<IdxMBBPair>::const_iterator I = + std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]); + --I; + + vni->kills[i] = getMBBEndIdx(I->second); + } else { + unsigned idx = index; + while (index < OldI2MI.size() && !OldI2MI[index]) ++index; + + if (index != OldI2MI.size()) + vni->kills[i] = mi2iMap_[OldI2MI[index]] + + (idx == index ? 
offset : 0);
+          else
+            vni->kills[i] = InstrSlots::NUM * i2miMap_.size();
+        }
+      }
+    }
+}
+
+void LiveIntervals::scaleNumbering(int factor) {
+  // Need to:
+  //  * Scale MBB begin and end points.
+  //  * Scale all ranges.
+  //  * Update VNI structures.
+  //  * Scale instruction numberings.
+
+  // Scale the MBB indices.
+  Idx2MBBMap.clear();
+  for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end();
+       MBB != MBBE; ++MBB) {
+    std::pair<unsigned, unsigned> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
+    mbbIndices.first = InstrSlots::scale(mbbIndices.first, factor);
+    mbbIndices.second = InstrSlots::scale(mbbIndices.second, factor);
+    Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB));
+  }
+  std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
+
+  // Scale the intervals.
+  for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
+    LI->second->scaleNumbering(factor);
+  }
+
+  // Scale MachineInstrs.
+  Mi2IndexMap oldmi2iMap = mi2iMap_;
+  unsigned highestSlot = 0;
+  for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end();
+       MI != ME; ++MI) {
+    unsigned newSlot = InstrSlots::scale(MI->second, factor);
+    mi2iMap_[MI->first] = newSlot;
+    highestSlot = std::max(highestSlot, newSlot);
+  }
+
+  i2miMap_.clear();
+  i2miMap_.resize(highestSlot + 1);
+  for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end();
+       MI != ME; ++MI) {
+    i2miMap_[MI->second] = MI->first;
+  }
+
+}
+
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  mri_ = &mf_->getRegInfo();
+  tm_ = &fn.getTarget();
+  tri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  aa_ = &getAnalysis<AliasAnalysis>();
+  lv_ = &getAnalysis<LiveVariables>();
+  allocatableRegs_ = tri_->getAllocatableSet(fn);
+
+  computeNumbering();
+  computeIntervals();
+
+  numIntervals += getNumIntervals();
+
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(std::ostream &O, const Module* ) const {
+  O << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second->print(O, tri_);
+    O << "\n";
+  }
+
+  O << "********** MACHINEINSTRS **********\n";
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+    for (MachineBasicBlock::iterator mii = mbbi->begin(),
+           mie = mbbi->end(); mii != mie; ++mii) {
+      O << getInstructionIndex(mii) << '\t' << *mii;
+    }
+  }
+}
+
+/// conflictsWithPhysRegDef - Returns true if the specified register
+/// is defined during the duration of the specified interval.
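+/// (Illustrative example: the coalescer can use this to ask whether
+/// assigning li to physical register 'reg' would collide with an existing
+/// def of 'reg', or of a register aliasing it, inside li's live ranges.)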
+bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li,
+                                            VirtRegMap &vrm, unsigned reg) {
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    for (unsigned index = getBaseIndex(I->start),
+           end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
+         index += InstrSlots::NUM) {
+      // skip deleted instructions
+      while (index != end && !getInstructionFromIndex(index))
+        index += InstrSlots::NUM;
+      if (index == end) break;
+
+      MachineInstr *MI = getInstructionFromIndex(index);
+      unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+      if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+        if (SrcReg == li.reg || DstReg == li.reg)
+          continue;
+      for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+        MachineOperand& mop = MI->getOperand(i);
+        if (!mop.isReg())
+          continue;
+        unsigned PhysReg = mop.getReg();
+        if (PhysReg == 0 || PhysReg == li.reg)
+          continue;
+        if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+          if (!vrm.hasPhys(PhysReg))
+            continue;
+          PhysReg = vrm.getPhys(PhysReg);
+        }
+        if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegDef except
+/// it can check uses as well.
+bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
+                                            unsigned Reg, bool CheckUse,
+                                  SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    for (unsigned index = getBaseIndex(I->start),
+           end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end;
+         index += InstrSlots::NUM) {
+      // Skip deleted instructions.
+      MachineInstr *MI = 0;
+      while (index != end) {
+        MI = getInstructionFromIndex(index);
+        if (MI)
+          break;
+        index += InstrSlots::NUM;
+      }
+      if (index == end) break;
+
+      if (JoinedCopies.count(MI))
+        continue;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand& MO = MI->getOperand(i);
+        if (!MO.isReg())
+          continue;
+        if (MO.isUse() && !CheckUse)
+          continue;
+        unsigned PhysReg = MO.getReg();
+        if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg))
+          continue;
+        if (tri_->isSubRegister(Reg, PhysReg))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+
+void LiveIntervals::printRegName(unsigned reg) const {
+  if (TargetRegisterInfo::isPhysicalRegister(reg))
+    cerr << tri_->getName(reg);
+  else
+    cerr << "%reg" << reg;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+                                             MachineBasicBlock::iterator mi,
+                                             unsigned MIIdx, MachineOperand& MO,
+                                             unsigned MOIdx,
+                                             LiveInterval &interval) {
+  DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+  LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+
+  if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+    DOUT << "is an implicit_def\n";
+    return;
+  }
+
+  // Virtual registers may be defined multiple times (due to phi
+  // elimination and 2-addr elimination).  Much of what we do only has to be
+  // done once for the vreg.  We use an empty interval to detect the first
+  // time we see a vreg.
+  if (interval.empty()) {
+    // Get the Idx of the defining instructions.
+    unsigned defIndex = getDefIndex(MIIdx);
+    // Earlyclobbers move back one.
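+    // (Illustrative note, assuming the usual LOAD/USE/DEF/STORE sub-slot
+    // layout within each instruction's InstrSlots::NUM slots: getDefIndex(i)
+    // picks the def sub-slot and getUseIndex(i) the use sub-slot, so an
+    // earlyclobber def is moved back to the use slot below.)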
+    if (MO.isEarlyClobber())
+      defIndex = getUseIndex(MIIdx);
+    VNInfo *ValNo;
+    MachineInstr *CopyMI = NULL;
+    unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+    if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+        mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+        mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+        tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+      CopyMI = mi;
+    // Earlyclobbers move back one.
+    ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+    assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+    // Loop over all of the blocks that the vreg is defined in.  There are
+    // two cases we have to handle here.  The most common case is a vreg
+    // whose lifetime is contained within a basic block.  In this case there
+    // will be a single kill, in MBB, which comes after the definition.
+    if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+      // FIXME: what about dead vars?
+      unsigned killIdx;
+      if (vi.Kills[0] != mi)
+        killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+      else
+        killIdx = defIndex+1;
+
+      // If the kill happens after the definition, we have an intra-block
+      // live range.
+      if (killIdx > defIndex) {
+        assert(vi.AliveBlocks.empty() &&
+               "Shouldn't be alive across any blocks!");
+        LiveRange LR(defIndex, killIdx, ValNo);
+        interval.addRange(LR);
+        DOUT << " +" << LR << "\n";
+        interval.addKill(ValNo, killIdx);
+        return;
+      }
+    }
+
+    // The other case we handle is when a virtual register lives to the end
+    // of the defining block, potentially live across some blocks, then is
+    // live into some number of blocks, but gets killed.  Start by adding a
+    // range that goes from this definition to the end of the defining block.
+    LiveRange NewLR(defIndex, getMBBEndIdx(mbb)+1, ValNo);
+    DOUT << " +" << NewLR;
+    interval.addRange(NewLR);
+
+    // Iterate over all of the blocks that the variable is completely
+    // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+    // live interval.
+    for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+           E = vi.AliveBlocks.end(); I != E; ++I) {
+      LiveRange LR(getMBBStartIdx(*I),
+                   getMBBEndIdx(*I)+1,  // MBB ends at -1.
+                   ValNo);
+      interval.addRange(LR);
+      DOUT << " +" << LR;
+    }
+
+    // Finally, this virtual register is live from the start of any killing
+    // block to the 'use' slot of the killing instruction.
+    for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+      MachineInstr *Kill = vi.Kills[i];
+      unsigned killIdx = getUseIndex(getInstructionIndex(Kill))+1;
+      LiveRange LR(getMBBStartIdx(Kill->getParent()),
+                   killIdx, ValNo);
+      interval.addRange(LR);
+      interval.addKill(ValNo, killIdx);
+      DOUT << " +" << LR;
+    }
+
+  } else {
+    // If this is the second time we see a virtual register definition, it
+    // must be due to phi elimination or two addr elimination.  If this is
+    // the result of two address elimination, then the vreg is one of the
+    // def-and-use register operands.
+    if (mi->isRegTiedToUseOperand(MOIdx)) {
+      // If this is a two-address definition, then we have already processed
+      // the live range.  The only problem is that we didn't realize there
+      // are actually two values in the live interval.  Because of this we
+      // need to take the LiveRange that defines this register and split it
+      // into two values.
+ assert(interval.containsOneValue()); + unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def); + unsigned RedefIndex = getDefIndex(MIIdx); + if (MO.isEarlyClobber()) + RedefIndex = getUseIndex(MIIdx); + + const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1); + VNInfo *OldValNo = OldLR->valno; + + // Delete the initial value, which should be short and continuous, + // because the 2-addr copy must be in the same MBB as the redef. + interval.removeRange(DefIndex, RedefIndex); + + // Two-address vregs should always only be redefined once. This means + // that at this point, there should be exactly one value number in it. + assert(interval.containsOneValue() && "Unexpected 2-addr liveint!"); + + // The new value number (#1) is defined by the instruction we claimed + // defined value #0. + VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy, + VNInfoAllocator); + + // Value#0 is now defined by the 2-addr instruction. + OldValNo->def = RedefIndex; + OldValNo->copy = 0; + if (MO.isEarlyClobber()) + OldValNo->redefByEC = true; + + // Add the new live interval which replaces the range for the input copy. + LiveRange LR(DefIndex, RedefIndex, ValNo); + DOUT << " replace range with " << LR; + interval.addRange(LR); + interval.addKill(ValNo, RedefIndex); + + // If this redefinition is dead, we need to add a dummy unit live + // range covering the def slot. + if (MO.isDead()) + interval.addRange(LiveRange(RedefIndex, RedefIndex+1, OldValNo)); + + DOUT << " RESULT: "; + interval.print(DOUT, tri_); + + } else { + // Otherwise, this must be because of phi elimination. If this is the + // first redefinition of the vreg that we have seen, go back and change + // the live range in the PHI block to be a different value number. + if (interval.containsOneValue()) { + assert(vi.Kills.size() == 1 && + "PHI elimination vreg should have one kill, the PHI itself!"); + + // Remove the old range that we now know has an incorrect number. + VNInfo *VNI = interval.getValNumInfo(0); + MachineInstr *Killer = vi.Kills[0]; + unsigned Start = getMBBStartIdx(Killer->getParent()); + unsigned End = getUseIndex(getInstructionIndex(Killer))+1; + DOUT << " Removing [" << Start << "," << End << "] from: "; + interval.print(DOUT, tri_); DOUT << "\n"; + interval.removeRange(Start, End); + VNI->hasPHIKill = true; + DOUT << " RESULT: "; interval.print(DOUT, tri_); + + // Replace the interval with one of a NEW value number. Note that this + // value number isn't actually defined by an instruction, weird huh? :) + LiveRange LR(Start, End, interval.getNextValue(~0, 0, VNInfoAllocator)); + DOUT << " replace range with " << LR; + interval.addRange(LR); + interval.addKill(LR.valno, End); + DOUT << " RESULT: "; interval.print(DOUT, tri_); + } + + // In the case of PHI elimination, each variable definition is only + // live until the end of the block. We've already taken care of the + // rest of the live range. 
+      unsigned defIndex = getDefIndex(MIIdx);
+      if (MO.isEarlyClobber())
+        defIndex = getUseIndex(MIIdx);
+
+      VNInfo *ValNo;
+      MachineInstr *CopyMI = NULL;
+      unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+      if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+          mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+          mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
+          tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+        CopyMI = mi;
+      ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+      unsigned killIndex = getMBBEndIdx(mbb) + 1;
+      LiveRange LR(defIndex, killIndex, ValNo);
+      interval.addRange(LR);
+      interval.addKill(ValNo, killIndex);
+      ValNo->hasPHIKill = true;
+      DOUT << " +" << LR;
+    }
+  }
+
+  DOUT << '\n';
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+                                              MachineBasicBlock::iterator mi,
+                                              unsigned MIIdx,
+                                              MachineOperand& MO,
+                                              LiveInterval &interval,
+                                              MachineInstr *CopyMI) {
+  // A physical register cannot be live across basic blocks, so its
+  // lifetime must end somewhere in its defining basic block.
+  DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+
+  unsigned baseIndex = MIIdx;
+  unsigned start = getDefIndex(baseIndex);
+  // Earlyclobbers move back one.
+  if (MO.isEarlyClobber())
+    start = getUseIndex(MIIdx);
+  unsigned end = start;
+
+  // If it is not used after definition, it is considered dead at
+  // the instruction defining it. Hence its interval is:
+  // [defSlot(def), defSlot(def)+1)
+  if (MO.isDead()) {
+    DOUT << " dead";
+    end = start + 1;
+    goto exit;
+  }
+
+  // If it is not dead on definition, it must be killed by a
+  // subsequent instruction. Hence its interval is:
+  // [defSlot(def), useSlot(kill)+1)
+  baseIndex += InstrSlots::NUM;
+  while (++mi != MBB->end()) {
+    while (baseIndex / InstrSlots::NUM < i2miMap_.size() &&
+           getInstructionFromIndex(baseIndex) == 0)
+      baseIndex += InstrSlots::NUM;
+    if (mi->killsRegister(interval.reg, tri_)) {
+      DOUT << " killed";
+      end = getUseIndex(baseIndex) + 1;
+      goto exit;
+    } else {
+      int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_);
+      if (DefIdx != -1) {
+        if (mi->isRegTiedToUseOperand(DefIdx)) {
+          // Two-address instruction.
+          end = getDefIndex(baseIndex);
+          if (mi->getOperand(DefIdx).isEarlyClobber())
+            end = getUseIndex(baseIndex);
+        } else {
+          // Another instruction redefines the register before it is ever read.
+          // Then the register is essentially dead at the instruction that
+          // defines it. Hence its interval is:
+          // [defSlot(def), defSlot(def)+1)
+          DOUT << " dead";
+          end = start + 1;
+        }
+        goto exit;
+      }
+    }
+
+    baseIndex += InstrSlots::NUM;
+  }
+
+  // The only cases we should reach here with a dead physreg and no killing
+  // instruction are when the register is live-in to the function and never
+  // used, or when its implicit use has been deleted by the two-address pass.
+  end = start + 1;
+
+exit:
+  assert(start < end && "did not find end of interval?");
+
+  // Already exists? Extend old live interval.
+  LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
+  bool Extend = OldLR != interval.end();
+  VNInfo *ValNo = Extend
+    ?
OldLR->valno : interval.getNextValue(start, CopyMI, VNInfoAllocator); + if (MO.isEarlyClobber() && Extend) + ValNo->redefByEC = true; + LiveRange LR(start, end, ValNo); + interval.addRange(LR); + interval.addKill(LR.valno, end); + DOUT << " +" << LR << '\n'; +} + +void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MI, + unsigned MIIdx, + MachineOperand& MO, + unsigned MOIdx) { + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, + getOrCreateInterval(MO.getReg())); + else if (allocatableRegs_[MO.getReg()]) { + MachineInstr *CopyMI = NULL; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + CopyMI = MI; + handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, + getOrCreateInterval(MO.getReg()), CopyMI); + // Def of a register also defines its sub-registers. + for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) + // If MI also modifies the sub-register explicitly, avoid processing it + // more than once. Do not pass in TRI here so it checks for exact match. + if (!MI->modifiesRegister(*AS)) + handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, + getOrCreateInterval(*AS), 0); + } +} + +void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, + unsigned MIIdx, + LiveInterval &interval, bool isAlias) { + DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg)); + + // Look for kills, if it reaches a def before it's killed, then it shouldn't + // be considered a livein. + MachineBasicBlock::iterator mi = MBB->begin(); + unsigned baseIndex = MIIdx; + unsigned start = baseIndex; + while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + getInstructionFromIndex(baseIndex) == 0) + baseIndex += InstrSlots::NUM; + unsigned end = baseIndex; + bool SeenDefUse = false; + + while (mi != MBB->end()) { + if (mi->killsRegister(interval.reg, tri_)) { + DOUT << " killed"; + end = getUseIndex(baseIndex) + 1; + SeenDefUse = true; + goto exit; + } else if (mi->modifiesRegister(interval.reg, tri_)) { + // Another instruction redefines the register before it is ever read. + // Then the register is essentially dead at the instruction that defines + // it. Hence its interval is: + // [defSlot(def), defSlot(def)+1) + DOUT << " dead"; + end = getDefIndex(start) + 1; + SeenDefUse = true; + goto exit; + } + + baseIndex += InstrSlots::NUM; + ++mi; + if (mi != MBB->end()) { + while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + getInstructionFromIndex(baseIndex) == 0) + baseIndex += InstrSlots::NUM; + } + } + +exit: + // Live-in register might not be used at all. + if (!SeenDefUse) { + if (isAlias) { + DOUT << " dead"; + end = getDefIndex(MIIdx) + 1; + } else { + DOUT << " live through"; + end = baseIndex; + } + } + + LiveRange LR(start, end, interval.getNextValue(~0U, 0, VNInfoAllocator)); + interval.addRange(LR); + interval.addKill(LR.valno, end); + DOUT << " +" << LR << '\n'; +} + +/// computeIntervals - computes the live intervals for virtual +/// registers. 
for some ordering of the machine instructions [1,N] a +/// live interval is an interval [i, j) where 1 <= i <= j < N for +/// which a variable is live +void LiveIntervals::computeIntervals() { + + DOUT << "********** COMPUTING LIVE INTERVALS **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'; + + for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + // Track the index of the current machine instr. + unsigned MIIndex = getMBBStartIdx(MBB); + DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n"; + + MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); + + // Create intervals for live-ins to this BB first. + for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(), + LE = MBB->livein_end(); LI != LE; ++LI) { + handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); + // Multiple live-ins can alias the same register. + for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS) + if (!hasInterval(*AS)) + handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), + true); + } + + // Skip over empty initial indices. + while (MIIndex / InstrSlots::NUM < i2miMap_.size() && + getInstructionFromIndex(MIIndex) == 0) + MIIndex += InstrSlots::NUM; + + for (; MI != miEnd; ++MI) { + DOUT << MIIndex << "\t" << *MI; + + // Handle defs. + for (int i = MI->getNumOperands() - 1; i >= 0; --i) { + MachineOperand &MO = MI->getOperand(i); + // handle register defs - build intervals + if (MO.isReg() && MO.getReg() && MO.isDef()) { + handleRegisterDef(MBB, MI, MIIndex, MO, i); + } + } + + // Skip over the empty slots after each instruction. + unsigned Slots = MI->getDesc().getNumDefs(); + if (Slots == 0) + Slots = 1; + MIIndex += InstrSlots::NUM * Slots; + + // Skip over empty indices. + while (MIIndex / InstrSlots::NUM < i2miMap_.size() && + getInstructionFromIndex(MIIndex) == 0) + MIIndex += InstrSlots::NUM; + } + } +} + +bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End, + SmallVectorImpl<MachineBasicBlock*> &MBBs) const { + std::vector<IdxMBBPair>::const_iterator I = + std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start); + + bool ResVal = false; + while (I != Idx2MBBMap.end()) { + if (I->first >= End) + break; + MBBs.push_back(I->second); + ResVal = true; + ++I; + } + return ResVal; +} + +bool LiveIntervals::findReachableMBBs(unsigned Start, unsigned End, + SmallVectorImpl<MachineBasicBlock*> &MBBs) const { + std::vector<IdxMBBPair>::const_iterator I = + std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start); + + bool ResVal = false; + while (I != Idx2MBBMap.end()) { + if (I->first > End) + break; + MachineBasicBlock *MBB = I->second; + if (getMBBEndIdx(MBB) > End) + break; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + MBBs.push_back(*SI); + ResVal = true; + ++I; + } + return ResVal; +} + +LiveInterval* LiveIntervals::createInterval(unsigned reg) { + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F; + return new LiveInterval(reg, Weight); +} + +/// dupInterval - Duplicate a live interval. The caller is responsible for +/// managing the allocated memory. 
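+///
+/// Usage sketch (illustrative; assumes a LiveIntervals instance LIS and an
+/// existing interval LI in scope):
+///   LiveInterval *Clone = LIS.dupInterval(&LI);
+///   // ... inspect or modify Clone ...
+///   delete Clone;   // the caller owns the copy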
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+  LiveInterval *NewLI = createInterval(li->reg);
+  NewLI->Copy(*li, getVNInfoAllocator());
+  return NewLI;
+}
+
+/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
+/// copy field and returns the source register that defines it.
+unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
+  if (!VNI->copy)
+    return 0;
+
+  if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+    // If it's extracting out of a physical register, return the sub-register.
+    unsigned Reg = VNI->copy->getOperand(1).getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm());
+    return Reg;
+  } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+             VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
+    return VNI->copy->getOperand(2).getReg();
+
+  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+  if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+    return SrcReg;
+  assert(0 && "Unrecognized copy instruction!");
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+                                            MachineInstr *MI) const {
+  unsigned RegOp = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || Reg == li.reg)
+      continue;
+    // FIXME: For now, only remat MI with at most one register operand.
+    assert(!RegOp &&
+           "Can't rematerialize instruction with multiple register operands!");
+    RegOp = MO.getReg();
+#ifndef NDEBUG
+    break;
+#endif
+  }
+  return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+                                       unsigned UseIdx) const {
+  unsigned Index = getInstructionIndex(MI);
+  VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
+  LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
+  return UI != li.end() && UI->valno == ValNo;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                       const VNInfo *ValNo, MachineInstr *MI,
+                                       SmallVectorImpl<LiveInterval*> &SpillIs,
+                                       bool &isLoad) {
+  if (DisableReMat)
+    return false;
+
+  if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+    return true;
+
+  int FrameIdx = 0;
+  if (tii_->isLoadFromStackSlot(MI, FrameIdx) &&
+      mf_->getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+    // FIXME: Let the target-specific isReallyTriviallyReMaterializable
+    // determine this, but remember this is not safe to fold into a
+    // two-address instruction.
+    // This is a load from a fixed stack slot. It can be rematerialized.
+    return true;
+
+  // If the target-specific rules don't identify an instruction as
+  // being trivially rematerializable, use some target-independent
+  // rules.
+  if (!MI->getDesc().isRematerializable() ||
+      !tii_->isTriviallyReMaterializable(MI)) {
+    if (!EnableAggressiveRemat)
+      return false;
+
+    // If the instruction accesses memory but the memoperands have been lost,
+    // we can't analyze it.
+    const TargetInstrDesc &TID = MI->getDesc();
+    if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty())
+      return false;
+
+    // Avoid instructions obviously unsafe for remat.
+    if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable())
+      return false;
+
+    // If the instruction accesses memory and the memory could be non-constant,
+    // assume the instruction is not rematerializable.
+    for (std::list<MachineMemOperand>::const_iterator
+           I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I){
+      const MachineMemOperand &MMO = *I;
+      if (MMO.isVolatile() || MMO.isStore())
+        return false;
+      const Value *V = MMO.getValue();
+      if (!V)
+        return false;
+      if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+        if (!PSV->isConstant(mf_->getFrameInfo()))
+          return false;
+      } else if (!aa_->pointsToConstantMemory(V))
+        return false;
+    }
+
+    // If any of the registers accessed are non-constant, conservatively assume
+    // the instruction is not rematerializable.
+    unsigned ImpUse = 0;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg()) {
+        unsigned Reg = MO.getReg();
+        if (Reg == 0)
+          continue;
+        if (TargetRegisterInfo::isPhysicalRegister(Reg))
+          return false;
+
+        // Only allow one def, and that in the first operand.
+        if (MO.isDef() != (i == 0))
+          return false;
+
+        // Only allow constant-valued registers.
+        bool IsLiveIn = mri_->isLiveIn(Reg);
+        MachineRegisterInfo::def_iterator I = mri_->def_begin(Reg),
+                                          E = mri_->def_end();
+
+        // For the def, it should be the only def of that register.
+        if (MO.isDef() && (next(I) != E || IsLiveIn))
+          return false;
+
+        if (MO.isUse()) {
+          // Only allow one other register use, as that's all the
+          // remat mechanisms support currently.
+          if (Reg != li.reg) {
+            if (ImpUse == 0)
+              ImpUse = Reg;
+            else if (Reg != ImpUse)
+              return false;
+          }
+          // For the use, there should be only one associated def.
+          if (I != E && (next(I) != E || IsLiveIn))
+            return false;
+        }
+      }
+    }
+  }
+
+  unsigned ImpUse = getReMatImplicitUse(li, MI);
+  if (ImpUse) {
+    const LiveInterval &ImpLi = getInterval(ImpUse);
+    for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
+           re = mri_->use_end(); ri != re; ++ri) {
+      MachineInstr *UseMI = &*ri;
+      unsigned UseIdx = getInstructionIndex(UseMI);
+      if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+        continue;
+      if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+        return false;
+    }
+
+    // If a register operand of the re-materialized instruction is going to
+    // be spilled next, then it's not legal to re-materialize this instruction.
+    for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
+      if (ImpUse == SpillIs[i]->reg)
+        return false;
+  }
+  return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                       const VNInfo *ValNo, MachineInstr *MI) {
+  SmallVector<LiveInterval*, 4> Dummy1;
+  bool Dummy2;
+  return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+}
+
+/// isReMaterializable - Returns true if every definition of MI of every
+/// val# of the specified interval is re-materializable.
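+/// (Illustrative note: per the sentinel convention above, a val# whose def
+/// index is ~1U is dead and skipped, while ~0U marks an unknown def and is
+/// never rematerializable.)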
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                       SmallVectorImpl<LiveInterval*> &SpillIs,
+                                       bool &isLoad) {
+  isLoad = false;
+  for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+       i != e; ++i) {
+    const VNInfo *VNI = *i;
+    unsigned DefIdx = VNI->def;
+    if (DefIdx == ~1U)
+      continue; // Dead val#.
+    // Is the def for the val# rematerializable?
+    if (DefIdx == ~0u)
+      return false;
+    MachineInstr *ReMatDefMI = getInstructionFromIndex(DefIdx);
+    bool DefIsLoad = false;
+    if (!ReMatDefMI ||
+        !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+      return false;
+    isLoad |= DefIsLoad;
+  }
+  return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+                            SmallVector<unsigned, 2> &Ops,
+                            unsigned &MRInfo,
+                            SmallVector<unsigned, 2> &FoldOps) {
+  MRInfo = 0;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    unsigned OpIdx = Ops[i];
+    MachineOperand &MO = MI->getOperand(OpIdx);
+    // FIXME: fold subreg use.
+    if (MO.getSubReg())
+      return true;
+    if (MO.isDef())
+      MRInfo |= (unsigned)VirtRegMap::isMod;
+    else {
+      // Filter out two-address use operand(s).
+      if (MI->isRegTiedToDefOperand(OpIdx)) {
+        MRInfo = VirtRegMap::isModRef;
+        continue;
+      }
+      MRInfo |= (unsigned)VirtRegMap::isRef;
+    }
+    FoldOps.push_back(OpIdx);
+  }
+  return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold a spill to / restore from a
+/// stack slot, or a rematerialized load, into the ith operand of the
+/// specified MI. If it is successful, MI is updated with the newly created
+/// MI and returns true.
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+                                         VirtRegMap &vrm, MachineInstr *DefMI,
+                                         unsigned InstrIdx,
+                                         SmallVector<unsigned, 2> &Ops,
+                                         bool isSS, int Slot, unsigned Reg) {
+  // If it is an implicit def instruction, just delete it.
+  if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+    RemoveMachineInstrFromMaps(MI);
+    vrm.RemoveMachineInstrFromMaps(MI);
+    MI->eraseFromParent();
+    ++numFolds;
+    return true;
+  }
+
+  // Filter the list of operand indexes that are to be folded. Abort if
+  // any operand will prevent folding.
+  unsigned MRInfo = 0;
+  SmallVector<unsigned, 2> FoldOps;
+  if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+    return false;
+
+  // The only time it's safe to fold into a two address instruction is when
+  // it's folding reload and spill from / into a spill stack slot.
+  if (DefMI && (MRInfo & VirtRegMap::isMod))
+    return false;
+
+  MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot)
+                           : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI);
+  if (fmi) {
+    // Remember this instruction uses the spill slot.
+    if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+    // Attempt to fold the memory reference into the instruction. If
+    // we can do this, we don't need to insert spill code.
+    MachineBasicBlock &MBB = *MI->getParent();
+    if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+      vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+    vrm.transferSpillPts(MI, fmi);
+    vrm.transferRestorePts(MI, fmi);
+    vrm.transferEmergencySpills(MI, fmi);
+    mi2iMap_.erase(MI);
+    i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
+    mi2iMap_[fmi] = InstrIdx;
+    MI = MBB.insert(MBB.erase(MI), fmi);
+    ++numFolds;
+    return true;
+  }
+  return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
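+/// (Illustrative example: folding a reload into a use operand can be legal,
+/// while rematerializing into an operand that writes the register is
+/// rejected below, since it's only legal to remat for a use, not a def.)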
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+                                         SmallVector<unsigned, 2> &Ops,
+                                         bool ReMat) const {
+  // Filter the list of operand indexes that are to be folded. Abort if
+  // any operand will prevent folding.
+  unsigned MRInfo = 0;
+  SmallVector<unsigned, 2> FoldOps;
+  if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+    return false;
+
+  // It's only legal to remat for a use, not a def.
+  if (ReMat && (MRInfo & VirtRegMap::isMod))
+    return false;
+
+  return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+  SmallPtrSet<MachineBasicBlock*, 4> MBBs;
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    std::vector<IdxMBBPair>::const_iterator II =
+      std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), I->start);
+    if (II == Idx2MBBMap.end())
+      continue;
+    if (I->end > II->first)  // crossing a MBB.
+      return false;
+    MBBs.insert(II->second);
+    if (MBBs.size() > 1)
+      return false;
+  }
+  return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
+/// interval on to-be re-materialized operands of MI) with new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+                                       MachineInstr *MI, unsigned NewVReg,
+                                       VirtRegMap &vrm) {
+  // There is an implicit use. That means one of the other operands is
+  // being remat'ed and the remat'ed instruction has li.reg as a
+  // use operand. Make sure we rewrite that as well.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (!vrm.isReMaterialized(Reg))
+      continue;
+    MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
+    MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
+    if (UseMO)
+      UseMO->setReg(NewVReg);
+  }
+}
+
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+bool LiveIntervals::
+rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
+                 bool TrySplit, unsigned index, unsigned end,  MachineInstr *MI,
+                 MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+                 unsigned Slot, int LdSlot,
+                 bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+                 VirtRegMap &vrm,
+                 const TargetRegisterClass* rc,
+                 SmallVector<int, 4> &ReMatIds,
+                 const MachineLoopInfo *loopInfo,
+                 unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
+                 DenseMap<unsigned,unsigned> &MBBVRegsMap,
+                 std::vector<LiveInterval*> &NewLIs) {
+  bool CanFold = false;
+ RestartInstruction:
+  for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+    MachineOperand& mop = MI->getOperand(i);
+    if (!mop.isReg())
+      continue;
+    unsigned Reg = mop.getReg();
+    unsigned RegI = Reg;
+    if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (Reg != li.reg)
+      continue;
+
+    bool TryFold = !DefIsReMat;
+    bool FoldSS = true; // Default behavior unless it's a remat.
+    int FoldSlot = Slot;
+    if (DefIsReMat) {
+      // If this is the rematerializable definition MI itself and
+      // all of its uses are rematerialized, simply delete it.
+      if (MI == ReMatOrigDefMI && CanDelete) {
+        DOUT << "\t\t\t\tErasing re-materializable def: ";
+        DOUT << MI << '\n';
+        RemoveMachineInstrFromMaps(MI);
+        vrm.RemoveMachineInstrFromMaps(MI);
+        MI->eraseFromParent();
+        break;
+      }
+
+      // If def for this use can't be rematerialized, then try folding.
+      // If def is rematerializable and it's a load, also try folding.
+      TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
+      if (isLoad) {
+        // Try fold loads (from stack slot, constant pool, etc.) into uses.
+        FoldSS = isLoadSS;
+        FoldSlot = LdSlot;
+      }
+    }
+
+    // Scan all of the operands of this instruction rewriting operands
+    // to use NewVReg instead of li.reg as appropriate. We do this for
+    // two reasons:
+    //
+    //   1. If the instr reads the same spilled vreg multiple times, we
+    //      want to reuse the NewVReg.
+    //   2. If the instr is a two-addr instruction, we are required to
+    //      keep the src/dst regs pinned.
+    //
+    // Keep track of whether we replace a use and/or def so that we can
+    // create the spill interval with the appropriate range.
+
+    HasUse = mop.isUse();
+    HasDef = mop.isDef();
+    SmallVector<unsigned, 2> Ops;
+    Ops.push_back(i);
+    for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
+      const MachineOperand &MOj = MI->getOperand(j);
+      if (!MOj.isReg())
+        continue;
+      unsigned RegJ = MOj.getReg();
+      if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ))
+        continue;
+      if (RegJ == RegI) {
+        Ops.push_back(j);
+        HasUse |= MOj.isUse();
+        HasDef |= MOj.isDef();
+      }
+    }
+
+    if (HasUse && !li.liveAt(getUseIndex(index)))
+      // Must be defined by an implicit def. It should not be spilled. Note,
+      // this is for correctness reasons. e.g.
+      // 8   %reg1024<def> = IMPLICIT_DEF
+      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled; you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and the two target registers would overlap.
+      HasUse = false;
+
+    // Create a new virtual register for the spill interval.
+    // Create the new register now so we can map the fold instruction
+    // to the new register so when it is unfolded we get the correct
+    // answer.
+    bool CreatedNewVReg = false;
+    if (NewVReg == 0) {
+      NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      CreatedNewVReg = true;
+    }
+
+    if (!TryFold)
+      CanFold = false;
+    else {
+      // Do not fold load / store here if we are splitting. We'll find an
+      // optimal point to insert a load / store later.
+      if (!TrySplit) {
+        if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+                                 Ops, FoldSS, FoldSlot, NewVReg)) {
+          // Folding the load/store can completely change the instruction in
+          // unpredictable ways, rescan it from the beginning.
+
+          if (FoldSS) {
+            // We need to give the new vreg the same stack slot as the
+            // spilled interval.
+            vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
+          }
+
+          HasUse = false;
+          HasDef = false;
+          CanFold = false;
+          if (isNotInMIMap(MI))
+            break;
+          goto RestartInstruction;
+        }
+      } else {
+        // We'll try to fold it later if it's profitable.
+        CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
+      }
+    }
+
+    mop.setReg(NewVReg);
+    if (mop.isImplicit())
+      rewriteImplicitOps(li, MI, NewVReg, vrm);
+
+    // Reuse NewVReg for other reads.
+ for (unsigned j = 0, e = Ops.size(); j != e; ++j) { + MachineOperand &mopj = MI->getOperand(Ops[j]); + mopj.setReg(NewVReg); + if (mopj.isImplicit()) + rewriteImplicitOps(li, MI, NewVReg, vrm); + } + + if (CreatedNewVReg) { + if (DefIsReMat) { + vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/); + if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { + // Each valnum may have its own remat id. + ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg); + } else { + vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]); + } + if (!CanDelete || (HasUse && HasDef)) { + // If this is a two-addr instruction then its use operands are + // rematerializable but its def is not. It should be assigned a + // stack slot. + vrm.assignVirt2StackSlot(NewVReg, Slot); + } + } else { + vrm.assignVirt2StackSlot(NewVReg, Slot); + } + } else if (HasUse && HasDef && + vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) { + // If this interval hasn't been assigned a stack slot (because earlier + // def is a deleted remat def), do it now. + assert(Slot != VirtRegMap::NO_STACK_SLOT); + vrm.assignVirt2StackSlot(NewVReg, Slot); + } + + // Re-matting an instruction with virtual register use. Add the + // register as an implicit use on the use MI. + if (DefIsReMat && ImpUse) + MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); + + // Create a new register interval for this spill / remat. + LiveInterval &nI = getOrCreateInterval(NewVReg); + if (CreatedNewVReg) { + NewLIs.push_back(&nI); + MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg)); + if (TrySplit) + vrm.setIsSplitFromReg(NewVReg, li.reg); + } + + if (HasUse) { + if (CreatedNewVReg) { + LiveRange LR(getLoadIndex(index), getUseIndex(index)+1, + nI.getNextValue(~0U, 0, VNInfoAllocator)); + DOUT << " +" << LR; + nI.addRange(LR); + } else { + // Extend the split live interval to this def / use. + unsigned End = getUseIndex(index)+1; + LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End, + nI.getValNumInfo(nI.getNumValNums()-1)); + DOUT << " +" << LR; + nI.addRange(LR); + } + } + if (HasDef) { + LiveRange LR(getDefIndex(index), getStoreIndex(index), + nI.getNextValue(~0U, 0, VNInfoAllocator)); + DOUT << " +" << LR; + nI.addRange(LR); + } + + DOUT << "\t\t\t\tAdded new interval: "; + nI.print(DOUT, tri_); + DOUT << '\n'; + } + return CanFold; +} +bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, + const VNInfo *VNI, + MachineBasicBlock *MBB, unsigned Idx) const { + unsigned End = getMBBEndIdx(MBB); + for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) { + unsigned KillIdx = VNI->kills[j]; + if (KillIdx > Idx && KillIdx < End) + return true; + } + return false; +} + +/// RewriteInfo - Keep track of machine instrs that will be rewritten +/// during spilling. 
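+/// Each entry pairs the instruction's index with whether that instruction
+/// reads and/or writes the register being spilled, so the rewrites below can
+/// be replayed in instruction order.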
+namespace {
+  struct RewriteInfo {
+    unsigned Index;
+    MachineInstr *MI;
+    bool HasUse;
+    bool HasDef;
+    RewriteInfo(unsigned i, MachineInstr *mi, bool u, bool d)
+      : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+  };
+
+  struct RewriteInfoCompare {
+    bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+      return LHS.Index < RHS.Index;
+    }
+  };
+}
+
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+                    LiveInterval::Ranges::const_iterator &I,
+                    MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+                    unsigned Slot, int LdSlot,
+                    bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+                    VirtRegMap &vrm,
+                    const TargetRegisterClass* rc,
+                    SmallVector<int, 4> &ReMatIds,
+                    const MachineLoopInfo *loopInfo,
+                    BitVector &SpillMBBs,
+                    DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+                    BitVector &RestoreMBBs,
+                    DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+                    DenseMap<unsigned,unsigned> &MBBVRegsMap,
+                    std::vector<LiveInterval*> &NewLIs) {
+  bool AllCanFold = true;
+  unsigned NewVReg = 0;
+  unsigned start = getBaseIndex(I->start);
+  unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM;
+
+  // First collect all the defs / uses in this live range that will be
+  // rewritten. Make sure they are sorted according to instruction index.
+  std::vector<RewriteInfo> RewriteMIs;
+  for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+         re = mri_->reg_end(); ri != re; ) {
+    MachineInstr *MI = &*ri;
+    MachineOperand &O = ri.getOperand();
+    ++ri;
+    assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
+    unsigned index = getInstructionIndex(MI);
+    if (index < start || index >= end)
+      continue;
+    if (O.isUse() && !li.liveAt(getUseIndex(index)))
+      // Must be defined by an implicit def. It should not be spilled. Note,
+      // this is for correctness reasons. e.g.
+      // 8   %reg1024<def> = IMPLICIT_DEF
+      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled; you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and the two target registers would overlap.
+      continue;
+    RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+  }
+  std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+  unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
+  // Now rewrite the defs and uses.
+  for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+    RewriteInfo &rwi = RewriteMIs[i];
+    ++i;
+    unsigned index = rwi.Index;
+    bool MIHasUse = rwi.HasUse;
+    bool MIHasDef = rwi.HasDef;
+    MachineInstr *MI = rwi.MI;
+    // If MI defs and/or uses the same register multiple times, then there
+    // are multiple entries.
+    unsigned NumUses = MIHasUse;
+    while (i != e && RewriteMIs[i].MI == MI) {
+      assert(RewriteMIs[i].Index == index);
+      bool isUse = RewriteMIs[i].HasUse;
+      if (isUse) ++NumUses;
+      MIHasUse |= isUse;
+      MIHasDef |= RewriteMIs[i].HasDef;
+      ++i;
+    }
+    MachineBasicBlock *MBB = MI->getParent();
+
+    if (ImpUse && MI != ReMatDefMI) {
+      // Re-matting an instruction with virtual register use. Update the
+      // register interval's spill weight to HUGE_VALF to prevent it from
+      // being spilled.
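+      // (A spill weight of HUGE_VALF marks an interval as unspillable; the
+      // same convention is used for the spill intervals created below.)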
+      LiveInterval &ImpLi = getInterval(ImpUse);
+      ImpLi.weight = HUGE_VALF;
+    }
+
+    unsigned MBBId = MBB->getNumber();
+    unsigned ThisVReg = 0;
+    if (TrySplit) {
+      DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+      if (NVI != MBBVRegsMap.end()) {
+        ThisVReg = NVI->second;
+        // One common case:
+        // x = use
+        // ...
+        // ...
+        // def = ...
+        //     = use
+        // It's better to start a new interval to avoid artificially
+        // extending the new interval.
+        if (MIHasDef && !MIHasUse) {
+          MBBVRegsMap.erase(MBB->getNumber());
+          ThisVReg = 0;
+        }
+      }
+    }
+
+    bool IsNew = ThisVReg == 0;
+    if (IsNew) {
+      // This ends the previous live interval. If all of its def / use
+      // can be folded, give it a low spill weight.
+      if (NewVReg && TrySplit && AllCanFold) {
+        LiveInterval &nI = getOrCreateInterval(NewVReg);
+        nI.weight /= 10.0F;
+      }
+      AllCanFold = true;
+    }
+    NewVReg = ThisVReg;
+
+    bool HasDef = false;
+    bool HasUse = false;
+    bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+                         index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+                         Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                         CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+                         ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+    if (!HasDef && !HasUse)
+      continue;
+
+    AllCanFold &= CanFold;
+
+    // Update weight of spill interval.
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    if (!TrySplit) {
+      // The spill weight is now infinity as it cannot be spilled again.
+      nI.weight = HUGE_VALF;
+      continue;
+    }
+
+    // Keep track of the last def and first use in each MBB.
+    if (HasDef) {
+      if (MI != ReMatOrigDefMI || !CanDelete) {
+        bool HasKill = false;
+        if (!HasUse)
+          HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index));
+        else {
+          // If this is a two-address instruction, then this index starts a
+          // new VNInfo.
+          const VNInfo *VNI = li.findDefinedVNInfo(getDefIndex(index));
+          if (VNI)
+            HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index));
+        }
+        DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+          SpillIdxes.find(MBBId);
+        if (!HasKill) {
+          if (SII == SpillIdxes.end()) {
+            std::vector<SRInfo> S;
+            S.push_back(SRInfo(index, NewVReg, true));
+            SpillIdxes.insert(std::make_pair(MBBId, S));
+          } else if (SII->second.back().vreg != NewVReg) {
+            SII->second.push_back(SRInfo(index, NewVReg, true));
+          } else if ((int)index > SII->second.back().index) {
+            // If there is an earlier def and this is a two-address
+            // instruction, then it's not possible to fold the store (which
+            // would also fold the load).
+            SRInfo &Info = SII->second.back();
+            Info.index = index;
+            Info.canFold = !HasUse;
+          }
+          SpillMBBs.set(MBBId);
+        } else if (SII != SpillIdxes.end() &&
+                   SII->second.back().vreg == NewVReg &&
+                   (int)index > SII->second.back().index) {
+          // There is an earlier def that's not killed (must be two-address).
+          // The spill is no longer needed.
+          SII->second.pop_back();
+          if (SII->second.empty()) {
+            SpillIdxes.erase(MBBId);
+            SpillMBBs.reset(MBBId);
+          }
+        }
+      }
+    }
+
+    if (HasUse) {
+      DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+        SpillIdxes.find(MBBId);
+      if (SII != SpillIdxes.end() &&
+          SII->second.back().vreg == NewVReg &&
+          (int)index > SII->second.back().index)
+        // Use(s) follow the last def; it's not safe to fold the spill.
+        SII->second.back().canFold = false;
+      DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+        RestoreIdxes.find(MBBId);
+      if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+        // If we are splitting live intervals, only fold if it's the first
+        // use and there isn't another use later in the MBB.
+        RII->second.back().canFold = false;
+      else if (IsNew) {
+        // Only need a reload if there isn't an earlier def / use.
+        if (RII == RestoreIdxes.end()) {
+          std::vector<SRInfo> Infos;
+          Infos.push_back(SRInfo(index, NewVReg, true));
+          RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+        } else {
+          RII->second.push_back(SRInfo(index, NewVReg, true));
+        }
+        RestoreMBBs.set(MBBId);
+      }
+    }
+
+    // Update spill weight.
+    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+  }
+
+  if (NewVReg && TrySplit && AllCanFold) {
+    // If all of its def / use can be folded, give it a low spill weight.
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    nI.weight /= 10.0F;
+  }
+}
+
+bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr,
+                        BitVector &RestoreMBBs,
+                        DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+  if (!RestoreMBBs[Id])
+    return false;
+  std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+  for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+    if (Restores[i].index == index &&
+        Restores[i].vreg == vr &&
+        Restores[i].canFold)
+      return true;
+  return false;
+}
+
+void LiveIntervals::eraseRestoreInfo(int Id, int index, unsigned vr,
+                        BitVector &RestoreMBBs,
+                        DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
+  if (!RestoreMBBs[Id])
+    return;
+  std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+  for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+    if (Restores[i].index == index && Restores[i].vreg == vr)
+      Restores[i].index = -1;
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+                                    const TargetRegisterClass* rc,
+                                    std::vector<LiveInterval*> &NewLIs) {
+  for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+         re = mri_->reg_end(); ri != re; ) {
+    MachineOperand &O = ri.getOperand();
+    MachineInstr *MI = &*ri;
+    ++ri;
+    if (O.isDef()) {
+      assert(MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF &&
+             "Register def was not rewritten?");
+      RemoveMachineInstrFromMaps(MI);
+      vrm.RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+    } else {
+      // This must be a use of an implicit_def so it's not part of the live
+      // interval. Create a new empty live interval for it.
+      // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+      unsigned NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      vrm.setIsImplicitlyDefined(NewVReg);
+      NewLIs.push_back(&getOrCreateInterval(NewVReg));
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.isReg() && MO.getReg() == li.reg)
+          MO.setReg(NewVReg);
+      }
+    }
+  }
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpillsFast(const LiveInterval &li,
+                          const MachineLoopInfo *loopInfo,
+                          VirtRegMap &vrm) {
+  unsigned slot = vrm.assignVirt2StackSlot(li.reg);
+
+  std::vector<LiveInterval*> added;
+
+  assert(li.weight != HUGE_VALF &&
+         "attempt to spill already spilled interval!");
+
+  DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+  DEBUG(li.dump());
+  DOUT << '\n';
+
+  const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+  MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg);
+  while (RI != mri_->reg_end()) {
+    MachineInstr* MI = &*RI;
+
+    SmallVector<unsigned, 2> Indices;
+    bool HasUse = false;
+    bool HasDef = false;
+
+    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+      MachineOperand& mop = MI->getOperand(i);
+      if (!mop.isReg() || mop.getReg() != li.reg) continue;
+
+      HasUse |= MI->getOperand(i).isUse();
+      HasDef |= MI->getOperand(i).isDef();
+
+      Indices.push_back(i);
+    }
+
+    if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI),
+                              Indices, true, slot, li.reg)) {
+      unsigned NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      vrm.assignVirt2StackSlot(NewVReg, slot);
+
+      // create a new register for this spill
+      LiveInterval &nI = getOrCreateInterval(NewVReg);
+
+      // the spill weight is now infinity as it
+      // cannot be spilled again
+      nI.weight = HUGE_VALF;
+
+      // Rewrite register operands to use the new vreg.
+      for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
+           E = Indices.end(); I != E; ++I) {
+        MI->getOperand(*I).setReg(NewVReg);
+
+        if (MI->getOperand(*I).isUse())
+          MI->getOperand(*I).setIsKill(true);
+      }
+
+      // Fill in the new live interval.
+      unsigned index = getInstructionIndex(MI);
+      if (HasUse) {
+        LiveRange LR(getLoadIndex(index), getUseIndex(index),
+                     nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+        DOUT << " +" << LR;
+        nI.addRange(LR);
+        vrm.addRestorePoint(NewVReg, MI);
+      }
+      if (HasDef) {
+        LiveRange LR(getDefIndex(index), getStoreIndex(index),
+                     nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+        DOUT << " +" << LR;
+        nI.addRange(LR);
+        vrm.addSpillPoint(NewVReg, true, MI);
+      }
+
+      added.push_back(&nI);
+
+      DOUT << "\t\t\t\tadded new interval: ";
+      DEBUG(nI.dump());
+      DOUT << '\n';
+    }
+
+    RI = mri_->reg_begin(li.reg);
+  }
+
+  return added;
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+                      SmallVectorImpl<LiveInterval*> &SpillIs,
+                      const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+
+  if (EnableFastSpilling)
+    return addIntervalsForSpillsFast(li, loopInfo, vrm);
+
+  assert(li.weight != HUGE_VALF &&
+         "attempt to spill already spilled interval!");
+
+  DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+  li.print(DOUT, tri_);
+  DOUT << '\n';
+
+  // Each bit specifies whether a spill is required in the MBB.
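+  // SpillMBBs / RestoreMBBs are indexed by MBB number, and SpillIdxes /
+  // RestoreIdxes map each flagged MBB to the instruction indices where a
+  // spill or restore may have to be inserted, so only those blocks need to
+  // be revisited once rewriting is done.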
+  BitVector SpillMBBs(mf_->getNumBlockIDs());
+  DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+  BitVector RestoreMBBs(mf_->getNumBlockIDs());
+  DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+  DenseMap<unsigned,unsigned> MBBVRegsMap;
+  std::vector<LiveInterval*> NewLIs;
+  const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+  unsigned NumValNums = li.getNumValNums();
+  SmallVector<MachineInstr*, 4> ReMatDefs;
+  ReMatDefs.resize(NumValNums, NULL);
+  SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+  ReMatOrigDefs.resize(NumValNums, NULL);
+  SmallVector<int, 4> ReMatIds;
+  ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+  BitVector ReMatDelete(NumValNums);
+  unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
+
+  // Spilling a split live interval. It cannot be split any further. It's also
+  // guaranteed to be a single val# / range interval.
+  if (vrm.getPreSplitReg(li.reg)) {
+    vrm.setIsSplitFromReg(li.reg, 0);
+    // Unset the split kill marker on the last use.
+    unsigned KillIdx = vrm.getKillPoint(li.reg);
+    if (KillIdx) {
+      MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+      assert(KillMI && "Last use disappeared?");
+      int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+      assert(KillOp != -1 && "Last use disappeared?");
+      KillMI->getOperand(KillOp).setIsKill(false);
+    }
+    vrm.removeKillPoint(li.reg);
+    bool DefIsReMat = vrm.isReMaterialized(li.reg);
+    Slot = vrm.getStackSlot(li.reg);
+    assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+    MachineInstr *ReMatDefMI = DefIsReMat ?
+      vrm.getReMaterializedMI(li.reg) : NULL;
+    int LdSlot = 0;
+    bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+    bool isLoad = isLoadSS ||
+      (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+    bool IsFirstRange = true;
+    for (LiveInterval::Ranges::const_iterator
+           I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+      // If this is a split live interval with multiple ranges, it means there
+      // are two-address instructions that re-defined the value. Only the
+      // first def can be rematerialized!
+      if (IsFirstRange) {
+        // Note ReMatOrigDefMI has already been deleted.
+        rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+                             Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                             false, vrm, rc, ReMatIds, loopInfo,
+                             SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                             MBBVRegsMap, NewLIs);
+      } else {
+        rewriteInstructionsForSpills(li, false, I, NULL, 0,
+                             Slot, 0, false, false, false,
+                             false, vrm, rc, ReMatIds, loopInfo,
+                             SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                             MBBVRegsMap, NewLIs);
+      }
+      IsFirstRange = false;
+    }
+
+    handleSpilledImpDefs(li, vrm, rc, NewLIs);
+    return NewLIs;
+  }
+
+  bool TrySplit = SplitAtBB && !intervalIsInOneMBB(li);
+  if (SplitLimit != -1 && (int)numSplits >= SplitLimit)
+    TrySplit = false;
+  if (TrySplit)
+    ++numSplits;
+  bool NeedStackSlot = false;
+  for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+       i != e; ++i) {
+    const VNInfo *VNI = *i;
+    unsigned VN = VNI->id;
+    unsigned DefIdx = VNI->def;
+    if (DefIdx == ~1U)
+      continue; // Dead val#.
+    // Is the def for the val# rematerializable?
+    MachineInstr *ReMatDefMI = (DefIdx == ~0u)
+      ? 0 : getInstructionFromIndex(DefIdx);
+    bool dummy;
+    if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+      // Remember how to remat the def of this val#.
+      ReMatOrigDefs[VN] = ReMatDefMI;
+      // Original def may be modified so we have to make a copy here.
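+      // (The clone is stashed in ClonedMIs so it stays valid even if the
+      // original def is later deleted or changed by folding.)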
+      MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+      ClonedMIs.push_back(Clone);
+      ReMatDefs[VN] = Clone;
+
+      bool CanDelete = true;
+      if (VNI->hasPHIKill) {
+        // A kill is a phi node; not all of its uses can be rematerialized.
+        // It must not be deleted.
+        CanDelete = false;
+        // Need a stack slot if there is any live range where uses cannot be
+        // rematerialized.
+        NeedStackSlot = true;
+      }
+      if (CanDelete)
+        ReMatDelete.set(VN);
+    } else {
+      // Need a stack slot if there is any live range where uses cannot be
+      // rematerialized.
+      NeedStackSlot = true;
+    }
+  }
+
+  // One stack slot per live interval.
+  if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+    if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+      Slot = vrm.assignVirt2StackSlot(li.reg);
+
+    // This case only occurs when the prealloc splitter has already assigned
+    // a stack slot to this vreg.
+    else
+      Slot = vrm.getStackSlot(li.reg);
+  }
+
+  // Create new intervals and rewrite defs and uses.
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
+    MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
+    bool DefIsReMat = ReMatDefMI != NULL;
+    bool CanDelete = ReMatDelete[I->valno->id];
+    int LdSlot = 0;
+    bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+    bool isLoad = isLoadSS ||
+      (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
+    rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
+                         Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                         CanDelete, vrm, rc, ReMatIds, loopInfo,
+                         SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                         MBBVRegsMap, NewLIs);
+  }
+
+  // Insert spills / restores if we are splitting.
+  if (!TrySplit) {
+    handleSpilledImpDefs(li, vrm, rc, NewLIs);
+    return NewLIs;
+  }
+
+  SmallPtrSet<LiveInterval*, 4> AddedKill;
+  SmallVector<unsigned, 2> Ops;
+  if (NeedStackSlot) {
+    int Id = SpillMBBs.find_first();
+    while (Id != -1) {
+      std::vector<SRInfo> &spills = SpillIdxes[Id];
+      for (unsigned i = 0, e = spills.size(); i != e; ++i) {
+        int index = spills[i].index;
+        unsigned VReg = spills[i].vreg;
+        LiveInterval &nI = getOrCreateInterval(VReg);
+        bool isReMat = vrm.isReMaterialized(VReg);
+        MachineInstr *MI = getInstructionFromIndex(index);
+        bool CanFold = false;
+        bool FoundUse = false;
+        Ops.clear();
+        if (spills[i].canFold) {
+          CanFold = true;
+          for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+            MachineOperand &MO = MI->getOperand(j);
+            if (!MO.isReg() || MO.getReg() != VReg)
+              continue;
+
+            Ops.push_back(j);
+            if (MO.isDef())
+              continue;
+            if (isReMat ||
+                (!FoundUse && !alsoFoldARestore(Id, index, VReg,
+                                                RestoreMBBs, RestoreIdxes))) {
+              // MI has two-address uses of the same register. If the use
+              // isn't the first and only use in the BB, then we can't fold
+              // it. FIXME: Move this to rewriteInstructionsForSpills.
+              CanFold = false;
+              break;
+            }
+            FoundUse = true;
+          }
+        }
+        // Fold the store into the def if possible.
+        bool Folded = false;
+        if (CanFold && !Ops.empty()) {
+          if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot, VReg)) {
+            Folded = true;
+            if (FoundUse) {
+              // Also folded uses, do not issue a load.
+              eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
+              nI.removeRange(getLoadIndex(index), getUseIndex(index)+1);
+            }
+            nI.removeRange(getDefIndex(index), getStoreIndex(index));
+          }
+        }
+
+        // Otherwise tell the spiller to issue a spill.
+ if (!Folded) { + LiveRange *LR = &nI.ranges[nI.ranges.size()-1]; + bool isKill = LR->end == getStoreIndex(index); + if (!MI->registerDefIsDead(nI.reg)) + // No need to spill a dead def. + vrm.addSpillPoint(VReg, isKill, MI); + if (isKill) + AddedKill.insert(&nI); + } + } + Id = SpillMBBs.find_next(Id); + } + } + + int Id = RestoreMBBs.find_first(); + while (Id != -1) { + std::vector<SRInfo> &restores = RestoreIdxes[Id]; + for (unsigned i = 0, e = restores.size(); i != e; ++i) { + int index = restores[i].index; + if (index == -1) + continue; + unsigned VReg = restores[i].vreg; + LiveInterval &nI = getOrCreateInterval(VReg); + bool isReMat = vrm.isReMaterialized(VReg); + MachineInstr *MI = getInstructionFromIndex(index); + bool CanFold = false; + Ops.clear(); + if (restores[i].canFold) { + CanFold = true; + for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { + MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg() || MO.getReg() != VReg) + continue; + + if (MO.isDef()) { + // If this restore were to be folded, it would have been folded + // already. + CanFold = false; + break; + } + Ops.push_back(j); + } + } + + // Fold the load into the use if possible. + bool Folded = false; + if (CanFold && !Ops.empty()) { + if (!isReMat) + Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg); + else { + MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg); + int LdSlot = 0; + bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); + // If the rematerializable def is a load, also try to fold it. + if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad()) + Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, + Ops, isLoadSS, LdSlot, VReg); + if (!Folded) { + unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); + if (ImpUse) { + // Re-matting an instruction with virtual register use. Add the + // register as an implicit use on the use MI and update the register + // interval's spill weight to HUGE_VALF to prevent it from being + // spilled. + LiveInterval &ImpLi = getInterval(ImpUse); + ImpLi.weight = HUGE_VALF; + MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); + } + } + } + } + // If folding is not possible / failed, then tell the spiller to issue a + // load / rematerialization for us. + if (Folded) + nI.removeRange(getLoadIndex(index), getUseIndex(index)+1); + else + vrm.addRestorePoint(VReg, MI); + } + Id = RestoreMBBs.find_next(Id); + } + + // Finalize intervals: add kills, finalize spill weights, and filter out + // dead intervals. + std::vector<LiveInterval*> RetNewLIs; + for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { + LiveInterval *LI = NewLIs[i]; + if (!LI->empty()) { + LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI); + if (!AddedKill.count(LI)) { + LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; + unsigned LastUseIdx = getBaseIndex(LR->end); + MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); + int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); + assert(UseIdx != -1); + if (!LastUse->isRegTiedToDefOperand(UseIdx)) { + LastUse->getOperand(UseIdx).setIsKill(); + vrm.addKillPoint(LI->reg, LastUseIdx); + } + } + RetNewLIs.push_back(LI); + } + } + + handleSpilledImpDefs(li, vrm, rc, RetNewLIs); + return RetNewLIs; +} + +/// hasAllocatableSuperReg - Return true if the specified physical register has +/// any super register that's allocatable. 
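+/// e.g. on X86, AX is covered by the super-registers EAX and, on 64-bit
+/// subtargets, RAX, so a query for AX inspects those.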
+bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
+  for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
+    if (allocatableRegs_[*AS] && hasInterval(*AS))
+      return true;
+  return false;
+}
+
+/// getRepresentativeReg - Find the largest super register of the specified
+/// physical register.
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+  // Find the largest super-register that is allocatable.
+  unsigned BestReg = Reg;
+  for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+    unsigned SuperReg = *AS;
+    if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+      BestReg = SuperReg;
+      break;
+    }
+  }
+  return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflict with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+                                                   unsigned PhysReg) const {
+  unsigned NumConflicts = 0;
+  const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = O.getParent();
+    unsigned Index = getInstructionIndex(MI);
+    if (pli.liveAt(Index))
+      ++NumConflicts;
+  }
+  return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut its interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+                                                  unsigned PhysReg,
+                                                  VirtRegMap &vrm) {
+  unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+  for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+    // If there are registers which alias PhysReg but are not sub-registers
+    // of the chosen representative super-register, assert, since we can't
+    // handle them yet.
+    assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+           tri_->isSuperRegister(*AS, SpillReg));
+
+  bool Cut = false;
+  LiveInterval &pli = getInterval(SpillReg);
+  SmallPtrSet<MachineInstr*, 8> SeenMIs;
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = O.getParent();
+    if (SeenMIs.count(MI))
+      continue;
+    SeenMIs.insert(MI);
+    unsigned Index = getInstructionIndex(MI);
+    if (pli.liveAt(Index)) {
+      vrm.addEmergencySpill(SpillReg, MI);
+      unsigned StartIdx = getLoadIndex(Index);
+      unsigned EndIdx = getStoreIndex(Index)+1;
+      if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
+        pli.removeRange(StartIdx, EndIdx);
+        Cut = true;
+      } else {
+        cerr << "Ran out of registers during register allocation!\n";
+        if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+          cerr << "Please check your inline asm statement for invalid "
+               << "constraints:\n";
+          MI->print(cerr.stream(), tm_);
+        }
+        exit(1);
+      }
+      for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) {
+        if (!hasInterval(*AS))
+          continue;
+        LiveInterval &spli = getInterval(*AS);
+        if (spli.liveAt(Index))
+          spli.removeRange(getLoadIndex(Index), getStoreIndex(Index)+1);
+      }
+    }
+  }
+  return Cut;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+                                                  MachineInstr* startInst) {
+  LiveInterval& Interval = getOrCreateInterval(reg);
+  VNInfo* VN = Interval.getNextValue(
+    getInstructionIndex(startInst) + InstrSlots::DEF,
+    startInst, getVNInfoAllocator());
+  VN->hasPHIKill = true;
+  VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
+  LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
+               getMBBEndIdx(startInst->getParent()) + 1, VN);
+  Interval.addRange(LR);
+
+  return LR;
+}
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 0000000..86f7ea2
--- /dev/null
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,66 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
+
+void LiveStacks::scaleNumbering(int factor) {
+  // Scale the intervals.
+  for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
+    LI->second.scaleNumbering(factor);
+  }
+}
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
+  VNInfoAllocator.Reset();
+  S2IMap.clear();
+  S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+  // FIXME: No analysis is being done right now. We are relying on the
+  // register allocators to provide the information.
+  return false;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(std::ostream &O, const Module*) const {
+  O << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second.print(O);
+    int Slot = I->first;
+    const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+    if (RC)
+      O << " [" << RC->getName() << "]\n";
+    else
+      O << " [Unknown]\n";
+  }
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..c33d81e
--- /dev/null
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,695 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
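+//
+// Uses that appear in PHI nodes are not handled where the PHI occurs; instead,
+// analyzePHINodes records them and runOnMachineFunction simulates an
+// assignment at the end of the corresponding predecessor block.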
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/alloca.h" +#include <algorithm> +using namespace llvm; + +char LiveVariables::ID = 0; +static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis"); + + +void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(UnreachableMachineBlockElimID); + AU.setPreservesAll(); +} + +void LiveVariables::VarInfo::dump() const { + cerr << " Alive in blocks: "; + for (SparseBitVector<>::iterator I = AliveBlocks.begin(), + E = AliveBlocks.end(); I != E; ++I) + cerr << *I << ", "; + cerr << "\n Killed by:"; + if (Kills.empty()) + cerr << " No instructions.\n"; + else { + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + cerr << "\n #" << i << ": " << *Kills[i]; + cerr << "\n"; + } +} + +/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. +LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { + assert(TargetRegisterInfo::isVirtualRegister(RegIdx) && + "getVarInfo: not a virtual register!"); + RegIdx -= TargetRegisterInfo::FirstVirtualRegister; + if (RegIdx >= VirtRegInfo.size()) { + if (RegIdx >= 2*VirtRegInfo.size()) + VirtRegInfo.resize(RegIdx*2); + else + VirtRegInfo.resize(2*VirtRegInfo.size()); + } + return VirtRegInfo[RegIdx]; +} + +void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, + MachineBasicBlock *DefBlock, + MachineBasicBlock *MBB, + std::vector<MachineBasicBlock*> &WorkList) { + unsigned BBNum = MBB->getNumber(); + + // Check to see if this basic block is one of the killing blocks. If so, + // remove it. + for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i) + if (VRInfo.Kills[i]->getParent() == MBB) { + VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry + break; + } + + if (MBB == DefBlock) return; // Terminate recursion + + if (VRInfo.AliveBlocks.test(BBNum)) + return; // We already know the block is live + + // Mark the variable known alive in this bb + VRInfo.AliveBlocks.set(BBNum); + + for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(), + E = MBB->pred_rend(); PI != E; ++PI) + WorkList.push_back(*PI); +} + +void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, + MachineBasicBlock *DefBlock, + MachineBasicBlock *MBB) { + std::vector<MachineBasicBlock*> WorkList; + MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList); + + while (!WorkList.empty()) { + MachineBasicBlock *Pred = WorkList.back(); + WorkList.pop_back(); + MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList); + } +} + +void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, + MachineInstr *MI) { + assert(MRI->getVRegDef(reg) && "Register use before def!"); + + unsigned BBNum = MBB->getNumber(); + + VarInfo& VRInfo = getVarInfo(reg); + VRInfo.NumUses++; + + // Check to see if this basic block is already a kill block. + if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) { + // Yes, this register is killed in this basic block already. Increase the + // live range by updating the kill instruction. 
+    VRInfo.Kills.back() = MI;
+    return;
+  }
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+  // This situation can occur:
+  //
+  //     ,------.
+  //     |      |
+  //     |      v
+  //     |   t2 = phi ... t1 ...
+  //     |      |
+  //     |      v
+  //     |   t1 = ...
+  //     |  ... = ... t1 ...
+  //     |      |
+  //     `------'
+  //
+  // where there is a use in a PHI node that's a predecessor to the defining
+  // block. We don't want to mark all predecessors as having the value "alive"
+  // in this case.
+  if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+  // Add a new kill entry for this basic block. If this virtual register is
+  // already marked as alive in this basic block, that means it is alive in at
+  // least one of the successor blocks, so it's not a kill.
+  if (!VRInfo.AliveBlocks.test(BBNum))
+    VRInfo.Kills.push_back(MI);
+
+  // Update all dominating blocks to mark them as "known live".
+  for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+         E = MBB->pred_end(); PI != E; ++PI)
+    MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+  VarInfo &VRInfo = getVarInfo(Reg);
+
+  if (VRInfo.AliveBlocks.empty())
+    // If vr is not alive in any block, then it defaults to dead.
+    VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-register that's defined.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+                                                unsigned &PartDefReg) {
+  unsigned LastDefReg = 0;
+  unsigned LastDefDist = 0;
+  MachineInstr *LastDef = NULL;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (!Def)
+      continue;
+    unsigned Dist = DistanceMap[Def];
+    if (Dist > LastDefDist) {
+      LastDefReg = SubReg;
+      LastDef = Def;
+      LastDefDist = Dist;
+    }
+  }
+  PartDefReg = LastDefReg;
+  return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial defs into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+  // If there was a previous use or a "full" def, all is well.
+  if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) {
+    // Otherwise, the last sub-register def implicitly defines this register.
+    // e.g.
+    // AH =
+    // AL = ... <imp-def EAX>, <imp-kill AH>
+    //    = AH
+    // ...
+    //    = EAX
+    // All of the sub-registers must have been defined before the use of Reg!
+    unsigned PartDefReg = 0;
+    MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefReg);
+    // If LastPartialDef is NULL, it must be using a livein register.
+    if (LastPartialDef) {
+      LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+                                                           true/*IsImp*/));
+      PhysRegDef[Reg] = LastPartialDef;
+      SmallSet<unsigned, 8> Processed;
+      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+           unsigned SubReg = *SubRegs; ++SubRegs) {
+        if (Processed.count(SubReg))
+          continue;
+        if (SubReg == PartDefReg || TRI->isSubRegister(PartDefReg, SubReg))
+          continue;
+        // This part of Reg was defined before the last partial def. It's
+        // killed here.
+        LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+                                                             false/*IsDef*/,
+                                                             true/*IsImp*/));
+        PhysRegDef[SubReg] = LastPartialDef;
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Processed.insert(*SS);
+      }
+    }
+  }
+
+  // There was an earlier def of a super-register. Add implicit def to that MI.
+  //
+  //   A: EAX = ...
+  //   B: ... = AX
+  //
+  // Add implicit def to A if there isn't a use of AX (or EAX) before B.
+  if (!PhysRegUse[Reg]) {
+    MachineInstr *Def = PhysRegDef[Reg];
+    if (Def && !Def->modifiesRegister(Reg))
+      Def->addOperand(MachineOperand::CreateReg(Reg,
+                                                true /*IsDef*/,
+                                                true /*IsImp*/));
+  }
+
+  // Remember this use.
+  PhysRegUse[Reg] = MI;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs)
+    PhysRegUse[SubReg] = MI;
+}
+
+/// hasRegisterUseBelow - Return true if the specified register is used after
+/// the current instruction and before its next definition.
+bool LiveVariables::hasRegisterUseBelow(unsigned Reg,
+                                        MachineBasicBlock::iterator I,
+                                        MachineBasicBlock *MBB) {
+  if (I == MBB->end())
+    return false;
+
+  // First find out if there are any uses / defs below.
+  bool hasDistInfo = true;
+  unsigned CurDist = DistanceMap[I];
+  SmallVector<MachineInstr*, 4> Uses;
+  SmallVector<MachineInstr*, 4> Defs;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineOperand &UDO = RI.getOperand();
+    MachineInstr *UDMI = &*RI;
+    if (UDMI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+    bool isBelow = false;
+    if (DI == DistanceMap.end()) {
+      // Must be below if it hasn't been assigned a distance yet.
+      isBelow = true;
+      hasDistInfo = false;
+    } else if (DI->second > CurDist)
+      isBelow = true;
+    if (isBelow) {
+      if (UDO.isUse())
+        Uses.push_back(UDMI);
+      if (UDO.isDef())
+        Defs.push_back(UDMI);
+    }
+  }
+
+  if (Uses.empty())
+    // No uses below.
+    return false;
+  else if (!Uses.empty() && Defs.empty())
+    // There are uses below but no defs below.
+    return true;
+  // There are both uses and defs below. We need to know which comes first.
+  if (!hasDistInfo) {
+    // Complete DistanceMap for this MBB. This information is computed only
+    // once per MBB.
+    ++I;
+    ++CurDist;
+    for (MachineBasicBlock::iterator E = MBB->end(); I != E; ++I, ++CurDist)
+      DistanceMap.insert(std::make_pair(I, CurDist));
+  }
+
+  unsigned EarliestUse = DistanceMap[Uses[0]];
+  for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+    unsigned Dist = DistanceMap[Uses[i]];
+    if (Dist < EarliestUse)
+      EarliestUse = Dist;
+  }
+  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+    unsigned Dist = DistanceMap[Defs[i]];
+    if (Dist < EarliestUse)
+      // The register is defined before its first use below.
+      return false;
+  }
+  return true;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+  if (!PhysRegUse[Reg] && !PhysRegDef[Reg])
+    return false;
+
+  MachineInstr *LastRefOrPartRef = PhysRegUse[Reg]
+    ? PhysRegUse[Reg] : PhysRegDef[Reg];
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  // The whole register is used.
+  // AL =
+  // AH =
+  //
+  //    = AX
+  //    = AL, AX<imp-use, kill>
+  // AX =
+  //
+  // Or whole register is defined, but not used at all.
+  // AX<dead> =
+  // ...
+  // AX =
+  //
+  // Or whole register is defined, but only partly used.
+  // AX<dead> = AL<imp-def>
+  //    = AL<kill>
+  // AX =
+  SmallSet<unsigned, 8> PartUses;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      PartUses.insert(SubReg);
+      for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+        PartUses.insert(*SS);
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI)
+    // If the last reference is the last def, then it's not used at all.
+    // That is, unless we are currently processing the last reference itself.
+    LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+
+  /* Partial uses. Mark register def dead and add implicit def of
+     sub-registers which are used.
+     FIXME: LiveIntervalAnalysis can't handle this yet!
+     EAX<dead> = op AL<imp-def>
+     That is, EAX def is dead but AL def extends past it.
+     Enable this after live interval analysis is fixed to improve codegen!
+  else if (!PhysRegUse[Reg]) {
+    PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      if (PartUses.count(SubReg)) {
+        PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+                                                              true, true));
+        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          PartUses.erase(*SS);
+      }
+    }
+  } */
+  else
+    LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+  return true;
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
+  // What parts of the register are previously defined?
+  SmallSet<unsigned, 32> Live;
+  if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+    Live.insert(Reg);
+    for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+      Live.insert(*SS);
+  } else {
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      // If a register isn't itself defined, but all of the parts that make it
+      // up are defined, then consider it also defined.
+      // e.g.
+      // AL =
+      // AH =
+      //    = AX
+      if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+        Live.insert(SubReg);
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Live.insert(*SS);
+      }
+    }
+  }
+
+  // Start from the largest piece, find the last time any part of the register
+  // is referenced.
+  if (!HandlePhysRegKill(Reg, MI)) {
+    // Only some of the sub-registers are used.
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      if (!Live.count(SubReg))
+        // Skip if this sub-register isn't defined.
+        continue;
+      if (HandlePhysRegKill(SubReg, MI)) {
+        Live.erase(SubReg);
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Live.erase(*SS);
+      }
+    }
+    assert(Live.empty() && "Not all defined registers are killed / dead?");
+  }
+
+  if (MI) {
+    // Does this extend the live range of a super-register?
+    SmallSet<unsigned, 8> Processed;
+    for (const unsigned *SuperRegs = TRI->getSuperRegisters(Reg);
+         unsigned SuperReg = *SuperRegs; ++SuperRegs) {
+      if (Processed.count(SuperReg))
+        continue;
+      MachineInstr *LastRef = PhysRegUse[SuperReg]
+        ? PhysRegUse[SuperReg] : PhysRegDef[SuperReg];
+      if (LastRef && LastRef != MI) {
+        // The larger register is previously defined. Now a smaller part is
+        // being re-defined. Treat it as read/mod/write if there are uses
+        // below.
+ // EAX = + // AX = EAX<imp-use,kill>, EAX<imp-def> + // ... + /// = EAX + if (hasRegisterUseBelow(SuperReg, MI, MI->getParent())) { + MI->addOperand(MachineOperand::CreateReg(SuperReg, false/*IsDef*/, + true/*IsImp*/,true/*IsKill*/)); + MI->addOperand(MachineOperand::CreateReg(SuperReg, true/*IsDef*/, + true/*IsImp*/)); + PhysRegDef[SuperReg] = MI; + PhysRegUse[SuperReg] = NULL; + Processed.insert(SuperReg); + for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) { + PhysRegDef[*SS] = MI; + PhysRegUse[*SS] = NULL; + Processed.insert(*SS); + } + } else { + // Otherwise, the super register is killed. + if (HandlePhysRegKill(SuperReg, MI)) { + PhysRegDef[SuperReg] = NULL; + PhysRegUse[SuperReg] = NULL; + for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) { + PhysRegDef[*SS] = NULL; + PhysRegUse[*SS] = NULL; + Processed.insert(*SS); + } + } + } + } + } + + // Remember this def. + PhysRegDef[Reg] = MI; + PhysRegUse[Reg] = NULL; + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + PhysRegDef[SubReg] = MI; + PhysRegUse[SubReg] = NULL; + } + } +} + +bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + MRI = &mf.getRegInfo(); + TRI = MF->getTarget().getRegisterInfo(); + + ReservedRegisters = TRI->getReservedRegs(mf); + + unsigned NumRegs = TRI->getNumRegs(); + PhysRegDef = new MachineInstr*[NumRegs]; + PhysRegUse = new MachineInstr*[NumRegs]; + PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; + std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); + std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + + /// Get some space for a respectable number of registers. + VirtRegInfo.resize(64); + + analyzePHINodes(mf); + + // Calculate live variable information in depth first order on the CFG of the + // function. This guarantees that we will see the definition of a virtual + // register before its uses due to dominance properties of SSA (except for PHI + // nodes, which are treated as a special case). + MachineBasicBlock *Entry = MF->begin(); + SmallPtrSet<MachineBasicBlock*,16> Visited; + + for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + MachineBasicBlock *MBB = *DFI; + + // Mark live-in registers as live-in. + for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), + EE = MBB->livein_end(); II != EE; ++II) { + assert(TargetRegisterInfo::isPhysicalRegister(*II) && + "Cannot have a live-in virtual register!"); + HandlePhysRegDef(*II, 0); + } + + // Loop over all of the instructions, processing them. + DistanceMap.clear(); + unsigned Dist = 0; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + MachineInstr *MI = I; + DistanceMap.insert(std::make_pair(MI, Dist++)); + + // Process all of the operands of the instruction... + unsigned NumOperandsToProcess = MI->getNumOperands(); + + // Unless it is a PHI node. In this case, ONLY process the DEF, not any + // of the uses. They will be handled in other basic blocks. 
+      if (MI->getOpcode() == TargetInstrInfo::PHI)
+        NumOperandsToProcess = 1;
+
+      SmallVector<unsigned, 4> UseRegs;
+      SmallVector<unsigned, 4> DefRegs;
+      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+        const MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || MO.getReg() == 0)
+          continue;
+        unsigned MOReg = MO.getReg();
+        if (MO.isUse())
+          UseRegs.push_back(MOReg);
+        if (MO.isDef())
+          DefRegs.push_back(MOReg);
+      }
+
+      // Process all uses.
+      for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+        unsigned MOReg = UseRegs[i];
+        if (TargetRegisterInfo::isVirtualRegister(MOReg))
+          HandleVirtRegUse(MOReg, MBB, MI);
+        else if (!ReservedRegisters[MOReg])
+          HandlePhysRegUse(MOReg, MI);
+      }
+
+      // Process all defs.
+      for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+        unsigned MOReg = DefRegs[i];
+        if (TargetRegisterInfo::isVirtualRegister(MOReg))
+          HandleVirtRegDef(MOReg, MI);
+        else if (!ReservedRegisters[MOReg])
+          HandlePhysRegDef(MOReg, MI);
+      }
+    }
+
+    // Handle any virtual assignments from PHI nodes which might be at the
+    // bottom of this basic block. We check all of our successor blocks to see
+    // if they have PHI nodes, and if so, we simulate an assignment at the end
+    // of the current block.
+    if (!PHIVarInfo[MBB->getNumber()].empty()) {
+      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+             E = VarInfoVec.end(); I != E; ++I)
+        // Mark it alive only in the block we are representing.
+        MarkVirtRegAliveInBlock(getVarInfo(*I), MRI->getVRegDef(*I)->getParent(),
+                                MBB);
+    }
+
+    // Finally, if the last instruction in the block is a return, make sure to
+    // mark it as using all of the live-out values in the function.
+    if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
+      MachineInstr *Ret = &MBB->back();
+
+      for (MachineRegisterInfo::liveout_iterator
+             I = MF->getRegInfo().liveout_begin(),
+             E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+        assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+               "Cannot have a live-out virtual register!");
+        HandlePhysRegUse(*I, Ret);
+
+        // Add live-out registers as implicit uses.
+        if (!Ret->readsRegister(*I))
+          Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+      }
+    }
+
+    // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+    // available at the end of the basic block.
+    for (unsigned i = 0; i != NumRegs; ++i)
+      if (PhysRegDef[i] || PhysRegUse[i])
+        HandlePhysRegDef(i, 0);
+
+    std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+    std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+  }
+
+  // Convert and transfer the dead / killed information we have gathered into
+  // VirtRegInfo onto MIs.
+  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+    for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
+      if (VirtRegInfo[i].Kills[j] ==
+          MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
+        VirtRegInfo[i]
+          .Kills[j]->addRegisterDead(i +
+                                     TargetRegisterInfo::FirstVirtualRegister,
+                                     TRI);
+      else
+        VirtRegInfo[i]
+          .Kills[j]->addRegisterKilled(i +
+                                       TargetRegisterInfo::FirstVirtualRegister,
+                                       TRI);
+
+  // Check to make sure there are no unreachable blocks in the MC CFG for the
+  // function. If there are, it is a bug in the instruction selector or some
+  // other part of the code generator.
+#ifndef NDEBUG
+  for (MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+  delete[] PhysRegDef;
+  delete[] PhysRegUse;
+  delete[] PHIVarInfo;
+
+  return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+                                           MachineInstr *NewMI) {
+  VarInfo &VI = getVarInfo(Reg);
+  std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isKill()) {
+      MO.setIsKill(false);
+      unsigned Reg = MO.getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        bool removed = getVarInfo(Reg).removeKill(MI);
+        assert(removed && "kill not in register's VarInfo?");
+        (void)removed;  // Silence an unused-variable warning in release builds.
+      }
+    }
+  }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, we want to map the variable information of a virtual
+/// register which is used in a PHI node to the basic block the vreg is
+/// coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+  for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+          .push_back(BBI->getOperand(i).getReg());
+}
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
new file mode 100644
index 0000000..14acb71
--- /dev/null
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -0,0 +1,292 @@
+//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineFunction pass that runs after register
+// allocation and turns subreg insert/extract instructions into register
+// copies, as needed.  This ensures correct codegen even if the coalescer
+// isn't able to remove all subreg instructions.
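+// For example (schematic machine code, using the subreg index from the
+// SUBREG_TO_REG example below),
+//   %EAX<def> = EXTRACT_SUBREG %RAX<kill>, 3
+// becomes a plain register-to-register copy into %EAX, or disappears
+// entirely when the source subregister already is the destination register.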
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowersubregs" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN LowerSubregsInstructionPass + : public MachineFunctionPass { + static char ID; // Pass identification, replacement for typeid + LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {} + + const char *getPassName() const { + return "Subregister lowering instruction pass"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction&); + + bool LowerExtract(MachineInstr *MI); + bool LowerInsert(MachineInstr *MI); + bool LowerSubregToReg(MachineInstr *MI); + + void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, + const TargetRegisterInfo &TRI); + void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, + const TargetRegisterInfo &TRI); + }; + + char LowerSubregsInstructionPass::ID = 0; +} + +FunctionPass *llvm::createLowerSubregsPass() { + return new LowerSubregsInstructionPass(); +} + +/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, +/// and the lowered replacement instructions immediately precede it. +/// Mark the replacement instructions with the dead flag. +void +LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, + unsigned DstReg, + const TargetRegisterInfo &TRI) { + for (MachineBasicBlock::iterator MII = + prior(MachineBasicBlock::iterator(MI)); ; --MII) { + if (MII->addRegisterDead(DstReg, &TRI)) + break; + assert(MII != MI->getParent()->begin() && + "copyRegToReg output doesn't reference destination register!"); + } +} + +/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed, +/// and the lowered replacement instructions immediately precede it. +/// Mark the replacement instructions with the kill flag. 
+void
+LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
+                                              unsigned SrcReg,
+                                              const TargetRegisterInfo &TRI) {
+  for (MachineBasicBlock::iterator MII =
+        prior(MachineBasicBlock::iterator(MI)); ; --MII) {
+    if (MII->addRegisterKilled(SrcReg, &TRI))
+      break;
+    assert(MII != MI->getParent()->begin() &&
+           "copyRegToReg output doesn't reference source register!");
+  }
+}
+
+bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+         MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
+         MI->getOperand(2).isImm() && "Malformed extract_subreg");
+
+  unsigned DstReg   = MI->getOperand(0).getReg();
+  unsigned SuperReg = MI->getOperand(1).getReg();
+  unsigned SubIdx   = MI->getOperand(2).getImm();
+  unsigned SrcReg   = TRI.getSubReg(SuperReg, SubIdx);
+
+  assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) &&
+         "Extract superreg source must be a physical register");
+  assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+         "Extract destination must be in a physical register");
+
+  DOUT << "subreg: CONVERTING: " << *MI;
+
+  if (SrcReg == DstReg) {
+    // No need to insert an identity copy instruction.
+    DOUT << "subreg: eliminated!";
+    // Find the kill of the destination register's live range, and insert
+    // a kill of the source register at that point.
+    if (MI->getOperand(1).isKill() && !MI->getOperand(0).isDead())
+      for (MachineBasicBlock::iterator MII =
+             next(MachineBasicBlock::iterator(MI));
+           MII != MBB->end(); ++MII)
+        if (MII->killsRegister(DstReg, &TRI)) {
+          MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true);
+          break;
+        }
+  } else {
+    // Insert a copy.
+    const TargetRegisterClass *TRC = TRI.getPhysicalRegisterRegClass(DstReg);
+    assert(TRC == TRI.getPhysicalRegisterRegClass(SrcReg) &&
+           "Source and destination must be in the same register class");
+    TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRC, TRC);
+    // Transfer the kill/dead flags, if needed.
+    if (MI->getOperand(0).isDead())
+      TransferDeadFlag(MI, DstReg, TRI);
+    if (MI->getOperand(1).isKill())
+      TransferKillFlag(MI, SrcReg, TRI);
+
+#ifndef NDEBUG
+    MachineBasicBlock::iterator dMI = MI;
+    DOUT << "subreg: " << *(--dMI);
+#endif
+  }
+
+  DOUT << "\n";
+  MBB->erase(MI);
+  return true;
+}
+
+bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+         MI->getOperand(1).isImm() &&
+         (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+         MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+  unsigned DstReg  = MI->getOperand(0).getReg();
+  unsigned InsReg  = MI->getOperand(2).getReg();
+  unsigned InsSIdx = MI->getOperand(2).getSubReg();
+  unsigned SubIdx  = MI->getOperand(3).getImm();
+
+  assert(SubIdx != 0 && "Invalid index for insert_subreg");
+  unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
+
+  assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+         "Insert destination must be in a physical register");
+  assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+         "Inserted value must be in a physical register");
+
+  DOUT << "subreg: CONVERTING: " << *MI;
+
+  if (DstSubReg == InsReg && InsSIdx == 0) {
+    // No need to insert an identity copy instruction.
+    // Watch out for a case like this:
+    //   %RAX<def> = ...
+    //   %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
+    // The first def is defining RAX, not EAX, so the top bits were not
+    // zero-extended.
+    DOUT << "subreg: eliminated!";
+  } else {
+    // Insert a sub-register copy.
+    const TargetRegisterClass *TRC0 = TRI.getPhysicalRegisterRegClass(DstSubReg);
+    const TargetRegisterClass *TRC1 = TRI.getPhysicalRegisterRegClass(InsReg);
+    TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+    // Transfer the kill/dead flags, if needed.
+    if (MI->getOperand(0).isDead())
+      TransferDeadFlag(MI, DstSubReg, TRI);
+    if (MI->getOperand(2).isKill())
+      TransferKillFlag(MI, InsReg, TRI);
+
+#ifndef NDEBUG
+    MachineBasicBlock::iterator dMI = MI;
+    DOUT << "subreg: " << *(--dMI);
+#endif
+  }
+
+  DOUT << "\n";
+  MBB->erase(MI);
+  return true;
+}
+
+bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+         (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) &&
+         (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+         MI->getOperand(3).isImm() && "Invalid insert_subreg");
+
+  unsigned DstReg = MI->getOperand(0).getReg();
+#ifndef NDEBUG
+  unsigned SrcReg = MI->getOperand(1).getReg();
+#endif
+  unsigned InsReg = MI->getOperand(2).getReg();
+  unsigned SubIdx = MI->getOperand(3).getImm();
+
+  assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
+  assert(SubIdx != 0 && "Invalid index for insert_subreg");
+  unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx);
+
+  assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+         "Insert superreg source must be in a physical register");
+  assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+         "Inserted value must be in a physical register");
+
+  DOUT << "subreg: CONVERTING: " << *MI;
+
+  if (DstSubReg == InsReg) {
+    // No need to insert an identity copy instruction.
+    DOUT << "subreg: eliminated!";
+  } else {
+    // Insert a sub-register copy.
+    const TargetRegisterClass *TRC0 = TRI.getPhysicalRegisterRegClass(DstSubReg);
+    const TargetRegisterClass *TRC1 = TRI.getPhysicalRegisterRegClass(InsReg);
+    TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+    // Transfer the kill/dead flags, if needed.
+    if (MI->getOperand(0).isDead())
+      TransferDeadFlag(MI, DstSubReg, TRI);
+    if (MI->getOperand(2).isKill())
+      TransferKillFlag(MI, InsReg, TRI);
+
+#ifndef NDEBUG
+    MachineBasicBlock::iterator dMI = MI;
+    DOUT << "subreg: " << *(--dMI);
+#endif
+  }
+
+  DOUT << "\n";
+  MBB->erase(MI);
+  return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
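+/// Every EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG pseudo is erased
+/// once the replacement copy (if one is needed at all) has been emitted in
+/// its place.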
+/// +bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { + DOUT << "Machine Function\n"; + + bool MadeChange = false; + + DOUT << "********** LOWERING SUBREG INSTRS **********\n"; + DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me;) { + MachineInstr *MI = mi++; + + if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + MadeChange |= LowerExtract(MI); + } else if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + MadeChange |= LowerInsert(MI); + } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + MadeChange |= LowerSubregToReg(MI); + } + } + } + + return MadeChange; +} diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp new file mode 100644 index 0000000..4332627 --- /dev/null +++ b/lib/CodeGen/MachOWriter.cpp @@ -0,0 +1,976 @@ +//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the target-independent Mach-O writer. This file writes +// out the Mach-O file in the following order: +// +// #1 FatHeader (universal-only) +// #2 FatArch (universal-only, 1 per universal arch) +// Per arch: +// #3 Header +// #4 Load Commands +// #5 Sections +// #6 Relocations +// #7 Symbols +// #8 Strings +// +//===----------------------------------------------------------------------===// + +#include "MachOWriter.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/FileWriters.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstring> +using namespace llvm; + +/// AddMachOWriter - Concrete function to add the Mach-O writer to the function +/// pass manager. +MachineCodeEmitter *llvm::AddMachOWriter(PassManagerBase &PM, + raw_ostream &O, + TargetMachine &TM) { + MachOWriter *MOW = new MachOWriter(O, TM); + PM.add(MOW); + return &MOW->getMachineCodeEmitter(); +} + +//===----------------------------------------------------------------------===// +// MachOCodeEmitter Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { + /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code + /// for functions to the Mach-O file. + class MachOCodeEmitter : public MachineCodeEmitter { + MachOWriter &MOW; + + /// Target machine description. + TargetMachine &TM; + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating what header values and flags to set. + bool is64Bit, isLittleEndian; + + /// Relocations - These are the relocations that the function needs, as + /// emitted. 
+    std::vector<MachineRelocation> Relocations;
+
+    /// CPLocations - This is a map of constant pool indices to offsets from
+    /// the start of the section for that constant pool index.
+    std::vector<uintptr_t> CPLocations;
+
+    /// CPSections - This is a map of constant pool indices to the MachOSection
+    /// containing the constant pool entry for that index.
+    std::vector<unsigned> CPSections;
+
+    /// JTLocations - This is a map of jump table indices to offsets from the
+    /// start of the section for that jump table index.
+    std::vector<uintptr_t> JTLocations;
+
+    /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+    /// It is filled in by the StartMachineBasicBlock callback and queried by
+    /// the getMachineBasicBlockAddress callback.
+    std::vector<uintptr_t> MBBLocations;
+
+  public:
+    MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) {
+      is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+      isLittleEndian = TM.getTargetData()->isLittleEndian();
+    }
+
+    virtual void startFunction(MachineFunction &MF);
+    virtual bool finishFunction(MachineFunction &MF);
+
+    virtual void addRelocation(const MachineRelocation &MR) {
+      Relocations.push_back(MR);
+    }
+
+    void emitConstantPool(MachineConstantPool *MCP);
+    void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+    virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+      assert(CPLocations.size() > Index && "CP not emitted!");
+      return CPLocations[Index];
+    }
+    virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+      assert(JTLocations.size() > Index && "JT not emitted!");
+      return JTLocations[Index];
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+      if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+        MBBLocations.resize((MBB->getNumber()+1)*2);
+      MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+    }
+
+    virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+      assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+             MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+      return MBBLocations[MBB->getNumber()];
+    }
+
+    virtual uintptr_t getLabelAddress(uint64_t Label) const {
+      assert(0 && "get Label not implemented");
+      abort();
+      return 0;
+    }
+
+    virtual void emitLabel(uint64_t LabelID) {
+      assert(0 && "emit Label not implemented");
+      abort();
+    }
+
+    virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+    /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+    virtual void startGVStub(const GlobalValue* F, unsigned StubSize,
+                             unsigned Alignment = 1) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    virtual void startGVStub(const GlobalValue* F, void *Buffer,
+                             unsigned StubSize) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    virtual void *finishGVStub(const GlobalValue* F) {
+      assert(0 && "JIT specific function called!");
+      abort();
+      return 0;
+    }
+  };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void MachOCodeEmitter::startFunction(MachineFunction &MF) {
+  const TargetData *TD = TM.getTargetData();
+  const Function *F = MF.getFunction();
+
+  // Align the output buffer with the function's required alignment, which
+  // must be a power of two; Align holds its base-2 logarithm.
+  unsigned FnAlign = F->getAlignment();
+  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+  unsigned MaxAlign = std::max(FnAlign, TDAlign);
+  assert(!(MaxAlign & (MaxAlign - 1)) && "Alignment is not a power of two!");
+  unsigned Align = Log2_32(MaxAlign);
+
+  // Get the Mach-O Section that this function belongs in.
+ MachOWriter::MachOSection *MOS = MOW.getTextSection(); + + // FIXME: better memory management + MOS->SectionData.reserve(4096); + BufferBegin = &MOS->SectionData[0]; + BufferEnd = BufferBegin + MOS->SectionData.capacity(); + + // Upgrade the section alignment if required. + if (MOS->align < Align) MOS->align = Align; + + // Round the size up to the correct alignment for starting the new function. + if ((MOS->size & ((1 << Align) - 1)) != 0) { + MOS->size += (1 << Align); + MOS->size &= ~((1 << Align) - 1); + } + + // FIXME: Using MOS->size directly here instead of calculating it from the + // output buffer size (impossible because the code emitter deals only in raw + // bytes) forces us to manually synchronize size and write padding zero bytes + // to the output buffer for all non-text sections. For text sections, we do + // not synchonize the output buffer, and we just blow up if anyone tries to + // write non-code to it. An assert should probably be added to + // AddSymbolToSection to prevent calling it on the text section. + CurBufferPtr = BufferBegin + MOS->size; + + // Clear per-function data structures. + CPLocations.clear(); + CPSections.clear(); + JTLocations.clear(); + MBBLocations.clear(); +} + +/// finishFunction - This callback is invoked after the function is completely +/// finished. +bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { + // Get the Mach-O Section that this function belongs in. + MachOWriter::MachOSection *MOS = MOW.getTextSection(); + + // Get a symbol for the function to add to the symbol table + // FIXME: it seems like we should call something like AddSymbolToSection + // in startFunction rather than changing the section size and symbol n_value + // here. + const GlobalValue *FuncV = MF.getFunction(); + MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TM); + FnSym.n_value = MOS->size; + MOS->size = CurBufferPtr - BufferBegin; + + // Emit constant pool to appropriate section(s) + emitConstantPool(MF.getConstantPool()); + + // Emit jump tables to appropriate section + emitJumpTables(MF.getJumpTableInfo()); + + // If we have emitted any relocations to function-specific objects such as + // basic blocks, constant pools entries, or jump tables, record their + // addresses now so that we can rewrite them with the correct addresses + // later. + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; + intptr_t Addr; + + if (MR.isBasicBlock()) { + Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); + MR.setConstantVal(MOS->Index); + MR.setResultPointer((void*)Addr); + } else if (MR.isJumpTableIndex()) { + Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); + MR.setConstantVal(MOW.getJumpTableSection()->Index); + MR.setResultPointer((void*)Addr); + } else if (MR.isConstantPoolIndex()) { + Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex()); + MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); + MR.setResultPointer((void*)Addr); + } else if (MR.isGlobalValue()) { + // FIXME: This should be a set or something that uniques + MOW.PendingGlobals.push_back(MR.getGlobalValue()); + } else { + assert(0 && "Unhandled relocation type"); + } + MOS->Relocations.push_back(MR); + } + Relocations.clear(); + + // Finally, add it to the symtab. + MOW.SymbolTable.push_back(FnSym); + return false; +} + +/// emitConstantPool - For each constant pool entry, figure out which section +/// the constant should live in, allocate space for it, and emit it to the +/// Section data buffer. 
+void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { + const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); + if (CP.empty()) return; + + // FIXME: handle PIC codegen + assert(TM.getRelocationModel() != Reloc::PIC_ && + "PIC codegen not yet handled for mach-o jump tables!"); + + // Although there is no strict necessity that I am aware of, we will do what + // gcc for OS X does and put each constant pool entry in a section of constant + // objects of a certain size. That means that float constants go in the + // literal4 section, and double objects go in literal8, etc. + // + // FIXME: revisit this decision if we ever do the "stick everything into one + // "giant object for PIC" optimization. + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + const Type *Ty = CP[i].getType(); + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + + MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal); + OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + + CPLocations.push_back(Sec->SectionData.size()); + CPSections.push_back(Sec->Index); + + // FIXME: remove when we have unified size + output buffer + Sec->size += Size; + + // Allocate space in the section for the global. + // FIXME: need alignment? + // FIXME: share between here and AddSymbolToSection? + for (unsigned j = 0; j < Size; ++j) + SecDataOut.outbyte(0); + + MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i], + TM.getTargetData(), Sec->Relocations); + } +} + +/// emitJumpTables - Emit all the jump tables for a given jump table info +/// record to the appropriate section. +void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + // FIXME: handle PIC codegen + assert(TM.getRelocationModel() != Reloc::PIC_ && + "PIC codegen not yet handled for mach-o jump tables!"); + + MachOWriter::MachOSection *Sec = MOW.getJumpTableSection(); + unsigned TextSecIndex = MOW.getTextSection()->Index; + OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + // For each jump table, record its offset from the start of the section, + // reserve space for the relocations to the MBBs, and add the relocations. + const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; + JTLocations.push_back(Sec->SectionData.size()); + for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { + MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(), + MBBs[mi])); + MR.setResultPointer((void *)JTLocations[i]); + MR.setConstantVal(TextSecIndex); + Sec->Relocations.push_back(MR); + SecDataOut.outaddr(0); + } + } + // FIXME: remove when we have unified size + output buffer + Sec->size = Sec->SectionData.size(); +} + +//===----------------------------------------------------------------------===// +// MachOWriter Implementation +//===----------------------------------------------------------------------===// + +char MachOWriter::ID = 0; +MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm) + : MachineFunctionPass(&ID), O(o), TM(tm) { + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + + // Create the machine code emitter object for this target. 
+ MCE = new MachOCodeEmitter(*this); +} + +MachOWriter::~MachOWriter() { + delete MCE; +} + +void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { + const Type *Ty = GV->getType()->getElementType(); + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = TM.getTargetData()->getPreferredAlignment(GV); + + // Reserve space in the .bss section for this symbol while maintaining the + // desired section alignment, which must be at least as much as required by + // this symbol. + OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + + if (Align) { + uint64_t OrigSize = Sec->size; + Align = Log2_32(Align); + Sec->align = std::max(unsigned(Sec->align), Align); + Sec->size = (Sec->size + Align - 1) & ~(Align-1); + + // Add alignment padding to buffer as well. + // FIXME: remove when we have unified size + output buffer + unsigned AlignedSize = Sec->size - OrigSize; + for (unsigned i = 0; i < AlignedSize; ++i) + SecDataOut.outbyte(0); + } + // Globals without external linkage apparently do not go in the symbol table. + if (!GV->hasLocalLinkage()) { + MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM); + Sym.n_value = Sec->size; + SymbolTable.push_back(Sym); + } + + // Record the offset of the symbol, and then allocate space for it. + // FIXME: remove when we have unified size + output buffer + Sec->size += Size; + + // Now that we know what section the GlovalVariable is going to be emitted + // into, update our mappings. + // FIXME: We may also need to update this when outputting non-GlobalVariable + // GlobalValues such as functions. + GVSection[GV] = Sec; + GVOffset[GV] = Sec->SectionData.size(); + + // Allocate space in the section for the global. + for (unsigned i = 0; i < Size; ++i) + SecDataOut.outbyte(0); +} + +void MachOWriter::EmitGlobal(GlobalVariable *GV) { + const Type *Ty = GV->getType()->getElementType(); + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + bool NoInit = !GV->hasInitializer(); + + // If this global has a zero initializer, it is part of the .bss or common + // section. + if (NoInit || GV->getInitializer()->isNullValue()) { + // If this global is part of the common block, add it now. Variables are + // part of the common block if they are zero initialized and allowed to be + // merged with other symbols. + if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasCommonLinkage()) { + MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM); + // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in + // bytes of the symbol. + ExtOrCommonSym.n_value = Size; + SymbolTable.push_back(ExtOrCommonSym); + // Remember that we've seen this symbol + GVOffset[GV] = Size; + return; + } + // Otherwise, this symbol is part of the .bss section. + MachOSection *BSS = getBSSSection(); + AddSymbolToSection(BSS, GV); + return; + } + + // Scalar read-only data goes in a literal section if the scalar is 4, 8, or + // 16 bytes, or a cstring. Other read only data goes into a regular const + // section. Read-write data goes in the data section. + MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) : + getDataSection(); + AddSymbolToSection(Sec, GV); + InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV], + TM.getTargetData(), Sec->Relocations); +} + + +bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { + // Nothing to do here, this is all done through the MCE object. 
+  return false;
+}
+
+bool MachOWriter::doInitialization(Module &M) {
+  // Set the magic value, now that we know the pointer size and endianness.
+  Header.setMagic(isLittleEndian, is64Bit);
+
+  // Set the file type.
+  // FIXME: this only works for object files; we do not support the creation
+  // of dynamic libraries or executables at this time.
+  Header.filetype = MachOHeader::MH_OBJECT;
+
+  Mang = new Mangler(M);
+  return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the Mach-O file to 'O'.
+bool MachOWriter::doFinalization(Module &M) {
+  // FIXME: we don't handle debug info yet, we should probably do that.
+
+  // Okay, the .text section has been completed; build the .data, .bss, and
+  // "common" sections next.
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    EmitGlobal(I);
+
+  // Emit the header and load commands.
+  EmitHeaderAndLoadCommands();
+
+  // Emit the various sections and their relocation info.
+  EmitSections();
+
+  // Write the symbol table and the string table to the end of the file.
+  O.write((char*)&SymT[0], SymT.size());
+  O.write((char*)&StrT[0], StrT.size());
+
+  // We are done with the abstract symbols.
+  SectionList.clear();
+  SymbolTable.clear();
+  DynamicSymbolTable.clear();
+
+  // Release the name mangler object.
+  delete Mang; Mang = 0;
+  return false;
+}
+
+void MachOWriter::EmitHeaderAndLoadCommands() {
+  // Step #0: Fill in the segment load command size, since we need it to
+  //          figure out the rest of the header fields.
+  MachOSegment SEG("", is64Bit);
+  SEG.nsects  = SectionList.size();
+  SEG.cmdsize = SEG.cmdSize(is64Bit) +
+                SEG.nsects * SectionList[0]->cmdSize(is64Bit);
+
+  // Step #1: calculate the number of load commands.  We always have at least
+  //          one, for the LC_SEGMENT load command, plus two for the normal
+  //          and dynamic symbol tables, if there are any symbols.
+  Header.ncmds = SymbolTable.empty() ? 1 : 3;
+
+  // Step #2: calculate the size of the load commands.
+  Header.sizeofcmds = SEG.cmdsize;
+  if (!SymbolTable.empty())
+    Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize;
+
+  // Step #3: write the header to the file.
+  // Local alias to shorten the code that follows.
+ DataBuffer &FH = Header.HeaderData; + OutputBuffer FHOut(FH, is64Bit, isLittleEndian); + + FHOut.outword(Header.magic); + FHOut.outword(TM.getMachOWriterInfo()->getCPUType()); + FHOut.outword(TM.getMachOWriterInfo()->getCPUSubType()); + FHOut.outword(Header.filetype); + FHOut.outword(Header.ncmds); + FHOut.outword(Header.sizeofcmds); + FHOut.outword(Header.flags); + if (is64Bit) + FHOut.outword(Header.reserved); + + // Step #4: Finish filling in the segment load command and write it out + for (std::vector<MachOSection*>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) + SEG.filesize += (*I)->size; + + SEG.vmsize = SEG.filesize; + SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; + + FHOut.outword(SEG.cmd); + FHOut.outword(SEG.cmdsize); + FHOut.outstring(SEG.segname, 16); + FHOut.outaddr(SEG.vmaddr); + FHOut.outaddr(SEG.vmsize); + FHOut.outaddr(SEG.fileoff); + FHOut.outaddr(SEG.filesize); + FHOut.outword(SEG.maxprot); + FHOut.outword(SEG.initprot); + FHOut.outword(SEG.nsects); + FHOut.outword(SEG.flags); + + // Step #5: Finish filling in the fields of the MachOSections + uint64_t currentAddr = 0; + for (std::vector<MachOSection*>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + MachOSection *MOS = *I; + MOS->addr = currentAddr; + MOS->offset = currentAddr + SEG.fileoff; + + // FIXME: do we need to do something with alignment here? + currentAddr += MOS->size; + } + + // Step #6: Emit the symbol table to temporary buffers, so that we know the + // size of the string table when we write the next load command. This also + // sorts and assigns indices to each of the symbols, which is necessary for + // emitting relocations to externally-defined objects. + BufferSymbolAndStringTable(); + + // Step #7: Calculate the number of relocations for each section and write out + // the section commands for each section + currentAddr += SEG.fileoff; + for (std::vector<MachOSection*>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + MachOSection *MOS = *I; + // Convert the relocations to target-specific relocations, and fill in the + // relocation offset for this section. + CalculateRelocations(*MOS); + MOS->reloff = MOS->nreloc ? currentAddr : 0; + currentAddr += MOS->nreloc * 8; + + // write the finalized section command to the output buffer + FHOut.outstring(MOS->sectname, 16); + FHOut.outstring(MOS->segname, 16); + FHOut.outaddr(MOS->addr); + FHOut.outaddr(MOS->size); + FHOut.outword(MOS->offset); + FHOut.outword(MOS->align); + FHOut.outword(MOS->reloff); + FHOut.outword(MOS->nreloc); + FHOut.outword(MOS->flags); + FHOut.outword(MOS->reserved1); + FHOut.outword(MOS->reserved2); + if (is64Bit) + FHOut.outword(MOS->reserved3); + } + + // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands + SymTab.symoff = currentAddr; + SymTab.nsyms = SymbolTable.size(); + SymTab.stroff = SymTab.symoff + SymT.size(); + SymTab.strsize = StrT.size(); + FHOut.outword(SymTab.cmd); + FHOut.outword(SymTab.cmdsize); + FHOut.outword(SymTab.symoff); + FHOut.outword(SymTab.nsyms); + FHOut.outword(SymTab.stroff); + FHOut.outword(SymTab.strsize); + + // FIXME: set DySymTab fields appropriately + // We should probably just update these in BufferSymbolAndStringTable since + // thats where we're partitioning up the different kinds of symbols. 
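+  // Roughly, LC_DYSYMTAB records where each partition of the symbol table
+  // begins and how large it is: [ilocalsym, +nlocalsym) for local symbols,
+  // [iextdefsym, +nextdefsym) for externally visible symbols defined here,
+  // and [iundefsym, +nundefsym) for undefined references.  The counts are
+  // computed in BufferSymbolAndStringTable.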
+  FHOut.outword(DySymTab.cmd);
+  FHOut.outword(DySymTab.cmdsize);
+  FHOut.outword(DySymTab.ilocalsym);
+  FHOut.outword(DySymTab.nlocalsym);
+  FHOut.outword(DySymTab.iextdefsym);
+  FHOut.outword(DySymTab.nextdefsym);
+  FHOut.outword(DySymTab.iundefsym);
+  FHOut.outword(DySymTab.nundefsym);
+  FHOut.outword(DySymTab.tocoff);
+  FHOut.outword(DySymTab.ntoc);
+  FHOut.outword(DySymTab.modtaboff);
+  FHOut.outword(DySymTab.nmodtab);
+  FHOut.outword(DySymTab.extrefsymoff);
+  FHOut.outword(DySymTab.nextrefsyms);
+  FHOut.outword(DySymTab.indirectsymoff);
+  FHOut.outword(DySymTab.nindirectsyms);
+  FHOut.outword(DySymTab.extreloff);
+  FHOut.outword(DySymTab.nextrel);
+  FHOut.outword(DySymTab.locreloff);
+  FHOut.outword(DySymTab.nlocrel);
+
+  O.write((char*)&FH[0], FH.size());
+}
+
+/// EmitSections - Now that we have constructed the file header and load
+/// commands, emit the data for each section to the file.
+void MachOWriter::EmitSections() {
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I)
+    // Emit the contents of each section.
+    O.write((char*)&(*I)->SectionData[0], (*I)->size);
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I)
+    // Emit the relocation entry data for each section.
+    O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
+}
+
+/// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+/// a local symbol rather than an external symbol.
+bool MachOWriter::PartitionByLocal(const MachOSym &Sym) {
+  return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
+}
+
+/// PartitionByDefined - Simple boolean predicate that returns true if Sym is
+/// defined in this module.
+bool MachOWriter::PartitionByDefined(const MachOSym &Sym) {
+  // FIXME: Do N_ABS or N_INDR count as defined?
+  return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
+}
+
+/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign
+/// them each a string table index so that they appear in the correct order
+/// in the output file.
+void MachOWriter::BufferSymbolAndStringTable() {
+  // The order of the symbol table is:
+  // 1. local symbols
+  // 2. defined external symbols (sorted by name)
+  // 3. undefined external symbols (sorted by name)
+
+  // Before sorting the symbols, check the PendingGlobals for any undefined
+  // globals that need to be put in the symbol table.
+  for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(),
+         E = PendingGlobals.end(); I != E; ++I) {
+    if (GVOffset[*I] == 0 && GVSection[*I] == 0) {
+      MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM);
+      SymbolTable.push_back(UndfSym);
+      GVOffset[*I] = -1;
+    }
+  }
+
+  // Sort the symbols by name, so that when we partition the symbols by scope
+  // of definition, we won't have to sort by name within each partition.
+  std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp());
+
+  // Partition the symbol table entries so that all local symbols come before
+  // all symbols with external linkage.  { 1 | 2 3 }
+  std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal);
+
+  // Advance iterator to beginning of external symbols and partition so that
+  // all external symbols defined in this module come before all external
+  // symbols defined elsewhere.  { 1 | 2 | 3 }
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I) {
+    if (!PartitionByLocal(*I)) {
+      std::partition(I, E, PartitionByDefined);
+      break;
+    }
+  }
+
+  // Calculate the starting index for each of the local, extern defined, and
+  // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
+  // load command.
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I) {
+    if (PartitionByLocal(*I)) {
+      ++DySymTab.nlocalsym;
+      ++DySymTab.iextdefsym;
+      ++DySymTab.iundefsym;
+    } else if (PartitionByDefined(*I)) {
+      ++DySymTab.nextdefsym;
+      ++DySymTab.iundefsym;
+    } else {
+      ++DySymTab.nundefsym;
+    }
+  }
+
+  // Write out a leading zero byte when emitting the string table, for
+  // n_strx == 0, which means an empty string.
+  OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
+  StrTOut.outbyte(0);
+
+  // The order of the string table is:
+  // 1. strings for external symbols
+  // 2. strings for local symbols
+  // Since this is the opposite order from the symbol table, which we have just
+  // sorted, we can walk the symbol table backwards to output the string table.
+  for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
+         E = SymbolTable.rend(); I != E; ++I) {
+    if (I->GVName == "") {
+      I->n_strx = 0;
+    } else {
+      I->n_strx = StrT.size();
+      StrTOut.outstring(I->GVName, I->GVName.length()+1);
+    }
+  }
+
+  OutputBuffer SymTOut(SymT, is64Bit, isLittleEndian);
+
+  unsigned index = 0;
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I, ++index) {
+    // Add the section base address to the section offset in the n_value field
+    // to calculate the full address.
+    // FIXME: handle symbols where the n_value field is not the address.
+    GlobalValue *GV = const_cast<GlobalValue*>(I->GV);
+    if (GV && GVSection[GV])
+      I->n_value += GVSection[GV]->addr;
+    if (GV && (GVOffset[GV] == -1))
+      GVOffset[GV] = index;
+
+    // Emit nlist to buffer.
+    SymTOut.outword(I->n_strx);
+    SymTOut.outbyte(I->n_type);
+    SymTOut.outbyte(I->n_sect);
+    SymTOut.outhalf(I->n_desc);
+    SymTOut.outaddr(I->n_value);
+  }
+}
+
+/// CalculateRelocations - For each MachineRelocation in the current section,
+/// calculate the index of the section containing the object to be relocated,
+/// and the offset into that section.  From this information, create the
+/// appropriate target-specific MachORelocation type and buffer it to be
+/// written out after we are finished writing out sections.
+void MachOWriter::CalculateRelocations(MachOSection &MOS) {
+  for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = MOS.Relocations[i];
+    unsigned TargetSection = MR.getConstantVal();
+    unsigned TargetAddr = 0;
+    unsigned TargetIndex = 0;
+
+    // This is a scattered relocation entry if it points to a global value with
+    // a non-zero offset.
+    bool Scattered = false;
+    bool Extern = false;
+
+    // Since we may not have seen the GlobalValue we were interested in yet at
+    // the time we emitted the relocation for it, fix it up now so that it
+    // points to the offset into the correct section.
+    if (MR.isGlobalValue()) {
+      GlobalValue *GV = MR.getGlobalValue();
+      MachOSection *MOSPtr = GVSection[GV];
+      intptr_t Offset = GVOffset[GV];
+
+      // If we have never seen the global before, it must be to a symbol
+      // defined in another module (N_UNDF).
+ if (!MOSPtr) { + // FIXME: need to append stub suffix + Extern = true; + TargetAddr = 0; + TargetIndex = GVOffset[GV]; + } else { + Scattered = TargetSection != 0; + TargetSection = MOSPtr->Index; + } + MR.setResultPointer((void*)Offset); + } + + // If the symbol is locally defined, pass in the address of the section and + // the section index to the code which will generate the target relocation. + if (!Extern) { + MachOSection &To = *SectionList[TargetSection - 1]; + TargetAddr = To.addr; + TargetIndex = To.Index; + } + + OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian); + OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian); + + MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex, + RelocOut, SecOut, Scattered, Extern); + } +} + +// InitMem - Write the value of a Constant to the specified memory location, +// converting it into bytes and relocations. +void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, + const TargetData *TD, + std::vector<MachineRelocation> &MRs) { + typedef std::pair<const Constant*, intptr_t> CPair; + std::vector<CPair> WorkList; + + WorkList.push_back(CPair(C,(intptr_t)Addr + Offset)); + + intptr_t ScatteredOffset = 0; + + while (!WorkList.empty()) { + const Constant *PC = WorkList.back().first; + intptr_t PA = WorkList.back().second; + WorkList.pop_back(); + + if (isa<UndefValue>(PC)) { + continue; + } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) { + unsigned ElementSize = + TD->getTypeAllocSize(CP->getType()->getElementType()); + for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) + WorkList.push_back(CPair(CP->getOperand(i), PA+i*ElementSize)); + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(PC)) { + // + // FIXME: Handle ConstantExpression. See EE::getConstantValue() + // + switch (CE->getOpcode()) { + case Instruction::GetElementPtr: { + SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end()); + ScatteredOffset = TD->getIndexedOffset(CE->getOperand(0)->getType(), + &Indices[0], Indices.size()); + WorkList.push_back(CPair(CE->getOperand(0), PA)); + break; + } + case Instruction::Add: + default: + cerr << "ConstantExpr not handled as global var init: " << *CE << "\n"; + abort(); + break; + } + } else if (PC->getType()->isSingleValueType()) { + uint8_t *ptr = (uint8_t *)PA; + switch (PC->getType()->getTypeID()) { + case Type::IntegerTyID: { + unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth(); + uint64_t val = cast<ConstantInt>(PC)->getZExtValue(); + if (NumBits <= 8) + ptr[0] = val; + else if (NumBits <= 16) { + if (TD->isBigEndian()) + val = ByteSwap_16(val); + ptr[0] = val; + ptr[1] = val >> 8; + } else if (NumBits <= 32) { + if (TD->isBigEndian()) + val = ByteSwap_32(val); + ptr[0] = val; + ptr[1] = val >> 8; + ptr[2] = val >> 16; + ptr[3] = val >> 24; + } else if (NumBits <= 64) { + if (TD->isBigEndian()) + val = ByteSwap_64(val); + ptr[0] = val; + ptr[1] = val >> 8; + ptr[2] = val >> 16; + ptr[3] = val >> 24; + ptr[4] = val >> 32; + ptr[5] = val >> 40; + ptr[6] = val >> 48; + ptr[7] = val >> 56; + } else { + assert(0 && "Not implemented: bit widths > 64"); + } + break; + } + case Type::FloatTyID: { + uint32_t val = cast<ConstantFP>(PC)->getValueAPF().bitcastToAPInt(). + getZExtValue(); + if (TD->isBigEndian()) + val = ByteSwap_32(val); + ptr[0] = val; + ptr[1] = val >> 8; + ptr[2] = val >> 16; + ptr[3] = val >> 24; + break; + } + case Type::DoubleTyID: { + uint64_t val = cast<ConstantFP>(PC)->getValueAPF().bitcastToAPInt(). 
+ getZExtValue(); + if (TD->isBigEndian()) + val = ByteSwap_64(val); + ptr[0] = val; + ptr[1] = val >> 8; + ptr[2] = val >> 16; + ptr[3] = val >> 24; + ptr[4] = val >> 32; + ptr[5] = val >> 40; + ptr[6] = val >> 48; + ptr[7] = val >> 56; + break; + } + case Type::PointerTyID: + if (isa<ConstantPointerNull>(PC)) + memset(ptr, 0, TD->getPointerSize()); + else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) { + // FIXME: what about function stubs? + MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr, + MachineRelocation::VANILLA, + const_cast<GlobalValue*>(GV), + ScatteredOffset)); + ScatteredOffset = 0; + } else + assert(0 && "Unknown constant pointer type!"); + break; + default: + cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n"; + abort(); + } + } else if (isa<ConstantAggregateZero>(PC)) { + memset((void*)PA, 0, (size_t)TD->getTypeAllocSize(PC->getType())); + } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(PC)) { + unsigned ElementSize = + TD->getTypeAllocSize(CPA->getType()->getElementType()); + for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) + WorkList.push_back(CPair(CPA->getOperand(i), PA+i*ElementSize)); + } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(PC)) { + const StructLayout *SL = + TD->getStructLayout(cast<StructType>(CPS->getType())); + for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) + WorkList.push_back(CPair(CPS->getOperand(i), + PA+SL->getElementOffset(i))); + } else { + cerr << "Bad Type: " << *PC->getType() << "\n"; + assert(0 && "Unknown constant type to initialize memory with!"); + } + } +} + +MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, + TargetMachine &TM) : + GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect), + n_desc(0), n_value(0) { + + const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); + + switch (GV->getLinkage()) { + default: + assert(0 && "Unexpected linkage type!"); + break; + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::CommonLinkage: + assert(!isa<Function>(gv) && "Unexpected linkage type for Function!"); + case GlobalValue::ExternalLinkage: + GVName = TAI->getGlobalPrefix() + name; + n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT; + break; + case GlobalValue::PrivateLinkage: + GVName = TAI->getPrivateGlobalPrefix() + name; + break; + case GlobalValue::InternalLinkage: + GVName = TAI->getGlobalPrefix() + name; + break; + } +} diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h new file mode 100644 index 0000000..6ab66ee --- /dev/null +++ b/lib/CodeGen/MachOWriter.h @@ -0,0 +1,629 @@ +//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOWriter class. 
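+// The writer buffers each section's contents and relocations in memory while
+// functions and globals are emitted, and lays out and writes the complete
+// object file when the module is finalized.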
+// +//===----------------------------------------------------------------------===// + +#ifndef MACHOWRITER_H +#define MACHOWRITER_H + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetMachOWriterInfo.h" +#include <map> + +namespace llvm { + class GlobalVariable; + class Mangler; + class MachineCodeEmitter; + class MachOCodeEmitter; + class OutputBuffer; + class raw_ostream; + + /// MachOSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct MachOSym { + const GlobalValue *GV; // The global value this corresponds to. + std::string GVName; // The mangled name of the global value. + uint32_t n_strx; // index into the string table + uint8_t n_type; // type flag + uint8_t n_sect; // section number or NO_SECT + int16_t n_desc; // see <mach-o/stab.h> + uint64_t n_value; // value for this symbol (or stab offset) + + // Constants for the n_sect field + // see <mach-o/nlist.h> + enum { NO_SECT = 0 }; // symbol is not in any section + + // Constants for the n_type field + // see <mach-o/nlist.h> + enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT + N_ABS = 0x2, // absolute, n_sect == NO_SECT + N_SECT = 0xe, // defined in section number n_sect + N_PBUD = 0xc, // prebound undefined (defined in a dylib) + N_INDR = 0xa // indirect + }; + // The following bits are OR'd into the types above. For example, a type + // of 0x0f would be an external N_SECT symbol (0x0e | 0x01). + enum { N_EXT = 0x01, // external symbol bit + N_PEXT = 0x10 // private external symbol bit + }; + + // Constants for the n_desc field + // see <mach-o/loader.h> + enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, + REFERENCE_FLAG_UNDEFINED_LAZY = 1, + REFERENCE_FLAG_DEFINED = 2, + REFERENCE_FLAG_PRIVATE_DEFINED = 3, + REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, + REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5 + }; + enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped + N_WEAK_REF = 0x0040, // symbol is weak referenced + N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition + }; + + MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, + TargetMachine &TM); + }; + + /// MachOWriter - This class implements the common target-independent code for + /// writing Mach-O files. Targets should derive a class from this to + /// parameterize the output format. + /// + class MachOWriter : public MachineFunctionPass { + friend class MachOCodeEmitter; + public: + static char ID; + MachineCodeEmitter &getMachineCodeEmitter() const { + return *(MachineCodeEmitter*)MCE; + } + + MachOWriter(raw_ostream &O, TargetMachine &TM); + virtual ~MachOWriter(); + + virtual const char *getPassName() const { + return "Mach-O Writer"; + } + + typedef std::vector<uint8_t> DataBuffer; + protected: + /// Output stream to send the resultant object file to. + /// + raw_ostream &O; + + /// Target machine description. + /// + TargetMachine &TM; + + /// Mang - The object used to perform name mangling for this module. + /// + Mangler *Mang; + + /// MCE - The MachineCodeEmitter object that we are exposing to emit machine + /// code for functions to the .o file. 
+    MachOCodeEmitter *MCE;
+
+    /// is64Bit/isLittleEndian - This information is inferred from the target
+    /// machine directly, indicating what header values and flags to set.
+    bool is64Bit, isLittleEndian;
+
+    /// doInitialization - Emit the file header and all of the global variables
+    /// for the module to the Mach-O file.
+    bool doInitialization(Module &M);
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    /// doFinalization - Now that the module has been completely processed,
+    /// emit the Mach-O file to 'O'.
+    bool doFinalization(Module &M);
+
+    /// MachOHeader - This struct contains the header information about a
+    /// specific architecture type/subtype pair that is emitted to the file.
+    struct MachOHeader {
+      uint32_t magic;      // mach magic number identifier
+      uint32_t filetype;   // type of file
+      uint32_t ncmds;      // number of load commands
+      uint32_t sizeofcmds; // the size of all the load commands
+      uint32_t flags;      // flags
+      uint32_t reserved;   // 64-bit only
+
+      /// HeaderData - The actual data for the header which we are building
+      /// up for emission to the file.
+      DataBuffer HeaderData;
+
+      // Constants for the filetype field
+      // see <mach-o/loader.h> for additional info on the various types
+      enum { MH_OBJECT     = 1, // relocatable object file
+             MH_EXECUTE    = 2, // demand paged executable file
+             MH_FVMLIB     = 3, // fixed VM shared library file
+             MH_CORE       = 4, // core file
+             MH_PRELOAD    = 5, // preloaded executable file
+             MH_DYLIB      = 6, // dynamically bound shared library
+             MH_DYLINKER   = 7, // dynamic link editor
+             MH_BUNDLE     = 8, // dynamically bound bundle file
+             MH_DYLIB_STUB = 9, // shared library stub for static linking only
+             MH_DSYM       = 10 // companion file with only debug sections
+      };
+
+      // Constants for the flags field
+      enum { MH_NOUNDEFS = 1 << 0,
+             // the object file has no undefined references
+             MH_INCRLINK = 1 << 1,
+             // the object file is the output of an incremental link against
+             // a base file and cannot be link edited again
+             MH_DYLDLINK = 1 << 2,
+             // the object file is input for the dynamic linker and cannot be
+             // statically link edited again.
+             MH_BINDATLOAD = 1 << 3,
+             // the object file's undefined references are bound by the
+             // dynamic linker when loaded.
+             MH_PREBOUND = 1 << 4,
+             // the file has its dynamic undefined references prebound
+             MH_SPLIT_SEGS = 1 << 5,
+             // the file has its read-only and read-write segments split
+             // see <mach/shared_memory_server.h>
+             MH_LAZY_INIT = 1 << 6,
+             // the shared library init routine is to be run lazily via
+             // catching memory faults to its writable segments (obsolete)
+             MH_TWOLEVEL = 1 << 7,
+             // the image is using two-level namespace bindings
+             MH_FORCE_FLAT = 1 << 8,
+             // the executable is forcing all images to use flat namespace
+             // bindings.
+             MH_NOMULTIDEFS = 1 << 9,
+             // this umbrella guarantees no multiple definitions of symbols
+             // in its sub-images so the two-level namespace hints can
+             // always be used.
+             MH_NOFIXPREBINDING = 1 << 10,
+             // do not have dyld notify the prebinding agent about this
+             // executable.
+             MH_PREBINDABLE = 1 << 11,
+             // the binary is not prebound but can have its prebinding
+             // redone.  only used when MH_PREBOUND is not set.
+             MH_ALLMODSBOUND = 1 << 12,
+             // indicates that this binary binds to all two-level namespace
+             // modules of its dependent libraries.  Only used when
+             // MH_PREBINDABLE and MH_TWOLEVEL are both set.
+             MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
+             // safe to divide up the sections into sub-sections via symbols
+             // for dead code stripping.
+ MH_CANONICAL = 1 << 14, + // the binary has been canonicalized via the unprebind operation + MH_WEAK_DEFINES = 1 << 15, + // the final linked image contains external weak symbols + MH_BINDS_TO_WEAK = 1 << 16, + // the final linked image uses weak symbols + MH_ALLOW_STACK_EXECUTION = 1 << 17 + // When this bit is set, all stacks in the task will be given + // stack execution privilege. Only used in MH_EXECUTE filetype + }; + + MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0), + reserved(0) { } + + /// cmdSize - This routine returns the size of the MachOSection as written + /// to disk, depending on whether the destination is a 64 bit Mach-O file. + unsigned cmdSize(bool is64Bit) const { + if (is64Bit) + return 8 * sizeof(uint32_t); + else + return 7 * sizeof(uint32_t); + } + + /// setMagic - This routine sets the appropriate value for the 'magic' + /// field based on pointer size and endianness. + void setMagic(bool isLittleEndian, bool is64Bit) { + if (isLittleEndian) + if (is64Bit) magic = 0xcffaedfe; + else magic = 0xcefaedfe; + else + if (is64Bit) magic = 0xfeedfacf; + else magic = 0xfeedface; + } + }; + + /// Header - An instance of MachOHeader that we will update while we build + /// the file, and then emit during finalization. + MachOHeader Header; + + /// MachOSegment - This struct contains the necessary information to + /// emit the load commands for each section in the file. + struct MachOSegment { + uint32_t cmd; // LC_SEGMENT or LC_SEGMENT_64 + uint32_t cmdsize; // Total size of this struct and section commands + std::string segname; // segment name + uint64_t vmaddr; // address of this segment + uint64_t vmsize; // size of this segment, may be larger than filesize + uint64_t fileoff; // offset in file + uint64_t filesize; // amount to read from file + uint32_t maxprot; // maximum VM protection + uint32_t initprot; // initial VM protection + uint32_t nsects; // number of sections in this segment + uint32_t flags; // flags + + // The following constants are getting pulled in by one of the + // system headers, which creates a neat clash with the enum. +#if !defined(VM_PROT_NONE) +#define VM_PROT_NONE 0x00 +#endif +#if !defined(VM_PROT_READ) +#define VM_PROT_READ 0x01 +#endif +#if !defined(VM_PROT_WRITE) +#define VM_PROT_WRITE 0x02 +#endif +#if !defined(VM_PROT_EXECUTE) +#define VM_PROT_EXECUTE 0x04 +#endif +#if !defined(VM_PROT_ALL) +#define VM_PROT_ALL 0x07 +#endif + + // Constants for the vm protection fields + // see <mach-o/vm_prot.h> + enum { SEG_VM_PROT_NONE = VM_PROT_NONE, + SEG_VM_PROT_READ = VM_PROT_READ, // read permission + SEG_VM_PROT_WRITE = VM_PROT_WRITE, // write permission + SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE, + SEG_VM_PROT_ALL = VM_PROT_ALL + }; + + // Constants for the cmd field + // see <mach-o/loader.h> + enum { LC_SEGMENT = 0x01, // segment of this file to be mapped + LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped + }; + + /// cmdSize - This routine returns the size of the MachOSection as written + /// to disk, depending on whether the destination is a 64 bit Mach-O file. + unsigned cmdSize(bool is64Bit) const { + if (is64Bit) + return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16; + else + return 10 * sizeof(uint32_t) + 16; // addresses only 32 bits + } + + MachOSegment(const std::string &seg, bool is64Bit) + : cmd(is64Bit ? 
+
+    /// MachOSegment - This struct contains the necessary information to
+    /// emit the load commands for each section in the file.
+    struct MachOSegment {
+      uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
+      uint32_t    cmdsize;  // Total size of this struct and section commands
+      std::string segname;  // segment name
+      uint64_t    vmaddr;   // address of this segment
+      uint64_t    vmsize;   // size of this segment, may be larger than filesize
+      uint64_t    fileoff;  // offset in file
+      uint64_t    filesize; // amount to read from file
+      uint32_t    maxprot;  // maximum VM protection
+      uint32_t    initprot; // initial VM protection
+      uint32_t    nsects;   // number of sections in this segment
+      uint32_t    flags;    // flags
+
+      // The following constants are getting pulled in by one of the
+      // system headers, which creates a neat clash with the enum.
+#if !defined(VM_PROT_NONE)
+#define VM_PROT_NONE    0x00
+#endif
+#if !defined(VM_PROT_READ)
+#define VM_PROT_READ    0x01
+#endif
+#if !defined(VM_PROT_WRITE)
+#define VM_PROT_WRITE   0x02
+#endif
+#if !defined(VM_PROT_EXECUTE)
+#define VM_PROT_EXECUTE 0x04
+#endif
+#if !defined(VM_PROT_ALL)
+#define VM_PROT_ALL     0x07
+#endif
+
+      // Constants for the vm protection fields
+      // see <mach-o/vm_prot.h>
+      enum { SEG_VM_PROT_NONE    = VM_PROT_NONE,
+             SEG_VM_PROT_READ    = VM_PROT_READ,  // read permission
+             SEG_VM_PROT_WRITE   = VM_PROT_WRITE, // write permission
+             SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
+             SEG_VM_PROT_ALL     = VM_PROT_ALL
+      };
+
+      // Constants for the cmd field
+      // see <mach-o/loader.h>
+      enum { LC_SEGMENT    = 0x01, // segment of this file to be mapped
+             LC_SEGMENT_64 = 0x19  // 64-bit segment of this file to be mapped
+      };
+
+      /// cmdSize - This routine returns the size of the MachOSegment as written
+      /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+      unsigned cmdSize(bool is64Bit) const {
+        if (is64Bit)
+          return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
+        else
+          return 10 * sizeof(uint32_t) + 16;  // addresses only 32 bits
+      }
+
+      MachOSegment(const std::string &seg, bool is64Bit)
+        : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
+          vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
+          initprot(VM_PROT_ALL), nsects(0), flags(0) { }
+    };
+
+    /// MachOSection - This struct contains information about each section in a
+    /// particular segment that is emitted to the file.  This is eventually
+    /// turned into the SectionCommand in the load command for a particular
+    /// segment.
+    struct MachOSection {
+      std::string sectname;  // name of this section
+      std::string segname;   // segment this section goes in
+      uint64_t    addr;      // memory address of this section
+      uint64_t    size;      // size in bytes of this section
+      uint32_t    offset;    // file offset of this section
+      uint32_t    align;     // section alignment (power of 2)
+      uint32_t    reloff;    // file offset of relocation entries
+      uint32_t    nreloc;    // number of relocation entries
+      uint32_t    flags;     // flags (section type and attributes)
+      uint32_t    reserved1; // reserved (for offset or index)
+      uint32_t    reserved2; // reserved (for count or sizeof)
+      uint32_t    reserved3; // reserved (64 bit only)
+
+      /// A unique number for this section, which will be used to match symbols
+      /// to the correct section.
+      uint32_t Index;
+
+      /// SectionData - The actual data for this section which we are building
+      /// up for emission to the file.
+      DataBuffer SectionData;
+
+      /// RelocBuffer - A buffer to hold the mach-o relocations before we write
+      /// them out at the appropriate location in the file.
+      DataBuffer RelocBuffer;
+
+      /// Relocations - The relocations that we have encountered so far in this
+      /// section that we will need to convert to MachORelocation entries when
+      /// the file is written.
+      std::vector<MachineRelocation> Relocations;
+
+      // Constants for the section types (low 8 bits of flags field)
+      // see <mach-o/loader.h>
+      enum { S_REGULAR = 0,
+             // regular section
+             S_ZEROFILL = 1,
+             // zero fill on demand section
+             S_CSTRING_LITERALS = 2,
+             // section with only literal C strings
+             S_4BYTE_LITERALS = 3,
+             // section with only 4 byte literals
+             S_8BYTE_LITERALS = 4,
+             // section with only 8 byte literals
+             S_LITERAL_POINTERS = 5,
+             // section with only pointers to literals
+             S_NON_LAZY_SYMBOL_POINTERS = 6,
+             // section with only non-lazy symbol pointers
+             S_LAZY_SYMBOL_POINTERS = 7,
+             // section with only lazy symbol pointers
+             S_SYMBOL_STUBS = 8,
+             // section with only symbol stubs
+             // byte size of stub in the reserved2 field
+             S_MOD_INIT_FUNC_POINTERS = 9,
+             // section with only function pointers for initialization
+             S_MOD_TERM_FUNC_POINTERS = 10,
+             // section with only function pointers for termination
+             S_COALESCED = 11,
+             // section contains symbols that are coalesced
+             S_GB_ZEROFILL = 12,
+             // zero fill on demand section (that can be larger than 4GB)
+             S_INTERPOSING = 13,
+             // section with only pairs of function pointers for interposing
+             S_16BYTE_LITERALS = 14
+             // section with only 16 byte literals
+      };
+
+      // Constants for the section flags (high 24 bits of flags field)
+      // see <mach-o/loader.h>
+      enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31,
+             // section contains only true machine instructions
+             S_ATTR_NO_TOC = 1 << 30,
+             // section contains coalesced symbols that are not to be in a
+             // ranlib table of contents
+             S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
+             // ok to strip static symbols in this section in files with the
+             // MH_DYLDLINK flag
+             S_ATTR_NO_DEAD_STRIP = 1 << 28,
+             // no dead stripping
+             S_ATTR_LIVE_SUPPORT = 1 << 27,
+             // blocks are live if they reference live blocks
+             S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
+             // used with i386 code stubs written on by dyld
+             S_ATTR_DEBUG = 1 << 25,
+             // a debug section
+             S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
+             // section contains some machine instructions
+             S_ATTR_EXT_RELOC = 1 << 9,
+             // section has external relocation entries
+             S_ATTR_LOC_RELOC = 1 << 8
+             // section has local relocation entries
+      };
+
+      /// cmdSize - This routine returns the size of the MachOSection as written
+      /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+      unsigned cmdSize(bool is64Bit) const {
+        if (is64Bit)
+          return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
+        else
+          return 9 * sizeof(uint32_t) + 32;  // addresses only 32 bits
+      }
+
+      MachOSection(const std::string &seg, const std::string &sect)
+        : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
+          reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+          reserved3(0) { }
+    };
+
+  private:
+
+    /// SectionList - This is the list of sections that we have emitted to the
+    /// file.  Once the file has been completely built, the segment load command
+    /// SectionCommands are constructed from this info.
+    std::vector<MachOSection*> SectionList;
+
+    /// SectionLookup - This is a mapping from section name to SectionList entry
+    std::map<std::string, MachOSection*> SectionLookup;
+
+    /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
+    /// to aid in emitting relocations.
+    std::map<GlobalValue*, MachOSection*> GVSection;
+
+    /// GVOffset - This is a mapping from a GlobalValue to an offset from the
+    /// start of the section in which the GV resides, to aid in emitting
+    /// relocations.
+    std::map<GlobalValue*, intptr_t> GVOffset;
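The getSection helper that follows is a create-on-miss cache over SectionLookup: the first request for a (segment, section) pair allocates the MachOSection and registers it in both containers, and later requests return the cached pointer. A self-contained sketch of the same pattern (the Section type and field names here are invented for illustration, not part of the patch):

#include <map>
#include <memory>
#include <string>
#include <vector>

struct Section { std::string seg, sect; unsigned index; };

struct SectionTable {
  std::vector<std::unique_ptr<Section> > list;
  std::map<std::string, Section*> lookup;  // keyed on seg+sect, as below

  Section *get(const std::string &seg, const std::string &sect) {
    Section *&slot = lookup[seg + sect];
    if (!slot) {  // first request: create, then register in both containers
      list.push_back(std::unique_ptr<Section>(new Section{seg, sect, 0}));
      slot = list.back().get();
      slot->index = (unsigned)list.size();  // 1-based, like MOS->Index
    }
    return slot;
  }
};

int main() {
  SectionTable t;
  return t.get("__TEXT", "__text") == t.get("__TEXT", "__text") ? 0 : 1;
}

One quirk worth noting: keying the cache on the bare concatenation seg+sect means two different pairs could in principle produce the same key; inserting a separator character into the key would rule that out.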
+
+    /// getSection - Return the section with the specified name, creating a new
+    /// section if one does not already exist.
+    MachOSection *getSection(const std::string &seg, const std::string &sect,
+                             unsigned Flags = 0) {
+      MachOSection *MOS = SectionLookup[seg+sect];
+      if (MOS) return MOS;
+
+      MOS = new MachOSection(seg, sect);
+      SectionList.push_back(MOS);
+      MOS->Index = SectionList.size();
+      MOS->flags = MachOSection::S_REGULAR | Flags;
+      SectionLookup[seg+sect] = MOS;
+      return MOS;
+    }
+    MachOSection *getTextSection(bool isCode = true) {
+      if (isCode)
+        return getSection("__TEXT", "__text",
+                          MachOSection::S_ATTR_PURE_INSTRUCTIONS |
+                          MachOSection::S_ATTR_SOME_INSTRUCTIONS);
+      else
+        return getSection("__TEXT", "__text");
+    }
+    MachOSection *getBSSSection() {
+      return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
+    }
+    MachOSection *getDataSection() {
+      return getSection("__DATA", "__data");
+    }
+    MachOSection *getConstSection(Constant *C) {
+      const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+      if (CVA && CVA->isCString())
+        return getSection("__TEXT", "__cstring",
+                          MachOSection::S_CSTRING_LITERALS);
+
+      const Type *Ty = C->getType();
+      if (Ty->isPrimitiveType() || Ty->isInteger()) {
+        unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+        switch (Size) {
+        default: break; // Fall through to __TEXT,__const
+        case 4:
+          return getSection("__TEXT", "__literal4",
+                            MachOSection::S_4BYTE_LITERALS);
+        case 8:
+          return getSection("__TEXT", "__literal8",
+                            MachOSection::S_8BYTE_LITERALS);
+        case 16:
+          return getSection("__TEXT", "__literal16",
+                            MachOSection::S_16BYTE_LITERALS);
+        }
+      }
+      return getSection("__TEXT", "__const");
+    }
+    MachOSection *getJumpTableSection() {
+      if (TM.getRelocationModel() == Reloc::PIC_)
+        return getTextSection(false);
+      else
+        return getSection("__TEXT", "__const");
+    }
+
+    /// MachOSymTab - This struct contains information about the offsets and
+    /// size of the symbol table information.
+    struct MachOSymTab {
+      uint32_t cmd;     // LC_SYMTAB
+      uint32_t cmdsize; // sizeof( MachOSymTab )
+      uint32_t symoff;  // symbol table offset
+      uint32_t nsyms;   // number of symbol table entries
+      uint32_t stroff;  // string table offset
+      uint32_t strsize; // string table size in bytes
+
+      // Constants for the cmd field
+      // see <mach-o/loader.h>
+      enum { LC_SYMTAB = 0x02  // link-edit stab symbol table info
+      };
+
+      MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
+        nsyms(0), stroff(0), strsize(0) { }
+    };
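getConstSection above routes a constant by kind and size: C strings go to __cstring, 4/8/16-byte scalars go to the fixed-width literal sections, and everything else falls through to __TEXT,__const. The decision reduces to a small table; a standalone illustration (not part of the patch):

#include <cstdint>
#include <string>
#include <utility>

// Maps a constant's allocated size (and C-string-ness) to the Mach-O
// (segment, section) pair, mirroring the switch in getConstSection.
static std::pair<std::string, std::string>
constSectionFor(uint64_t size, bool isCString) {
  if (isCString) return std::make_pair("__TEXT", "__cstring");
  switch (size) {
  case 4:  return std::make_pair("__TEXT", "__literal4");
  case 8:  return std::make_pair("__TEXT", "__literal8");
  case 16: return std::make_pair("__TEXT", "__literal16");
  default: return std::make_pair("__TEXT", "__const");  // fallback bucket
  }
}

int main() {
  return constSectionFor(8, false).second == "__literal8" ? 0 : 1;
}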
+
+    /// MachODySymTab - This struct contains information about the offsets and
+    /// sizes of the dynamic symbol table information.
+    struct MachODySymTab {
+      uint32_t cmd;            // LC_DYSYMTAB
+      uint32_t cmdsize;        // sizeof( MachODySymTab )
+      uint32_t ilocalsym;      // index to local symbols
+      uint32_t nlocalsym;      // number of local symbols
+      uint32_t iextdefsym;     // index to externally defined symbols
+      uint32_t nextdefsym;     // number of externally defined symbols
+      uint32_t iundefsym;      // index to undefined symbols
+      uint32_t nundefsym;      // number of undefined symbols
+      uint32_t tocoff;         // file offset to table of contents
+      uint32_t ntoc;           // number of entries in table of contents
+      uint32_t modtaboff;      // file offset to module table
+      uint32_t nmodtab;        // number of module table entries
+      uint32_t extrefsymoff;   // offset to referenced symbol table
+      uint32_t nextrefsyms;    // number of referenced symbol table entries
+      uint32_t indirectsymoff; // file offset to the indirect symbol table
+      uint32_t nindirectsyms;  // number of indirect symbol table entries
+      uint32_t extreloff;      // offset to external relocation entries
+      uint32_t nextrel;        // number of external relocation entries
+      uint32_t locreloff;      // offset to local relocation entries
+      uint32_t nlocrel;        // number of local relocation entries
+
+      // Constants for the cmd field
+      // see <mach-o/loader.h>
+      enum { LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
+      };
+
+      MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+        ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+        iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+        nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+        nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
+    };
+
+    /// SymTab - The "stab" style symbol table information
+    MachOSymTab   SymTab;
+    /// DySymTab - symbol table info for the dynamic link editor
+    MachODySymTab DySymTab;
+
+    struct MachOSymCmp {
+      // FIXME: this does not appear to be sorting 'f' after 'F'
+      bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
+        return LHS.GVName < RHS.GVName;
+      }
+    };
+
+    /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+    /// a local symbol rather than an external symbol.
+    static bool PartitionByLocal(const MachOSym &Sym);
+
+    /// PartitionByDefined - Simple boolean predicate that returns true if Sym
+    /// is defined in this module.
+    static bool PartitionByDefined(const MachOSym &Sym);
+
+  protected:
+
+    /// SymbolTable - This is the list of symbols we have emitted to the file.
+    /// This actually gets rearranged before emission to the file (to put the
+    /// local symbols first in the list).
+    std::vector<MachOSym> SymbolTable;
+
+    /// SymT - A buffer to hold the symbol table before we write it out at the
+    /// appropriate location in the file.
+    DataBuffer SymT;
+
+    /// StrT - A buffer to hold the string table before we write it out at the
+    /// appropriate location in the file.
+    DataBuffer StrT;
+
+    /// PendingGlobals - This is a list of externally defined symbols that we
+    /// have been asked to emit, but have not seen a reference to.  When a
+    /// reference is seen, the symbol will move from this list to the
+    /// SymbolTable.
+    std::vector<GlobalValue*> PendingGlobals;
+
+    /// DynamicSymbolTable - This is just a vector of indices into
+    /// SymbolTable to aid in emitting the DYSYMTAB load command.
+ std::vector<unsigned> DynamicSymbolTable; + + static void InitMem(const Constant *C, void *Addr, intptr_t Offset, + const TargetData *TD, + std::vector<MachineRelocation> &MRs); + + private: + void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV); + void EmitGlobal(GlobalVariable *GV); + void EmitHeaderAndLoadCommands(); + void EmitSections(); + void BufferSymbolAndStringTable(); + void CalculateRelocations(MachOSection &MOS); + + MachineRelocation GetJTRelocation(unsigned Offset, + MachineBasicBlock *MBB) const { + return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB); + } + + /// GetTargetRelocation - Returns the number of relocations. + unsigned GetTargetRelocation(MachineRelocation &MR, + unsigned FromIdx, + unsigned ToAddr, + unsigned ToIndex, + OutputBuffer &RelocOut, + OutputBuffer &SecOut, + bool Scattered, + bool Extern) { + return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr, + ToIndex, RelocOut, + SecOut, Scattered, + Extern); + } + }; +} + +#endif diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp new file mode 100644 index 0000000..71e6b3e --- /dev/null +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -0,0 +1,372 @@ +//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect the sequence of machine instructions for a basic block. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrDesc.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/LeakDetector.h" +#include <algorithm> +using namespace llvm; + +MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) + : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false) { + Insts.Parent = this; +} + +MachineBasicBlock::~MachineBasicBlock() { + LeakDetector::removeGarbageObject(this); +} + +std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) { + MBB.print(OS); + return OS; +} + +/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the +/// parent pointer of the MBB, the MBB numbering, and any instructions in the +/// MBB to be on the right operand list for registers. +/// +/// MBBs start out as #-1. When a MBB is added to a MachineFunction, it +/// gets the next available unique MBB number. If it is removed from a +/// MachineFunction, it goes back to being #-1. +void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) { + MachineFunction &MF = *N->getParent(); + N->Number = MF.addToMBBNumbering(N); + + // Make sure the instructions have their operands in the reginfo lists. 
+ MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I) + I->AddRegOperandsToUseLists(RegInfo); + + LeakDetector::removeGarbageObject(N); +} + +void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) { + N->getParent()->removeFromMBBNumbering(N->Number); + N->Number = -1; + LeakDetector::addGarbageObject(N); +} + + +/// addNodeToList (MI) - When we add an instruction to a basic block +/// list, we update its parent pointer and add its operands from reg use/def +/// lists if appropriate. +void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) { + assert(N->getParent() == 0 && "machine instruction already in a basic block"); + N->setParent(Parent); + + // Add the instruction's register operands to their corresponding + // use/def lists. + MachineFunction *MF = Parent->getParent(); + N->AddRegOperandsToUseLists(MF->getRegInfo()); + + LeakDetector::removeGarbageObject(N); +} + +/// removeNodeFromList (MI) - When we remove an instruction from a basic block +/// list, we update its parent pointer and remove its operands from reg use/def +/// lists if appropriate. +void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) { + assert(N->getParent() != 0 && "machine instruction not in a basic block"); + + // Remove from the use/def lists. + N->RemoveRegOperandsFromUseLists(); + + N->setParent(0); + + LeakDetector::addGarbageObject(N); +} + +/// transferNodesFromList (MI) - When moving a range of instructions from one +/// MBB list to another, we need to update the parent pointers and the use/def +/// lists. +void ilist_traits<MachineInstr>::transferNodesFromList( + ilist_traits<MachineInstr>& fromList, + MachineBasicBlock::iterator first, + MachineBasicBlock::iterator last) { + assert(Parent->getParent() == fromList.Parent->getParent() && + "MachineInstr parent mismatch!"); + + // Splice within the same MBB -> no change. + if (Parent == fromList.Parent) return; + + // If splicing between two blocks within the same function, just update the + // parent pointers. 
+ for (; first != last; ++first) + first->setParent(Parent); +} + +void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { + assert(!MI->getParent() && "MI is still in a block!"); + Parent->getParent()->DeleteMachineInstr(MI); +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { + iterator I = end(); + while (I != begin() && (--I)->getDesc().isTerminator()) + ; /*noop */ + if (I != end() && !I->getDesc().isTerminator()) ++I; + return I; +} + +bool +MachineBasicBlock::isOnlyReachableByFallthrough() const { + return !isLandingPad() && + !pred_empty() && + next(pred_begin()) == pred_end() && + (*pred_begin())->isLayoutSuccessor(this) && + ((*pred_begin())->empty() || + !(*pred_begin())->back().getDesc().isBarrier()); +} + +void MachineBasicBlock::dump() const { + print(*cerr.stream()); +} + +static inline void OutputReg(std::ostream &os, unsigned RegNo, + const TargetRegisterInfo *TRI = 0) { + if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) { + if (TRI) + os << " %" << TRI->get(RegNo).Name; + else + os << " %mreg(" << RegNo << ")"; + } else + os << " %reg" << RegNo; +} + +void MachineBasicBlock::print(std::ostream &OS) const { + const MachineFunction *MF = getParent(); + if(!MF) { + OS << "Can't print out MachineBasicBlock because parent MachineFunction" + << " is null\n"; + return; + } + + const BasicBlock *LBB = getBasicBlock(); + OS << "\n"; + if (LBB) OS << LBB->getName() << ": "; + OS << (const void*)this + << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber(); + if (Alignment) OS << ", Alignment " << Alignment; + if (isLandingPad()) OS << ", EH LANDING PAD"; + OS << ":\n"; + + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (!livein_empty()) { + OS << "Live Ins:"; + for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) + OutputReg(OS, *I, TRI); + OS << "\n"; + } + // Print the preds of this block according to the CFG. + if (!pred_empty()) { + OS << " Predecessors according to CFG:"; + for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) + OS << " " << *PI << " (#" << (*PI)->getNumber() << ")"; + OS << "\n"; + } + + for (const_iterator I = begin(); I != end(); ++I) { + OS << "\t"; + I->print(OS, &getParent()->getTarget()); + } + + // Print the successors of this block according to the CFG. 
+  if (!succ_empty()) {
+    OS << "    Successors according to CFG:";
+    for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+      OS << " " << *SI << " (#" << (*SI)->getNumber() << ")";
+    OS << "\n";
+  }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+  livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+  assert(I != livein_end() && "Not a live in!");
+  LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
+  const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+  return I != livein_end();
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+  getParent()->splice(NewAfter, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+  MachineFunction::iterator BBI = NewBefore;
+  getParent()->splice(++BBI, this);
+}
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
+  Successors.push_back(succ);
+  succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+  succ->removePredecessor(this);
+  succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+  assert(I != Successors.end() && "Not a current successor!");
+  Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+  assert(I != Successors.end() && "Not a current successor!");
+  (*I)->removePredecessor(this);
+  return Successors.erase(I);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+  Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+  std::vector<MachineBasicBlock *>::iterator I =
+    std::find(Predecessors.begin(), Predecessors.end(), pred);
+  assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+  Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
+  if (this == fromMBB)
+    return;
+
+  for (MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(),
+       end = fromMBB->succ_end(); iter != end; ++iter) {
+    addSuccessor(*iter);
+  }
+  while (!fromMBB->succ_empty())
+    fromMBB->removeSuccessor(fromMBB->succ_begin());
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+  std::vector<MachineBasicBlock *>::const_iterator I =
+    std::find(Successors.begin(), Successors.end(), MBB);
+  return I != Successors.end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+  MachineFunction::const_iterator I(this);
+  return next(I) == MachineFunction::const_iterator(MBB);
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+  assert(getParent() && "Not embedded in a function!");
+  getParent()->remove(this);
+  return this;
+}
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+  assert(getParent() && "Not embedded in a function!");
+  getParent()->erase(this);
+}
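The successor and predecessor helpers above keep the two edge lists as mirror images, and transferSuccessors moves every out-edge of one block to another by copying the list and then draining the source. The same bookkeeping on a toy node type (a hypothetical sketch, not LLVM's classes):

#include <algorithm>
#include <cassert>
#include <vector>

struct Node {
  std::vector<Node*> succs, preds;

  void addSuccessor(Node *s) { succs.push_back(s); s->preds.push_back(this); }

  void removeSuccessor(Node *s) {
    std::vector<Node*>::iterator i = std::find(succs.begin(), succs.end(), s);
    assert(i != succs.end() && "not a successor");
    succs.erase(i);
    s->preds.erase(std::find(s->preds.begin(), s->preds.end(), this));
  }

  // Copy the source's out-edges onto this node, then drain the source,
  // which is the shape of MachineBasicBlock::transferSuccessors.
  void transferSuccessors(Node *from) {
    if (this == from) return;
    for (size_t i = 0, e = from->succs.size(); i != e; ++i)
      addSuccessor(from->succs[i]);
    while (!from->succs.empty())
      from->removeSuccessor(from->succs.front());
  }
};

int main() {
  Node a, b, c;
  a.addSuccessor(&c);
  b.transferSuccessors(&a);  // b now owns the edge to c; a has none
  return (b.succs.size() == 1 && a.succs.empty()) ? 0 : 1;
}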
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+                                               MachineBasicBlock *New) {
+  assert(Old != New && "Cannot replace self with self!");
+
+  MachineBasicBlock::iterator I = end();
+  while (I != begin()) {
+    --I;
+    if (!I->getDesc().isTerminator()) break;
+
+    // Scan the operands of this machine instruction, replacing any uses of Old
+    // with New.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      if (I->getOperand(i).isMBB() &&
+          I->getOperand(i).getMBB() == Old)
+        I->getOperand(i).setMBB(New);
+  }
+
+  // Update the successor information.
+  removeSuccessor(Old);
+  addSuccessor(New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted.  If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG.  DestA and DestB can
+/// be null.
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+                                             MachineBasicBlock *DestB,
+                                             bool isCond) {
+  bool MadeChange = false;
+  bool AddedFallThrough = false;
+
+  MachineFunction::iterator FallThru = next(MachineFunction::iterator(this));
+
+  // If this block ends with a conditional branch that falls through to its
+  // successor, set DestB as the successor.
+  if (isCond) {
+    if (DestB == 0 && FallThru != getParent()->end()) {
+      DestB = FallThru;
+      AddedFallThrough = true;
+    }
+  } else {
+    // If this is an unconditional branch with no explicit dest, it must just be
+    // a fallthrough into DestA.
+    if (DestA == 0 && FallThru != getParent()->end()) {
+      DestA = FallThru;
+      AddedFallThrough = true;
+    }
+  }
+
+  MachineBasicBlock::succ_iterator SI = succ_begin();
+  MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
+  while (SI != succ_end()) {
+    if (*SI == DestA && DestA == DestB) {
+      DestA = DestB = 0;
+      ++SI;
+    } else if (*SI == DestA) {
+      DestA = 0;
+      ++SI;
+    } else if (*SI == DestB) {
+      DestB = 0;
+      ++SI;
+    } else if ((*SI)->isLandingPad() &&
+               *SI != OrigDestA && *SI != OrigDestB) {
+      ++SI;
+    } else {
+      // Otherwise, this is a superfluous edge, remove it.
+      SI = removeSuccessor(SI);
+      MadeChange = true;
+    }
+  }
+  if (!AddedFallThrough) {
+    assert(DestA == 0 && DestB == 0 &&
+           "MachineCFG is missing edges!");
+  } else if (isCond) {
+    assert(DestA == 0 && "MachineCFG is missing edges!");
+  }
+  return MadeChange;
+}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 0000000..37c8601
--- /dev/null
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,53 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>); +TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); + +char MachineDominatorTree::ID = 0; + +static RegisterPass<MachineDominatorTree> +E("machinedomtree", "MachineDominator Tree Construction", true); + +const PassInfo *const llvm::MachineDominatorsID = &E; + +void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + + return false; +} + +MachineDominatorTree::MachineDominatorTree() + : MachineFunctionPass(&ID) { + DT = new DominatorTreeBase<MachineBasicBlock>(false); +} + +MachineDominatorTree::~MachineDominatorTree() { + DT->releaseMemory(); + delete DT; +} + +void MachineDominatorTree::releaseMemory() { + DT->releaseMemory(); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp new file mode 100644 index 0000000..cacfed1 --- /dev/null +++ b/lib/CodeGen/MachineFunction.cpp @@ -0,0 +1,598 @@ +//===-- MachineFunction.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect native machine code information for a function. This allows +// target-specific information about the generated code to be stored with each +// function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/config.h" +#include <fstream> +#include <sstream> +using namespace llvm; + +bool MachineFunctionPass::runOnFunction(Function &F) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. 
+ if (F.hasAvailableExternallyLinkage()) + return false; + + return runOnMachineFunction(MachineFunction::get(&F)); +} + +namespace { + struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass { + static char ID; + + std::ostream *OS; + const std::string Banner; + + Printer (std::ostream *os, const std::string &banner) + : MachineFunctionPass(&ID), OS(os), Banner(banner) {} + + const char *getPassName() const { return "MachineFunction Printer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnMachineFunction(MachineFunction &MF) { + (*OS) << Banner; + MF.print (*OS); + return false; + } + }; + char Printer::ID = 0; +} + +/// Returns a newly-created MachineFunction Printer pass. The default output +/// stream is std::cerr; the default banner is empty. +/// +FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS, + const std::string &Banner){ + return new Printer(OS, Banner); +} + +namespace { + struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass { + static char ID; + Deleter() : MachineFunctionPass(&ID) {} + + const char *getPassName() const { return "Machine Code Deleter"; } + + bool runOnMachineFunction(MachineFunction &MF) { + // Delete the annotation from the function now. + MachineFunction::destruct(MF.getFunction()); + return true; + } + }; + char Deleter::ID = 0; +} + +/// MachineCodeDeletion Pass - This pass deletes all of the machine code for +/// the current function, which should happen after the function has been +/// emitted to a .s file or to memory. +FunctionPass *llvm::createMachineCodeDeleter() { + return new Deleter(); +} + + + +//===---------------------------------------------------------------------===// +// MachineFunction implementation +//===---------------------------------------------------------------------===// + +void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { + MBB->getParent()->DeleteMachineBasicBlock(MBB); +} + +MachineFunction::MachineFunction(const Function *F, + const TargetMachine &TM) + : Annotation(AnnotationManager::getID("CodeGen::MachineCodeForFunction")), + Fn(F), Target(TM) { + if (TM.getRegisterInfo()) + RegInfo = new (Allocator.Allocate<MachineRegisterInfo>()) + MachineRegisterInfo(*TM.getRegisterInfo()); + else + RegInfo = 0; + MFInfo = 0; + FrameInfo = new (Allocator.Allocate<MachineFrameInfo>()) + MachineFrameInfo(*TM.getFrameInfo()); + ConstantPool = new (Allocator.Allocate<MachineConstantPool>()) + MachineConstantPool(TM.getTargetData()); + + // Set up jump table. + const TargetData &TD = *TM.getTargetData(); + bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + unsigned EntrySize = IsPic ? 4 : TD.getPointerSize(); + unsigned Alignment = IsPic ? 
TD.getABITypeAlignment(Type::Int32Ty)
+    : TD.getPointerABIAlignment();
+  JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
+      MachineJumpTableInfo(EntrySize, Alignment);
+}
+
+MachineFunction::~MachineFunction() {
+  BasicBlocks.clear();
+  InstructionRecycler.clear(Allocator);
+  BasicBlockRecycler.clear(Allocator);
+  if (RegInfo) {
+    RegInfo->~MachineRegisterInfo();
+    Allocator.Deallocate(RegInfo);
+  }
+  if (MFInfo) {
+    MFInfo->~MachineFunctionInfo();
+    Allocator.Deallocate(MFInfo);
+  }
+  FrameInfo->~MachineFrameInfo();         Allocator.Deallocate(FrameInfo);
+  ConstantPool->~MachineConstantPool();   Allocator.Deallocate(ConstantPool);
+  JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo);
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them.  This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function.  If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+  if (empty()) { MBBNumbering.clear(); return; }
+  MachineFunction::iterator MBBI, E = end();
+  if (MBB == 0)
+    MBBI = begin();
+  else
+    MBBI = MBB;
+
+  // Figure out the block number this should have.
+  unsigned BlockNo = 0;
+  if (MBBI != begin())
+    BlockNo = prior(MBBI)->getNumber()+1;
+
+  for (; MBBI != E; ++MBBI, ++BlockNo) {
+    if (MBBI->getNumber() != (int)BlockNo) {
+      // Remove use of the old number.
+      if (MBBI->getNumber() != -1) {
+        assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+               "MBB number mismatch!");
+        MBBNumbering[MBBI->getNumber()] = 0;
+      }
+
+      // If BlockNo is already taken, set that block's number to -1.
+      if (MBBNumbering[BlockNo])
+        MBBNumbering[BlockNo]->setNumber(-1);
+
+      MBBNumbering[BlockNo] = MBBI;
+      MBBI->setNumber(BlockNo);
+    }
+  }
+
+  // Okay, all the blocks are renumbered.  If we have compactified the block
+  // numbering, shrink MBBNumbering now.
+  assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+  MBBNumbering.resize(BlockNo);
+}
+
+/// CreateMachineInstr - Allocate a new MachineInstr.  Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+                                    DebugLoc DL, bool NoImp) {
+  return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+    MachineInstr(TID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except that the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+  return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+    MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+  // Clear the instructions memoperands. This must be done manually because
+  // the instruction's parent pointer is now null, so it can't properly
+  // deallocate them on its own.
+  MI->clearMemOperands(*this);
+
+  MI->~MachineInstr();
+  InstructionRecycler.Deallocate(Allocator, MI);
+}
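RenumberBlocks above maintains the invariant that block numbers are dense, sequential, and equal to layout position, with MBBNumbering as the reverse map. The function renumbers incrementally from a given block; the postcondition it establishes looks like this on a toy block type (a from-scratch sketch of the invariant, not the incremental algorithm itself):

#include <vector>

struct Block { int Number; Block() : Number(-1) {} };

// Establishes the RenumberBlocks postcondition: layout[i]->Number == i and
// numbering[i] == layout[i] for every block.
static void renumber(std::vector<Block*> &layout,
                     std::vector<Block*> &numbering) {
  numbering.assign(layout.size(), 0);
  for (unsigned no = 0; no != layout.size(); ++no) {
    layout[no]->Number = (int)no;
    numbering[no] = layout[no];
  }
}

int main() {
  Block a, b;
  std::vector<Block*> layout, numbering;
  layout.push_back(&a);
  layout.push_back(&b);
  renumber(layout, numbering);
  return (a.Number == 0 && b.Number == 1 && numbering[1] == &b) ? 0 : 1;
}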
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock.  Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+  return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+    MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+  assert(MBB->getParent() == this && "MBB parent mismatch!");
+  MBB->~MachineBasicBlock();
+  BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+void MachineFunction::dump() const {
+  print(*cerr.stream());
+}
+
+void MachineFunction::print(std::ostream &OS) const {
+  OS << "# Machine code for " << Fn->getName () << "():\n";
+
+  // Print Frame Information
+  FrameInfo->print(*this, OS);
+
+  // Print JumpTable Information
+  JumpTableInfo->print(OS);
+
+  // Print Constant Pool
+  {
+    raw_os_ostream OSS(OS);
+    ConstantPool->print(OSS);
+  }
+
+  const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+  if (RegInfo && !RegInfo->livein_empty()) {
+    OS << "Live Ins:";
+    for (MachineRegisterInfo::livein_iterator
+         I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+      if (TRI)
+        OS << " " << TRI->getName(I->first);
+      else
+        OS << " Reg #" << I->first;
+
+      if (I->second)
+        OS << " in VR#" << I->second << " ";
+    }
+    OS << "\n";
+  }
+  if (RegInfo && !RegInfo->liveout_empty()) {
+    OS << "Live Outs:";
+    for (MachineRegisterInfo::liveout_iterator
+         I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
+      if (TRI)
+        OS << " " << TRI->getName(*I);
+      else
+        OS << " Reg #" << *I;
+    OS << "\n";
+  }
+
+  for (const_iterator BB = begin(); BB != end(); ++BB)
+    BB->print(OS);
+
+  OS << "\n# End machine code for " << Fn->getName () << "().\n\n";
+}
+
+/// CFGOnly flag - This is used to control whether or not the CFG graph printer
+/// prints out the contents of basic blocks or not.  This is acceptable because
+/// this code is only really used for debugging purposes.
+///
+static bool CFGOnly = false;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+    static std::string getGraphName(const MachineFunction *F) {
+      return "CFG for '" + F->getFunction()->getName() + "' function";
+    }
+
+    static std::string getNodeLabel(const MachineBasicBlock *Node,
+                                    const MachineFunction *Graph) {
+      if (CFGOnly && Node->getBasicBlock() &&
+          !Node->getBasicBlock()->getName().empty())
+        return Node->getBasicBlock()->getName() + ":";
+
+      std::ostringstream Out;
+      if (CFGOnly) {
+        Out << Node->getNumber() << ':';
+        return Out.str();
+      }
+
+      Node->print(Out);
+
+      std::string OutStr = Out.str();
+      if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+      // Process string output to make it nicer...
+      for (unsigned i = 0; i != OutStr.length(); ++i)
+        if (OutStr[i] == '\n') {  // Left justify
+          OutStr[i] = '\\';
+          OutStr.insert(OutStr.begin()+i+1, 'l');
+        }
+      return OutStr;
+    }
+  };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+  ViewGraph(this, "mf" + getFunction()->getName());
+#else
+  cerr << "MachineFunction::viewCFG is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+  CFGOnly = true;
+  viewCFG();
+  CFGOnly = false;
+}
+
+// The next two methods are used to construct and to retrieve
+// the MachineCodeForFunction object for the given function.
+// construct() -- Allocates and initializes for a given function and target
+// get()       -- Returns a handle to the object.
+// This should not be called before "construct()" +// for a given Function. +// +MachineFunction& +MachineFunction::construct(const Function *Fn, const TargetMachine &Tar) +{ + AnnotationID MF_AID = + AnnotationManager::getID("CodeGen::MachineCodeForFunction"); + assert(Fn->getAnnotation(MF_AID) == 0 && + "Object already exists for this function!"); + MachineFunction* mcInfo = new MachineFunction(Fn, Tar); + Fn->addAnnotation(mcInfo); + return *mcInfo; +} + +void MachineFunction::destruct(const Function *Fn) { + AnnotationID MF_AID = + AnnotationManager::getID("CodeGen::MachineCodeForFunction"); + bool Deleted = Fn->deleteAnnotation(MF_AID); + assert(Deleted && "Machine code did not exist for function!"); + Deleted = Deleted; // silence warning when no assertions. +} + +MachineFunction& MachineFunction::get(const Function *F) +{ + AnnotationID MF_AID = + AnnotationManager::getID("CodeGen::MachineCodeForFunction"); + MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID); + assert(mc && "Call construct() method first to allocate the object"); + return *mc; +} + +/// addLiveIn - Add the specified physical register as a live-in value and +/// create a corresponding virtual register for it. +unsigned MachineFunction::addLiveIn(unsigned PReg, + const TargetRegisterClass *RC) { + assert(RC->contains(PReg) && "Not the correct regclass!"); + unsigned VReg = getRegInfo().createVirtualRegister(RC); + getRegInfo().addLiveIn(PReg, VReg); + return VReg; +} + +/// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given +/// source file, line, and column. If none currently exists, create a new +/// DebugLocTuple, and insert it into the DebugIdMap. +unsigned MachineFunction::getOrCreateDebugLocID(GlobalVariable *CompileUnit, + unsigned Line, unsigned Col) { + DebugLocTuple Tuple(CompileUnit, Line, Col); + DenseMap<DebugLocTuple, unsigned>::iterator II + = DebugLocInfo.DebugIdMap.find(Tuple); + if (II != DebugLocInfo.DebugIdMap.end()) + return II->second; + // Add a new tuple. + unsigned Id = DebugLocInfo.DebugLocations.size(); + DebugLocInfo.DebugLocations.push_back(Tuple); + DebugLocInfo.DebugIdMap[Tuple] = Id; + return Id; +} + +/// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object. +DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const { + unsigned Idx = DL.getIndex(); + assert(Idx < DebugLocInfo.DebugLocations.size() && + "Invalid index into debug locations!"); + return DebugLocInfo.DebugLocations[Idx]; +} + +//===----------------------------------------------------------------------===// +// MachineFrameInfo implementation +//===----------------------------------------------------------------------===// + +/// CreateFixedObject - Create a new object at a fixed location on the stack. +/// All fixed objects should be created before other objects are created for +/// efficiency. By default, fixed objects are immutable. This returns an +/// index with a negative value. +/// +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, + bool Immutable) { + assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); + Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable)); + return -++NumFixedObjects; +} + + +void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{ + const TargetFrameInfo *FI = MF.getTarget().getFrameInfo(); + int ValOffset = (FI ? 
FI->getOffsetOfLocalArea() : 0);
+
+  for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+    const StackObject &SO = Objects[i];
+    OS << "  <fi#" << (int)(i-NumFixedObjects) << ">: ";
+    if (SO.Size == ~0ULL) {
+      OS << "dead\n";
+      continue;
+    }
+    if (SO.Size == 0)
+      OS << "variable sized";
+    else
+      OS << "size is " << SO.Size << " byte" << (SO.Size != 1 ? "s," : ",");
+    OS << " alignment is " << SO.Alignment << " byte"
+       << (SO.Alignment != 1 ? "s," : ",");
+
+    if (i < NumFixedObjects)
+      OS << " fixed";
+    if (i < NumFixedObjects || SO.SPOffset != -1) {
+      int64_t Off = SO.SPOffset - ValOffset;
+      OS << " at location [SP";
+      if (Off > 0)
+        OS << "+" << Off;
+      else if (Off < 0)
+        OS << Off;
+      OS << "]";
+    }
+    OS << "\n";
+  }
+
+  if (HasVarSizedObjects)
+    OS << "  Stack frame contains variable sized objects\n";
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+  print(MF, *cerr.stream());
+}
+
+//===----------------------------------------------------------------------===//
+//  MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getJumpTableIndex - Create a new jump table entry in the jump table info
+/// or return an existing one.
+///
+unsigned MachineJumpTableInfo::getJumpTableIndex(
+                               const std::vector<MachineBasicBlock*> &DestBBs) {
+  assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
+    if (JumpTables[i].MBBs == DestBBs)
+      return i;
+
+  JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+  return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool
+MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+                                             MachineBasicBlock *New) {
+  assert(Old != New && "Not making a change?");
+  bool MadeChange = false;
+  for (size_t i = 0, e = JumpTables.size(); i != e; ++i) {
+    MachineJumpTableEntry &JTE = JumpTables[i];
+    for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+      if (JTE.MBBs[j] == Old) {
+        JTE.MBBs[j] = New;
+        MadeChange = true;
+      }
+  }
+  return MadeChange;
+}
+
+void MachineJumpTableInfo::print(std::ostream &OS) const {
+  // FIXME: this is lame, maybe we could print out the MBB numbers or something
+  // like {1, 2, 4, 5, 3, 0}
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+    OS << "  <jt#" << i << "> has " << JumpTables[i].MBBs.size()
+       << " entries\n";
+  }
+}
+
+void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
+
+//===----------------------------------------------------------------------===//
+//  MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].isMachineConstantPoolEntry())
+      delete Constants[i].Val.MachineCPVal;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one.  User must specify the minimum required alignment (in
+/// bytes) for the object.
+/// +unsigned MachineConstantPool::getConstantPoolIndex(Constant *C, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. + for (unsigned i = 0, e = Constants.size(); i != e; ++i) + if (Constants[i].Val.ConstVal == C && + (Constants[i].getAlignment() & (Alignment - 1)) == 0) + return i; + + Constants.push_back(MachineConstantPoolEntry(C, Alignment)); + return Constants.size()-1; +} + +unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. + int Idx = V->getExistingMachineCPValue(this, Alignment); + if (Idx != -1) + return (unsigned)Idx; + + Constants.push_back(MachineConstantPoolEntry(V, Alignment)); + return Constants.size()-1; +} + +void MachineConstantPool::print(raw_ostream &OS) const { + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + OS << " <cp#" << i << "> is"; + if (Constants[i].isMachineConstantPoolEntry()) + Constants[i].Val.MachineCPVal->print(OS); + else + OS << *(Value*)Constants[i].Val.ConstVal; + OS << " , alignment=" << Constants[i].getAlignment(); + OS << "\n"; + } +} + +void MachineConstantPool::dump() const { print(errs()); } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp new file mode 100644 index 0000000..b8c8563 --- /dev/null +++ b/lib/CodeGen/MachineInstr.cpp @@ -0,0 +1,1105 @@ +//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Methods common to all machine instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Constants.h" +#include "llvm/InlineAsm.h" +#include "llvm/Value.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrDesc.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/LeakDetector.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/FoldingSet.h" +#include <ostream> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineOperand Implementation +//===----------------------------------------------------------------------===// + +/// AddRegOperandToRegInfo - Add this register operand to the specified +/// MachineRegisterInfo. If it is null, then the next/prev fields should be +/// explicitly nulled out. 
+void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
+  assert(isReg() && "Can only add reg operand to use lists");
+
+  // If the reginfo pointer is null, just explicitly null out our next/prev
+  // pointers, to ensure they are not garbage.
+  if (RegInfo == 0) {
+    Contents.Reg.Prev = 0;
+    Contents.Reg.Next = 0;
+    return;
+  }
+
+  // Otherwise, add this operand to the head of the registers use/def list.
+  MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
+
+  // For SSA values, we prefer to keep the definition at the start of the list.
+  // We do this by skipping over the definition if it is at the head of the
+  // list.
+  if (*Head && (*Head)->isDef())
+    Head = &(*Head)->Contents.Reg.Next;
+
+  Contents.Reg.Next = *Head;
+  if (Contents.Reg.Next) {
+    assert(getReg() == Contents.Reg.Next->getReg() &&
+           "Different regs on the same list!");
+    Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
+  }
+
+  Contents.Reg.Prev = Head;
+  *Head = this;
+}
+
+/// RemoveRegOperandFromRegInfo - Remove this register operand from the
+/// MachineRegisterInfo it is linked with.
+void MachineOperand::RemoveRegOperandFromRegInfo() {
+  assert(isOnRegUseList() && "Reg operand is not on a use list");
+  // Unlink this from the doubly linked list of operands.
+  MachineOperand *NextOp = Contents.Reg.Next;
+  *Contents.Reg.Prev = NextOp;
+  if (NextOp) {
+    assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
+    NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
+  }
+  Contents.Reg.Prev = 0;
+  Contents.Reg.Next = 0;
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+  if (getReg() == Reg) return; // No change.
+
+  // Otherwise, we have to change the register.  If this operand is embedded
+  // into a machine function, we need to update the old and new register's
+  // use/def lists.
+  if (MachineInstr *MI = getParent())
+    if (MachineBasicBlock *MBB = MI->getParent())
+      if (MachineFunction *MF = MBB->getParent()) {
+        RemoveRegOperandFromRegInfo();
+        Contents.Reg.RegNo = Reg;
+        AddRegOperandToRegInfo(&MF->getRegInfo());
+        return;
+      }
+
+  // Otherwise, just change the register, no problem.  :)
+  Contents.Reg.RegNo = Reg;
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value.  If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+  // If this operand is currently a register operand, and if this is in a
+  // function, deregister the operand from the register's use/def list.
+  if (isReg() && getParent() && getParent()->getParent() &&
+      getParent()->getParent()->getParent())
+    RemoveRegOperandFromRegInfo();
+
+  OpKind = MO_Immediate;
+  Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value.  If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+                                      bool isKill, bool isDead) {
+  // If this operand is already a register operand, use setReg to update the
+  // register's use/def lists.
+  if (isReg()) {
+    assert(!isEarlyClobber());
+    setReg(Reg);
+  } else {
+    // Otherwise, change this to a register and set the reg#.
+    OpKind = MO_Register;
+    Contents.Reg.RegNo = Reg;
+
+    // If this operand is embedded in a function, add the operand to the
+    // register's use/def list.
+    if (MachineInstr *MI = getParent())
+      if (MachineBasicBlock *MBB = MI->getParent())
+        if (MachineFunction *MF = MBB->getParent())
+          AddRegOperandToRegInfo(&MF->getRegInfo());
+  }
+
+  IsDef = isDef;
+  IsImp = isImp;
+  IsKill = isKill;
+  IsDead = isDead;
+  IsEarlyClobber = false;
+  SubReg = 0;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+  if (getType() != Other.getType()) return false;
+
+  switch (getType()) {
+  default: assert(0 && "Unrecognized operand type");
+  case MachineOperand::MO_Register:
+    return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+           getSubReg() == Other.getSubReg();
+  case MachineOperand::MO_Immediate:
+    return getImm() == Other.getImm();
+  case MachineOperand::MO_FPImmediate:
+    return getFPImm() == Other.getFPImm();
+  case MachineOperand::MO_MachineBasicBlock:
+    return getMBB() == Other.getMBB();
+  case MachineOperand::MO_FrameIndex:
+    return getIndex() == Other.getIndex();
+  case MachineOperand::MO_ConstantPoolIndex:
+    return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+  case MachineOperand::MO_JumpTableIndex:
+    return getIndex() == Other.getIndex();
+  case MachineOperand::MO_GlobalAddress:
+    return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+  case MachineOperand::MO_ExternalSymbol:
+    return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+           getOffset() == Other.getOffset();
+  }
+}
+
+/// print - Print the specified machine operand.
+///
+void MachineOperand::print(std::ostream &OS, const TargetMachine *TM) const {
+  raw_os_ostream RawOS(OS);
+  print(RawOS, TM);
+}
+
+void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+  switch (getType()) {
+  case MachineOperand::MO_Register:
+    if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
+      OS << "%reg" << getReg();
+    } else {
+      // If the instruction is embedded into a basic block, we can find the
+      // target info for the instruction.
+      if (TM == 0)
+        if (const MachineInstr *MI = getParent())
+          if (const MachineBasicBlock *MBB = MI->getParent())
+            if (const MachineFunction *MF = MBB->getParent())
+              TM = &MF->getTarget();
+
+      if (TM)
+        OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
+      else
+        OS << "%mreg" << getReg();
+    }
+
+    if (getSubReg() != 0) {
+      OS << ":" << getSubReg();
+    }
+
+    if (isDef() || isKill() || isDead() || isImplicit() || isEarlyClobber()) {
+      OS << "<";
+      bool NeedComma = false;
+      if (isImplicit()) {
+        if (NeedComma) OS << ",";
+        OS << (isDef() ? "imp-def" : "imp-use");
+        NeedComma = true;
+      } else if (isDef()) {
+        if (NeedComma) OS << ",";
+        if (isEarlyClobber())
+          OS << "earlyclobber,";
+        OS << "def";
+        NeedComma = true;
+      }
+      if (isKill() || isDead()) {
+        if (NeedComma) OS << ",";
+        if (isKill()) OS << "kill";
+        if (isDead()) OS << "dead";
+      }
+      OS << ">";
+    }
+    break;
+  case MachineOperand::MO_Immediate:
+    OS << getImm();
+    break;
+  case MachineOperand::MO_FPImmediate:
+    if (getFPImm()->getType() == Type::FloatTy) {
+      OS << getFPImm()->getValueAPF().convertToFloat();
+    } else {
+      OS << getFPImm()->getValueAPF().convertToDouble();
+    }
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    OS << "mbb<"
+       << ((Value*)getMBB()->getBasicBlock())->getName()
+       << "," << (void*)getMBB() << ">";
+    break;
+  case MachineOperand::MO_FrameIndex:
+    OS << "<fi#" << getIndex() << ">";
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    OS << "<cp#" << getIndex();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << ">";
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    OS << "<jt#" << getIndex() << ">";
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    OS << "<ga:" << ((Value*)getGlobal())->getName();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << ">";
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    OS << "<es:" << getSymbolName();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << ">";
+    break;
+  default:
+    assert(0 && "Unrecognized operand type");
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
+                                     int64_t o, uint64_t s, unsigned int a)
+  : Offset(o), Size(s), V(v),
+    Flags((f & 7) | ((Log2_32(a) + 1) << 3)) {
+  assert(isPowerOf2_32(a) && "Alignment is not a power of 2!");
+  assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+  ID.AddInteger(Offset);
+  ID.AddInteger(Size);
+  ID.AddPointer(V);
+  ID.AddInteger(Flags);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// TID NULL and no operands.
+MachineInstr::MachineInstr()
+  : TID(0), NumImplicitOps(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+      addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+      addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands.  It reserves space for the number of operands specified
+/// by the TargetInstrDesc, or for numOperands if it is not zero (for
+/// instructions with a variable number of operands).
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) + : TID(&tid), NumImplicitOps(0), Parent(0), + debugLoc(DebugLoc::getUnknownLoc()) { + if (!NoImp && TID->getImplicitDefs()) + for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + NumImplicitOps++; + if (!NoImp && TID->getImplicitUses()) + for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) + NumImplicitOps++; + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + if (!NoImp) + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); +} + +/// MachineInstr ctor - As above, but with a DebugLoc. +MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, + bool NoImp) + : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) { + if (!NoImp && TID->getImplicitDefs()) + for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + NumImplicitOps++; + if (!NoImp && TID->getImplicitUses()) + for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) + NumImplicitOps++; + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + if (!NoImp) + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); +} + +/// MachineInstr ctor - Work exactly the same as the ctor two above, except +/// that the MachineInstr is created and added to the end of the specified +/// basic block. +/// +MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) + : TID(&tid), NumImplicitOps(0), Parent(0), + debugLoc(DebugLoc::getUnknownLoc()) { + assert(MBB && "Cannot use inserting ctor with null basic block!"); + if (TID->ImplicitDefs) + for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + NumImplicitOps++; + if (TID->ImplicitUses) + for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) + NumImplicitOps++; + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); + MBB->push_back(this); // Add instruction to end of basic block! +} + +/// MachineInstr ctor - As above, but with a DebugLoc. +/// +MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, + const TargetInstrDesc &tid) + : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) { + assert(MBB && "Cannot use inserting ctor with null basic block!"); + if (TID->ImplicitDefs) + for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + NumImplicitOps++; + if (TID->ImplicitUses) + for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) + NumImplicitOps++; + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); + MBB->push_back(this); // Add instruction to end of basic block! +} + +/// MachineInstr ctor - Copies MachineInstr arg exactly +/// +MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) + : TID(&MI.getDesc()), NumImplicitOps(0), Parent(0), + debugLoc(MI.getDebugLoc()) { + Operands.reserve(MI.getNumOperands()); + + // Add operands + for (unsigned i = 0; i != MI.getNumOperands(); ++i) + addOperand(MI.getOperand(i)); + NumImplicitOps = MI.NumImplicitOps; + + // Add memory operands. 
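+  // (Each MachineMemOperand is copied by value below, so the new instruction
+  // gets its own list, independent of MI's.)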
+  for (std::list<MachineMemOperand>::const_iterator i = MI.memoperands_begin(),
+       j = MI.memoperands_end(); i != j; ++i)
+    addMemOperand(MF, *i);
+
+  // Set parent to null.
+  Parent = 0;
+
+  LeakDetector::addGarbageObject(this);
+}
+
+MachineInstr::~MachineInstr() {
+  LeakDetector::removeGarbageObject(this);
+  assert(MemOperands.empty() &&
+         "MachineInstr being deleted with live memoperands!");
+#ifndef NDEBUG
+  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+    assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
+    assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) &&
+           "Reg operand def/use list corrupted");
+  }
+#endif
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+  if (MachineBasicBlock *MBB = getParent())
+    return &MBB->getParent()->getRegInfo();
+  return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists() {
+  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+    if (Operands[i].isReg())
+      Operands[i].RemoveRegOperandFromRegInfo();
+  }
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
+  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+    if (Operands[i].isReg())
+      Operands[i].AddRegOperandToRegInfo(&RegInfo);
+  }
+}
+
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(const MachineOperand &Op) {
+  bool isImpReg = Op.isReg() && Op.isImplicit();
+  assert((isImpReg || !OperandsComplete()) &&
+         "Trying to add an operand to a machine instr that is already done!");
+
+  MachineRegisterInfo *RegInfo = getRegInfo();
+
+  // If we are adding the operand to the end of the list, our job is simpler.
+  // This is true most of the time, so this is a reasonable optimization.
+  if (isImpReg || NumImplicitOps == 0) {
+    // We can only do this optimization if we know that the operand list won't
+    // reallocate.
+    if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) {
+      Operands.push_back(Op);
+
+      // Set the parent of the operand.
+      Operands.back().ParentMI = this;
+
+      // If the operand is a register, update the operand's use list.
+      if (Op.isReg())
+        Operands.back().AddRegOperandToRegInfo(RegInfo);
+      return;
+    }
+  }
+
+  // Otherwise, we have to insert a real operand before any implicit ones.
+  unsigned OpNo = Operands.size()-NumImplicitOps;
+
+  // If this instruction isn't embedded into a function, then we don't need to
+  // update any operand lists.
+  if (RegInfo == 0) {
+    // Simple insertion, no reginfo update needed for other register operands.
+    Operands.insert(Operands.begin()+OpNo, Op);
+    Operands[OpNo].ParentMI = this;
+
+    // Do explicitly set the reginfo for this operand though, to ensure the
+    // next/prev fields are properly nulled out.
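+    // (Passing a null MachineRegisterInfo is understood here, by assumption,
+    // as a request to clear the use-list links rather than to register the
+    // operand anywhere.)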
+ if (Operands[OpNo].isReg()) + Operands[OpNo].AddRegOperandToRegInfo(0); + + } else if (Operands.size()+1 <= Operands.capacity()) { + // Otherwise, we have to remove register operands from their register use + // list, add the operand, then add the register operands back to their use + // list. This also must handle the case when the operand list reallocates + // to somewhere else. + + // If insertion of this operand won't cause reallocation of the operand + // list, just remove the implicit operands, add the operand, then re-add all + // the rest of the operands. + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + assert(Operands[i].isReg() && "Should only be an implicit reg!"); + Operands[i].RemoveRegOperandFromRegInfo(); + } + + // Add the operand. If it is a register, add it to the reg list. + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + if (Operands[OpNo].isReg()) + Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + + // Re-add all the implicit ops. + for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) { + assert(Operands[i].isReg() && "Should only be an implicit reg!"); + Operands[i].AddRegOperandToRegInfo(RegInfo); + } + } else { + // Otherwise, we will be reallocating the operand list. Remove all reg + // operands from their list, then readd them after the operand list is + // reallocated. + RemoveRegOperandsFromUseLists(); + + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + // Re-add all the operands. + AddRegOperandsToUseLists(*RegInfo); + } +} + +/// RemoveOperand - Erase an operand from an instruction, leaving it with one +/// fewer operand than it started with. +/// +void MachineInstr::RemoveOperand(unsigned OpNo) { + assert(OpNo < Operands.size() && "Invalid operand number"); + + // Special case removing the last one. + if (OpNo == Operands.size()-1) { + // If needed, remove from the reg def/use list. + if (Operands.back().isReg() && Operands.back().isOnRegUseList()) + Operands.back().RemoveRegOperandFromRegInfo(); + + Operands.pop_back(); + return; + } + + // Otherwise, we are removing an interior operand. If we have reginfo to + // update, remove all operands that will be shifted down from their reg lists, + // move everything down, then re-add them. + MachineRegisterInfo *RegInfo = getRegInfo(); + if (RegInfo) { + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].RemoveRegOperandFromRegInfo(); + } + } + + Operands.erase(Operands.begin()+OpNo); + + if (RegInfo) { + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].AddRegOperandToRegInfo(RegInfo); + } + } +} + +/// addMemOperand - Add a MachineMemOperand to the machine instruction, +/// referencing arbitrary storage. +void MachineInstr::addMemOperand(MachineFunction &MF, + const MachineMemOperand &MO) { + MemOperands.push_back(MO); +} + +/// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands. +void MachineInstr::clearMemOperands(MachineFunction &MF) { + MemOperands.clear(); +} + + +/// removeFromParent - This method unlinks 'this' from the containing basic +/// block, and returns it, but does not delete it. +MachineInstr *MachineInstr::removeFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->remove(this); + return this; +} + + +/// eraseFromParent - This method unlinks 'this' from the containing basic +/// block, and deletes it. 
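+/// (Prefer this over deleting the instruction directly: it keeps the parent
+/// block's instruction list and the register use lists consistent.)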
+void MachineInstr::eraseFromParent() {
+  assert(getParent() && "Not embedded in a basic block!");
+  getParent()->erase(this);
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand
+///
+bool MachineInstr::OperandsComplete() const {
+  unsigned short NumOperands = TID->getNumOperands();
+  if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+    return true;  // Broken: we have all the operands of this instruction!
+  return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+  unsigned NumOperands = TID->getNumOperands();
+  if (!TID->isVariadic())
+    return NumOperands;
+
+  for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isImplicit())
+      NumOperands++;
+  }
+  return NumOperands;
+}
+
+
+/// isLabel - Returns true if the MachineInstr represents a label.
+///
+bool MachineInstr::isLabel() const {
+  return getOpcode() == TargetInstrInfo::DBG_LABEL ||
+         getOpcode() == TargetInstrInfo::EH_LABEL ||
+         getOpcode() == TargetInstrInfo::GC_LABEL;
+}
+
+/// isDebugLabel - Returns true if the MachineInstr represents a debug label.
+///
+bool MachineInstr::isDebugLabel() const {
+  return getOpcode() == TargetInstrInfo::DBG_LABEL;
+}
+
+/// findRegisterUseOperandIdx() - Returns the index of the operand that is a
+/// use of the specified register, or -1 if it is not found. If isKill is
+/// true, the search criteria is further tightened to a use that kills the
+/// register.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+                                          const TargetRegisterInfo *TRI) const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (!MOReg)
+      continue;
+    if (MOReg == Reg ||
+        (TRI &&
+         TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+         TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         TRI->isSubRegister(MOReg, Reg)))
+      if (!isKill || MO.isKill())
+        return i;
+  }
+  return -1;
+}
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead,
+                                            const TargetRegisterInfo *TRI) const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (MOReg == Reg ||
+        (TRI &&
+         TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+         TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         TRI->isSubRegister(MOReg, Reg)))
+      if (!isDead || MO.isDead())
+        return i;
+  }
+  return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
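+/// A minimal sketch of a caller (the predicable MI and the new predicate
+/// value NewPred are hypothetical):
+///   int Idx = MI->findFirstPredOperandIdx();
+///   if (Idx != -1)
+///     MI->getOperand(Idx).setImm(NewPred);  // overwrite the predicate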
+int MachineInstr::findFirstPredOperandIdx() const {
+  const TargetInstrDesc &TID = getDesc();
+  if (TID.isPredicable()) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (TID.OpInfo[i].isPredicate())
+        return i;
+  }
+
+  return -1;
+}
+
+/// isRegTiedToUseOperand - Given the index of a register def operand,
+/// check if the register def is tied to a source operand, due to either
+/// two-address elimination or inline assembly constraints. Returns the
+/// first tied use operand index by reference if UseOpIdx is not null.
+bool MachineInstr::
+isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
+  if (getOpcode() == TargetInstrInfo::INLINEASM) {
+    assert(DefOpIdx >= 2);
+    const MachineOperand &MO = getOperand(DefOpIdx);
+    if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+      return false;
+    // Determine the actual operand number corresponding to this index.
+    unsigned DefNo = 0;
+    for (unsigned i = 1, e = getNumOperands(); i < e; ) {
+      const MachineOperand &FMO = getOperand(i);
+      assert(FMO.isImm());
+      // Skip over this def.
+      i += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+      if (i > DefOpIdx)
+        break;
+      ++DefNo;
+    }
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+      const MachineOperand &FMO = getOperand(i);
+      if (!FMO.isImm())
+        continue;
+      if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+        continue;
+      unsigned Idx;
+      if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
+          Idx == DefNo) {
+        if (UseOpIdx)
+          *UseOpIdx = (unsigned)i + 1;
+        return true;
+      }
+    }
+  }
+
+  assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
+  const TargetInstrDesc &TID = getDesc();
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isUse() &&
+        TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+      if (UseOpIdx)
+        *UseOpIdx = (unsigned)i;
+      return true;
+    }
+  }
+  return false;
+}
+
+/// isRegTiedToDefOperand - Return true if the operand of the specified index
+/// is a register use and it is tied to a def operand. It also returns the def
+/// operand index by reference.
+bool MachineInstr::
+isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
+  if (getOpcode() == TargetInstrInfo::INLINEASM) {
+    const MachineOperand &MO = getOperand(UseOpIdx);
+    if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
+      return false;
+    assert(UseOpIdx > 0);
+    const MachineOperand &UFMO = getOperand(UseOpIdx-1);
+    if (!UFMO.isImm())
+      return false;  // Must be physreg uses.
+    unsigned DefNo;
+    if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+      if (!DefOpIdx)
+        return true;
+
+      unsigned DefIdx = 1;
+      // Remember to adjust the index. First operand is asm string, then there
+      // is a flag for each.
+      while (DefNo) {
+        const MachineOperand &FMO = getOperand(DefIdx);
+        assert(FMO.isImm());
+        // Skip over this def.
+        DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+        --DefNo;
+      }
+      *DefOpIdx = DefIdx+1;
+      return true;
+    }
+    return false;
+  }
+
+  const TargetInstrDesc &TID = getDesc();
+  if (UseOpIdx >= TID.getNumOperands())
+    return false;
+  const MachineOperand &MO = getOperand(UseOpIdx);
+  if (!MO.isReg() || !MO.isUse())
+    return false;
+  int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+  if (DefIdx == -1)
+    return false;
+  if (DefOpIdx)
+    *DefOpIdx = (unsigned)DefIdx;
+  return true;
+}
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
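+/// For illustration, a hypothetical peephole that replaces OldMI with an
+/// equivalent NewMI might preserve the markers like so:
+///   NewMI->copyKillDeadInfo(OldMI);
+///   OldMI->eraseFromParent();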
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+      continue;
+    for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+      MachineOperand &MOp = getOperand(j);
+      if (!MOp.isIdenticalTo(MO))
+        continue;
+      if (MO.isKill())
+        MOp.setIsKill();
+      else
+        MOp.setIsDead();
+      break;
+    }
+  }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.isPredicable())
+    return;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    if (TID.OpInfo[i].isPredicate()) {
+      // Predicated operands must be last operands.
+      addOperand(MI->getOperand(i));
+    }
+  }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
+                                bool &SawStore) const {
+  // Ignore stuff that we obviously can't move.
+  if (TID->mayStore() || TID->isCall()) {
+    SawStore = true;
+    return false;
+  }
+  if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+    return false;
+
+  // See if this instruction does a load. If so, we have to guarantee that the
+  // loaded value doesn't change between the load and its intended destination.
+  // The check for isInvariantLoad gives the target the chance to classify the
+  // load as always returning a constant, e.g. a constant pool load.
+  if (TID->mayLoad() && !TII->isInvariantLoad(this))
+    // Otherwise, this is a real load. If there is a store between the load and
+    // end of block, or if the load is volatile, we can't move it.
+    return !SawStore && !hasVolatileMemoryRef();
+
+  return true;
+}
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+                                 unsigned DstReg) const {
+  bool SawStore = false;
+  if (!getDesc().isRematerializable() ||
+      !TII->isTriviallyReMaterializable(this) ||
+      !isSafeToMove(TII, SawStore))
+    return false;
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg())
+      continue;
+    // FIXME: For now, do not remat any instruction with register operands.
+    // Later on, we can loosen the restriction if the register operands have
+    // not been modified between the def and use. Note, this is different from
+    // MachineSink because the code is no longer in two-address form (at least
+    // partially).
+    if (MO.isUse())
+      return false;
+    else if (!MO.isDead() && MO.getReg() != DstReg)
+      return false;
+  }
+  return true;
+}
+
+/// hasVolatileMemoryRef - Return true if this instruction may have a
+/// volatile memory reference, or if the information describing the
+/// memory reference is not available. Return false if it is known to
+/// have no volatile memory references.
+bool MachineInstr::hasVolatileMemoryRef() const {
+  // An instruction known never to access memory won't have a volatile access.
+ if (!TID->mayStore() && + !TID->mayLoad() && + !TID->isCall() && + !TID->hasUnmodeledSideEffects()) + return false; + + // Otherwise, if the instruction has no memory reference information, + // conservatively assume it wasn't preserved. + if (memoperands_empty()) + return true; + + // Check the memory reference information for volatile references. + for (std::list<MachineMemOperand>::const_iterator I = memoperands_begin(), + E = memoperands_end(); I != E; ++I) + if (I->isVolatile()) + return true; + + return false; +} + +void MachineInstr::dump() const { + cerr << " " << *this; +} + +void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const { + raw_os_ostream RawOS(OS); + print(RawOS, TM); +} + +void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { + // Specialize printing if op#0 is definition + unsigned StartOp = 0; + if (getNumOperands() && getOperand(0).isReg() && getOperand(0).isDef()) { + getOperand(0).print(OS, TM); + OS << " = "; + ++StartOp; // Don't print this operand again! + } + + OS << getDesc().getName(); + + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { + if (i != StartOp) + OS << ","; + OS << " "; + getOperand(i).print(OS, TM); + } + + if (!memoperands_empty()) { + OS << ", Mem:"; + for (std::list<MachineMemOperand>::const_iterator i = memoperands_begin(), + e = memoperands_end(); i != e; ++i) { + const MachineMemOperand &MRO = *i; + const Value *V = MRO.getValue(); + + assert((MRO.isLoad() || MRO.isStore()) && + "SV has to be a load, store or both."); + + if (MRO.isVolatile()) + OS << "Volatile "; + + if (MRO.isLoad()) + OS << "LD"; + if (MRO.isStore()) + OS << "ST"; + + OS << "(" << MRO.getSize() << "," << MRO.getAlignment() << ") ["; + + if (!V) + OS << "<unknown>"; + else if (!V->getName().empty()) + OS << V->getName(); + else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + PSV->print(OS); + } else + OS << V; + + OS << " + " << MRO.getOffset() << "]"; + } + } + + if (!debugLoc.isUnknown()) { + const MachineFunction *MF = getParent()->getParent(); + DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); + DICompileUnit CU(DLT.CompileUnit); + std::string Dir, Fn; + OS << " [dbg: " + << CU.getDirectory(Dir) << '/' << CU.getFilename(Fn) << "," + << DLT.Line << "," + << DLT.Col << "]"; + } + + OS << "\n"; +} + +bool MachineInstr::addRegisterKilled(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool Found = false; + SmallVector<unsigned,4> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isKill()) + // The register is already marked kill. + return true; + MO.setIsKill(); + Found = true; + } + } else if (hasAliases && MO.isKill() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // A super-register kill already exists. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded kill operands. 
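+  // (A sub-register kill recorded in DeadOps is redundant once the
+  // super-register is marked killed: implicit operands are removed outright,
+  // explicit ones merely have their kill flag cleared.)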
+ while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsKill(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is killed. Add a + // new implicit operand if required. + if (!Found && AddIfNotFound) { + addOperand(MachineOperand::CreateReg(IncomingReg, + false /*IsDef*/, + true /*IsImp*/, + true /*IsKill*/)); + return true; + } + return Found; +} + +bool MachineInstr::addRegisterDead(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool Found = false; + SmallVector<unsigned,4> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isDead()) + // The register is already marked dead. + return true; + MO.setIsDead(); + Found = true; + } + } else if (hasAliases && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // There exists a super-register that's marked dead. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->getSubRegisters(IncomingReg) && + RegInfo->getSuperRegisters(Reg) && + RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded dead operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsDead(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is dead. Add a + // new implicit operand if required. + if (!Found && AddIfNotFound) { + addOperand(MachineOperand::CreateReg(IncomingReg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/, + true /*IsDead*/)); + return true; + } + return Found; +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp new file mode 100644 index 0000000..aaa4de4 --- /dev/null +++ b/lib/CodeGen/MachineLICM.cpp @@ -0,0 +1,406 @@ +//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs loop invariant code motion on machine instructions. We +// attempt to remove as much code from the body of a loop as possible. +// +// This pass does not attempt to throttle itself to limit register pressure. +// The register allocation phases are expected to perform rematerialization +// to recover when register pressure is high. +// +// This pass is not intended to be a replacement or a complete alternative +// for the LLVM-IR-level LICM pass. It is only designed to hoist simple +// constructs that are not exposed before lowering and instruction selection. 
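+//
+// A hedged sketch of how a target pipeline might schedule the pass (the
+// PassManager object PM is illustrative; createMachineLICMPass is the
+// factory defined in this file):
+//
+//   PM.add(llvm::createMachineLICMPass());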
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-licm" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); +STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); + +namespace { + class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass { + const TargetMachine *TM; + const TargetInstrInfo *TII; + + // Various analyses that we use... + MachineLoopInfo *LI; // Current MachineLoopInfo + MachineDominatorTree *DT; // Machine dominator tree for the cur loop + MachineRegisterInfo *RegInfo; // Machine register information + + // State that is updated as we process loops + bool Changed; // True if a loop is changed. + MachineLoop *CurLoop; // The current loop we are working on. + MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + + // For each BB and opcode pair, keep a list of hoisted instructions. + DenseMap<std::pair<unsigned, unsigned>, + std::vector<const MachineInstr*> > CSEMap; + public: + static char ID; // Pass identification, replacement for typeid + MachineLICM() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "Machine Instruction LICM"; } + + // FIXME: Loop preheaders? + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual void releaseMemory() { + CSEMap.clear(); + } + + private: + /// IsLoopInvariantInst - Returns true if the instruction is loop + /// invariant. I.e., all virtual register operands are defined outside of + /// the loop, physical registers aren't accessed (explicitly or implicitly), + /// and the instruction is hoistable. + /// + bool IsLoopInvariantInst(MachineInstr &I); + + /// IsProfitableToHoist - Return true if it is potentially profitable to + /// hoist the given loop invariant. + bool IsProfitableToHoist(MachineInstr &MI); + + /// HoistRegion - Walk the specified region of the CFG (defined by all + /// blocks dominated by the specified block, and that are in the current + /// loop) in depth first order w.r.t the DominatorTree. This allows us to + /// visit definitions before uses, allowing us to hoist a loop body in one + /// pass without iteration. + /// + void HoistRegion(MachineDomTreeNode *N); + + /// Hoist - When an instruction is found to only use loop invariant operands + /// that is safe to hoist, this instruction is called to do the dirty work. 
+ /// + void Hoist(MachineInstr &MI); + }; +} // end anonymous namespace + +char MachineLICM::ID = 0; +static RegisterPass<MachineLICM> +X("machinelicm", "Machine Loop Invariant Code Motion"); + +FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); } + +/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most +/// loop that has a preheader. +static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { + for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) + if (L->getLoopPreheader()) + return false; + return true; +} + +/// Hoist expressions out of the specified loop. Note, alias info for inner loop +/// is not preserved so it is not a good idea to run LICM multiple times on one +/// loop. +/// +bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { + const Function *F = MF.getFunction(); + if (F->hasFnAttr(Attribute::OptimizeForSize)) + return false; + + DOUT << "******** Machine LICM ********\n"; + + Changed = false; + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + RegInfo = &MF.getRegInfo(); + + // Get our Loop information... + LI = &getAnalysis<MachineLoopInfo>(); + DT = &getAnalysis<MachineDominatorTree>(); + + for (MachineLoopInfo::iterator + I = LI->begin(), E = LI->end(); I != E; ++I) { + CurLoop = *I; + + // Only visit outer-most preheader-sporting loops. + if (!LoopIsOuterMostWithPreheader(CurLoop)) + continue; + + // Determine the block to which to hoist instructions. If we can't find a + // suitable loop preheader, we can't do any hoisting. + // + // FIXME: We are only hoisting if the basic block coming into this loop + // has only one successor. This isn't the case in general because we haven't + // broken critical edges or added preheaders. + CurPreheader = CurLoop->getLoopPreheader(); + if (!CurPreheader) + continue; + + HoistRegion(DT->getNode(CurLoop->getHeader())); + } + + return Changed; +} + +/// HoistRegion - Walk the specified region of the CFG (defined by all blocks +/// dominated by the specified block, and that are in the current loop) in depth +/// first order w.r.t the DominatorTree. This allows us to visit definitions +/// before uses, allowing us to hoist a loop body in one pass without iteration. +/// +void MachineLICM::HoistRegion(MachineDomTreeNode *N) { + assert(N != 0 && "Null dominator tree node?"); + MachineBasicBlock *BB = N->getBlock(); + + // If this subregion is not in the top level loop at all, exit. + if (!CurLoop->contains(BB)) return; + + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ) { + MachineBasicBlock::iterator NextMII = MII; ++NextMII; + MachineInstr &MI = *MII; + + Hoist(MI); + + MII = NextMII; + } + + const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); + + for (unsigned I = 0, E = Children.size(); I != E; ++I) + HoistRegion(Children[I]); +} + +/// IsLoopInvariantInst - Returns true if the instruction is loop +/// invariant. I.e., all virtual register operands are defined outside of the +/// loop, physical registers aren't accessed explicitly, and there are no side +/// effects that aren't captured by the operands or other flags. +/// +bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { + const TargetInstrDesc &TID = I.getDesc(); + + // Ignore stuff that we obviously can't hoist. + if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.hasUnmodeledSideEffects()) + return false; + + if (TID.mayLoad()) { + // Okay, this instruction does a load. 
As a refinement, we allow the target
+    // to decide whether the loaded value is actually a constant. If so, we can
+    // safely hoist it.
+    if (!TII->isInvariantLoad(&I))
+      // FIXME: we should be able to sink loads with no other side effects if
+      // there is nothing that can change memory from here until the end of
+      // block. This is a trivial form of alias analysis.
+      return false;
+  }
+
+  DEBUG({
+      DOUT << "--- Checking if we can hoist " << I;
+      if (I.getDesc().getImplicitUses()) {
+        DOUT << "  * Instruction has implicit uses:\n";
+
+        const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+        for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
+             *ImpUses; ++ImpUses)
+          DOUT << "      -> " << TRI->getName(*ImpUses) << "\n";
+      }
+
+      if (I.getDesc().getImplicitDefs()) {
+        DOUT << "  * Instruction has implicit defines:\n";
+
+        const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+        for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
+             *ImpDefs; ++ImpDefs)
+          DOUT << "      -> " << TRI->getName(*ImpDefs) << "\n";
+      }
+    });
+
+  if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
+    DOUT << "Cannot hoist with implicit defines or uses\n";
+    return false;
+  }
+
+  // The instruction is loop invariant if all of its operands are.
+  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = I.getOperand(i);
+
+    if (!MO.isReg())
+      continue;
+
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+
+    // Don't hoist an instruction that uses or defines a physical register.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      return false;
+
+    if (!MO.isUse())
+      continue;
+
+    assert(RegInfo->getVRegDef(Reg) &&
+           "Machine instr not mapped for this vreg?!");
+
+    // If the loop contains the definition of an operand, then the instruction
+    // isn't loop invariant.
+    if (CurLoop->contains(RegInfo->getVRegDef(Reg)->getParent()))
+      return false;
+  }
+
+  // If we got this far, the instruction is loop invariant!
+  return true;
+}
+
+
+/// HasPHIUses - Return true if the specified register has any PHI use.
+static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
+  for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
+         UE = RegInfo->use_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    if (UseMI->getOpcode() == TargetInstrInfo::PHI)
+      return true;
+  }
+  return false;
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+  if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+    return false;
+
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // FIXME: For now, only hoist re-materializable instructions. LICM will
+  // increase register pressure. We want to make sure it doesn't increase
+  // spilling.
+  if (!TID.mayLoad() && (!TID.isRematerializable() ||
+                         !TII->isTriviallyReMaterializable(&MI)))
+    return false;
+
+  // If result(s) of this instruction are used by PHIs, then don't hoist it.
+  // The presence of joins makes it difficult for the current register
+  // allocator implementation to perform remat.
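+  // (For example, if a hoisted value fed a PHI in the loop, the allocator
+  // would have to rematerialize it in every predecessor; declining to hoist
+  // sidesteps that.)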
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + if (HasPHIUses(MO.getReg(), RegInfo)) + return false; + } + + return true; +} + +static const MachineInstr *LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs, + MachineRegisterInfo *RegInfo) { + unsigned NumOps = MI->getNumOperands(); + for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { + const MachineInstr *PrevMI = PrevMIs[i]; + unsigned NumOps2 = PrevMI->getNumOperands(); + if (NumOps != NumOps2) + continue; + bool IsSame = true; + for (unsigned j = 0; j != NumOps; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (MO.isReg() && MO.isDef()) { + if (RegInfo->getRegClass(MO.getReg()) != + RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) { + IsSame = false; + break; + } + continue; + } + if (!MO.isIdenticalTo(PrevMI->getOperand(j))) { + IsSame = false; + break; + } + } + if (IsSame) + return PrevMI; + } + return 0; +} + +/// Hoist - When an instruction is found to use only loop invariant operands +/// that are safe to hoist, this instruction is called to do the dirty work. +/// +void MachineLICM::Hoist(MachineInstr &MI) { + if (!IsLoopInvariantInst(MI)) return; + if (!IsProfitableToHoist(MI)) return; + + // Now move the instructions to the predecessor, inserting it before any + // terminator instructions. + DEBUG({ + DOUT << "Hoisting " << MI; + if (CurPreheader->getBasicBlock()) + DOUT << " to MachineBasicBlock " + << CurPreheader->getBasicBlock()->getName(); + if (MI.getParent()->getBasicBlock()) + DOUT << " from MachineBasicBlock " + << MI.getParent()->getBasicBlock()->getName(); + DOUT << "\n"; + }); + + // Look for opportunity to CSE the hoisted instruction. + std::pair<unsigned, unsigned> BBOpcPair = + std::make_pair(CurPreheader->getNumber(), MI.getOpcode()); + DenseMap<std::pair<unsigned, unsigned>, + std::vector<const MachineInstr*> >::iterator CI = CSEMap.find(BBOpcPair); + bool DoneCSE = false; + if (CI != CSEMap.end()) { + const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo); + if (Dup) { + DOUT << "CSEing " << MI; + DOUT << " with " << *Dup; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (MO.isReg() && MO.isDef()) + RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); + } + MI.eraseFromParent(); + DoneCSE = true; + ++NumCSEed; + } + } + + // Otherwise, splice the instruction to the preheader. + if (!DoneCSE) { + CurPreheader->splice(CurPreheader->getFirstTerminator(), + MI.getParent(), &MI); + // Add to the CSE map. + if (CI != CSEMap.end()) + CI->second.push_back(&MI); + else { + std::vector<const MachineInstr*> CSEMIs; + CSEMIs.push_back(&MI); + CSEMap.insert(std::make_pair(BBOpcPair, CSEMIs)); + } + } + + ++NumHoisted; + Changed = true; +} diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp new file mode 100644 index 0000000..68ddb7b --- /dev/null +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -0,0 +1,40 @@ +//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the MachineLoopInfo class that is used to identify natural +// loops and determine the loop depth of various nodes of the CFG. Note that +// the loops identified may actually be several natural loops that share the +// same header node... not just a single natural loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" +using namespace llvm; + +TEMPLATE_INSTANTIATION(class LoopBase<MachineBasicBlock>); +TEMPLATE_INSTANTIATION(class LoopInfoBase<MachineBasicBlock>); + +char MachineLoopInfo::ID = 0; +static RegisterPass<MachineLoopInfo> +X("machine-loops", "Machine Natural Loop Construction", true); + +const PassInfo *const llvm::MachineLoopInfoID = &X; + +bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + LI->Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update + return false; +} + +void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineDominatorTree>(); +} diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp new file mode 100644 index 0000000..1d8109e --- /dev/null +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -0,0 +1,368 @@ +//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfo.h" + +#include "llvm/Constants.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Streams.h" +using namespace llvm; +using namespace llvm::dwarf; + +// Handle the Pass registration stuff necessary to use TargetData's. +static RegisterPass<MachineModuleInfo> +X("machinemoduleinfo", "Module Information"); +char MachineModuleInfo::ID = 0; + +//===----------------------------------------------------------------------===// + +MachineModuleInfo::MachineModuleInfo() +: ImmutablePass(&ID) +, LabelIDList() +, FrameMoves() +, LandingPads() +, Personalities() +, CallsEHReturn(0) +, CallsUnwindInit(0) +, DbgInfoAvailable(false) +{ + // Always emit "no personality" info + Personalities.push_back(NULL); +} +MachineModuleInfo::~MachineModuleInfo() { + +} + +/// doInitialization - Initialize the state for a new module. +/// +bool MachineModuleInfo::doInitialization() { + return false; +} + +/// doFinalization - Tear down the state after completion of a module. +/// +bool MachineModuleInfo::doFinalization() { + return false; +} + +/// BeginFunction - Begin gathering function meta information. +/// +void MachineModuleInfo::BeginFunction(MachineFunction *MF) { + // Coming soon. +} + +/// EndFunction - Discard function meta information. 
+/// +void MachineModuleInfo::EndFunction() { + // Clean up frame info. + FrameMoves.clear(); + + // Clean up exception info. + LandingPads.clear(); + TypeInfos.clear(); + FilterIds.clear(); + FilterEnds.clear(); + CallsEHReturn = 0; + CallsUnwindInit = 0; +} + +/// AnalyzeModule - Scan the module for global debug information. +/// +void MachineModuleInfo::AnalyzeModule(Module &M) { + // Insert functions in the llvm.used array into UsedFunctions. + GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InitList->getOperand(i))) + if (CE->getOpcode() == Instruction::BitCast) + if (Function *F = dyn_cast<Function>(CE->getOperand(0))) + UsedFunctions.insert(F); + } +} + +//===-EH-------------------------------------------------------------------===// + +/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the +/// specified MachineBasicBlock. +LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo + (MachineBasicBlock *LandingPad) { + unsigned N = LandingPads.size(); + for (unsigned i = 0; i < N; ++i) { + LandingPadInfo &LP = LandingPads[i]; + if (LP.LandingPadBlock == LandingPad) + return LP; + } + + LandingPads.push_back(LandingPadInfo(LandingPad)); + return LandingPads[N]; +} + +/// addInvoke - Provide the begin and end labels of an invoke style call and +/// associate it with a try landing pad block. +void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, + unsigned BeginLabel, unsigned EndLabel) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.BeginLabels.push_back(BeginLabel); + LP.EndLabels.push_back(EndLabel); +} + +/// addLandingPad - Provide the label of a try LandingPad block. +/// +unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { + unsigned LandingPadLabel = NextLabelID(); + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.LandingPadLabel = LandingPadLabel; + return LandingPadLabel; +} + +/// addPersonality - Provide the personality function for the exception +/// information. +void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, + Function *Personality) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.Personality = Personality; + + for (unsigned i = 0; i < Personalities.size(); ++i) + if (Personalities[i] == Personality) + return; + + Personalities.push_back(Personality); +} + +/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. +/// +void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, + std::vector<GlobalVariable *> &TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + for (unsigned N = TyInfo.size(); N; --N) + LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); +} + +/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. +/// +void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad, + std::vector<GlobalVariable *> &TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + std::vector<unsigned> IdsInFilter(TyInfo.size()); + for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) + IdsInFilter[I] = getTypeIDFor(TyInfo[I]); + LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); +} + +/// addCleanup - Add a cleanup action for a landing pad. 
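+/// (A cleanup is encoded as type id 0 in the pad's TypeIds list; loosely, a
+/// pad with TypeIds == {2, 0} would try the catch for type id 2 first and
+/// fall back to the cleanup.)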
+/// +void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.TypeIds.push_back(0); +} + +/// TidyLandingPads - Remap landing pad labels and remove any deleted landing +/// pads. +void MachineModuleInfo::TidyLandingPads() { + for (unsigned i = 0; i != LandingPads.size(); ) { + LandingPadInfo &LandingPad = LandingPads[i]; + LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel); + + // Special case: we *should* emit LPs with null LP MBB. This indicates + // "nounwind" case. + if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) { + unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]); + unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]); + + if (!BeginLabel || !EndLabel) { + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); + LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); + continue; + } + + LandingPad.BeginLabels[j] = BeginLabel; + LandingPad.EndLabels[j] = EndLabel; + ++j; + } + + // Remove landing pads with no try-ranges. + if (LandingPads[i].BeginLabels.empty()) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + // If there is no landing pad, ensure that the list of typeids is empty. + // If the only typeid is a cleanup, this is the same as having no typeids. + if (!LandingPad.LandingPadBlock || + (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0])) + LandingPad.TypeIds.clear(); + + ++i; + } +} + +/// getTypeIDFor - Return the type id for the specified typeinfo. This is +/// function wide. +unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { + for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) + if (TypeInfos[i] == TI) return i + 1; + + TypeInfos.push_back(TI); + return TypeInfos.size(); +} + +/// getFilterIDFor - Return the filter id for the specified typeinfos. This is +/// function wide. +int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) { + // If the new filter coincides with the tail of an existing filter, then + // re-use the existing filter. Folding filters more than this requires + // re-ordering filters and/or their elements - probably not worth it. + for (std::vector<unsigned>::iterator I = FilterEnds.begin(), + E = FilterEnds.end(); I != E; ++I) { + unsigned i = *I, j = TyIds.size(); + + while (i && j) + if (FilterIds[--i] != TyIds[--j]) + goto try_next; + + if (!j) + // The new filter coincides with range [i, end) of the existing filter. + return -(1 + i); + +try_next:; + } + + // Add the new filter. + int FilterID = -(1 + FilterIds.size()); + FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); + for (unsigned I = 0, N = TyIds.size(); I != N; ++I) + FilterIds.push_back(TyIds[I]); + FilterEnds.push_back(FilterIds.size()); + FilterIds.push_back(0); // terminator + return FilterID; +} + +/// getPersonality - Return the personality function for the current function. +Function *MachineModuleInfo::getPersonality() const { + // FIXME: Until PR1414 will be fixed, we're using 1 personality function per + // function + return !LandingPads.empty() ? LandingPads[0].Personality : NULL; +} + +/// getPersonalityIndex - Return unique index for current personality +/// function. NULL personality function should always get zero index. +unsigned MachineModuleInfo::getPersonalityIndex() const { + const Function* Personality = NULL; + + // Scan landing pads. 
If there is at least one non-NULL personality, use it.
+  for (unsigned i = 0; i != LandingPads.size(); ++i)
+    if (LandingPads[i].Personality) {
+      Personality = LandingPads[i].Personality;
+      break;
+    }
+
+  for (unsigned i = 0; i < Personalities.size(); ++i) {
+    if (Personalities[i] == Personality)
+      return i;
+  }
+
+  // This should never happen
+  assert(0 && "Personality function should be set!");
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+/// DebugLabelFolding pass - This pass prunes out redundant labels. This allows
+/// an info consumer to determine if the range of two labels is empty, by
+/// seeing if the labels map to the same reduced label.
+
+namespace llvm {
+
+struct DebugLabelFolder : public MachineFunctionPass {
+  static char ID;
+  DebugLabelFolder() : MachineFunctionPass(&ID) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addPreservedID(MachineLoopInfoID);
+    AU.addPreservedID(MachineDominatorsID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+  virtual const char *getPassName() const { return "Label Folder"; }
+};
+
+char DebugLabelFolder::ID = 0;
+
+bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
+  // Get machine module info.
+  MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  if (!MMI) return false;
+
+  // Track if change is made.
+  bool MadeChange = false;
+  // No prior label to begin.
+  unsigned PriorLabel = 0;
+
+  // Iterate through basic blocks.
+  for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+       BB != E; ++BB) {
+    // Iterate through instructions.
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+      // Is it a label?
+      if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){
+        // The label ID # is always operand #0, an immediate.
+        unsigned NextLabel = I->getOperand(0).getImm();
+
+        // If there was an immediately prior label.
+        if (PriorLabel) {
+          // Remap the current label to the prior label.
+          MMI->RemapLabel(NextLabel, PriorLabel);
+          // Delete the current label.
+          I = BB->erase(I);
+          // Indicate a change has been made.
+          MadeChange = true;
+          continue;
+        } else {
+          // Start a new round.
+          PriorLabel = NextLabel;
+        }
+      } else {
+        // No consecutive labels.
+        PriorLabel = 0;
+      }
+
+      ++I;
+    }
+  }
+
+  return MadeChange;
+}
+
+FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
+
+}
+
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..9f4ef12
--- /dev/null
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
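+/// A hedged sketch of a registration that funnels into this list (the
+/// RegisterRegAlloc helper and createMyRegAlloc are illustrative assumptions):
+///   static RegisterRegAlloc
+///   myAlloc("myalloc", "my register allocator", createMyRegAlloc);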
+/// +void MachinePassRegistry::Add(MachinePassRegistryNode *Node) { + Node->setNext(List); + List = Node; + if (Listener) Listener->NotifyAdd(Node->getName(), + Node->getCtor(), + Node->getDescription()); +} + + +/// Remove - Removes a function pass from the registration list. +/// +void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) { + for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) { + if (*I == Node) { + if (Listener) Listener->NotifyRemove(Node->getName()); + *I = (*I)->getNext(); + break; + } + } +} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp new file mode 100644 index 0000000..4f5ab1f --- /dev/null +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -0,0 +1,125 @@ +//===-- lib/Codegen/MachineRegisterInfo.cpp -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the MachineRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineRegisterInfo.h" +using namespace llvm; + +MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { + VRegInfo.reserve(256); + RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1. + UsedPhysRegs.resize(TRI.getNumRegs()); + + // Create the physreg use/def lists. + PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; + memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); +} + +MachineRegisterInfo::~MachineRegisterInfo() { +#ifndef NDEBUG + for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) + assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?"); + for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) + assert(!PhysRegUseDefLists[i] && + "PhysRegUseDefLists has entries after all instructions are deleted"); +#endif + delete [] PhysRegUseDefLists; +} + +/// setRegClass - Set the register class of the specified virtual register. +/// +void +MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + unsigned VR = Reg; + Reg -= TargetRegisterInfo::FirstVirtualRegister; + assert(Reg < VRegInfo.size() && "Invalid vreg!"); + const TargetRegisterClass *OldRC = VRegInfo[Reg].first; + VRegInfo[Reg].first = RC; + + // Remove from old register class's vregs list. This may be slow but + // fortunately this operation is rarely needed. + std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; + std::vector<unsigned>::iterator I=std::find(VRegs.begin(), VRegs.end(), VR); + VRegs.erase(I); + + // Add to new register class's vregs list. + RegClass2VRegMap[RC->getID()].push_back(VR); +} + +/// createVirtualRegister - Create and return a new virtual register in the +/// function with the specified register class. +/// +unsigned +MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ + assert(RegClass && "Cannot create register without RegClass!"); + // Add a reg, but keep track of whether the vector reallocated or not. + void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0]; + VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0)); + + if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1))) + // The vector reallocated, handle this now. 
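+    // (Every use/def list head stored in VRegInfo still points into the old
+    // storage; it must be re-anchored before the operands are touched again.)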
+ HandleVRegListReallocation(); + unsigned VR = getLastVirtReg(); + RegClass2VRegMap[RegClass->getID()].push_back(VR); + return VR; +} + +/// HandleVRegListReallocation - We just added a virtual register to the +/// VRegInfo info list and it reallocated. Update the use/def lists info +/// pointers. +void MachineRegisterInfo::HandleVRegListReallocation() { + // The back pointers for the vreg lists point into the previous vector. + // Update them to point to their correct slots. + for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) { + MachineOperand *List = VRegInfo[i].second; + if (!List) continue; + // Update the back-pointer to be accurate once more. + List->Contents.Reg.Prev = &VRegInfo[i].second; + } +} + +/// replaceRegWith - Replace all instances of FromReg with ToReg in the +/// machine function. This is like llvm-level X->replaceAllUsesWith(Y), +/// except that it also changes any definitions of the register as well. +void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { + assert(FromReg != ToReg && "Cannot replace a reg with itself"); + + // TODO: This could be more efficient by bulk changing the operands. + for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { + MachineOperand &O = I.getOperand(); + ++I; + O.setReg(ToReg); + } +} + + +/// getVRegDef - Return the machine instr that defines the specified virtual +/// register or null if none is found. This assumes that the code is in SSA +/// form, so there should only be one definition. +MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { + assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() && + "Invalid vreg!"); + for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { + // Since we are in SSA form, we can stop at the first definition. + if (I.getOperand().isDef()) + return &*I; + } + return 0; +} + + +#ifndef NDEBUG +void MachineRegisterInfo::dumpUses(unsigned Reg) const { + for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) + I.getOperand().getParent()->dump(); +} +#endif diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp new file mode 100644 index 0000000..0e18fa7 --- /dev/null +++ b/lib/CodeGen/MachineSink.cpp @@ -0,0 +1,257 @@ +//===-- MachineSink.cpp - Sinking for machine instructions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;

+STATISTIC(NumSunk, "Number of machine instructions sunk");
+
+namespace {
+  class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass {
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+    MachineFunction *CurMF;        // Current MachineFunction
+    MachineRegisterInfo *RegInfo;  // Machine register information
+    MachineDominatorTree *DT;      // Machine dominator tree for the function
+
+  public:
+    static char ID; // Pass identification
+    MachineSinking() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+    }
+  private:
+    bool ProcessBlock(MachineBasicBlock &MBB);
+    bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+    bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
+  };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+static RegisterPass<MachineSinking>
+X("machine-sink", "Machine code sinking");
+
+FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block.
+bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+                                             MachineBasicBlock *MBB) const {
+  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+         "Only makes sense for vregs");
+  for (MachineRegisterInfo::reg_iterator I = RegInfo->reg_begin(Reg),
+       E = RegInfo->reg_end(); I != E; ++I) {
+    if (I.getOperand().isDef()) continue; // Ignore defs.
+
+    // Determine the block of the use.
+    MachineInstr *UseInst = &*I;
+    MachineBasicBlock *UseBlock = UseInst->getParent();
+    if (UseInst->getOpcode() == TargetInstrInfo::PHI) {
+      // PHI nodes use the operand in the predecessor block, not the block with
+      // the PHI.
+      UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+    }
+    // Check that it dominates.
+    if (!DT->dominates(MBB, UseBlock))
+      return false;
+  }
+  return true;
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+  DOUT << "******** Machine Sinking ********\n";
+
+  CurMF = &MF;
+  TM = &CurMF->getTarget();
+  TII = TM->getInstrInfo();
+  RegInfo = &CurMF->getRegInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+
+  bool EverMadeChange = false;
+
+  while (1) {
+    bool MadeChange = false;
+
+    // Process all basic blocks.
+    for (MachineFunction::iterator I = CurMF->begin(), E = CurMF->end();
+         I != E; ++I)
+      MadeChange |= ProcessBlock(*I);
+
+    // If this iteration over the code changed anything, keep iterating.
+    if (!MadeChange) break;
+    EverMadeChange = true;
+  }
+  return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+  // Can't sink anything out of a block that has fewer than two successors.
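+  // For example (illustrative pseudo machine code), in:
+  //     bb0:  %r = add %a, %b
+  //           conditional branch to bb2, fall through to bb1
+  //     bb1:  use of %r
+  // the add is only needed on the bb0->bb1 path, so it can be sunk into bb1.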
+  if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+  bool MadeChange = false;
+
+  // Walk the basic block bottom-up. Remember if we saw a store.
+  MachineBasicBlock::iterator I = MBB.end();
+  --I;
+  bool ProcessedBegin, SawStore = false;
+  do {
+    MachineInstr *MI = I;  // The instruction to sink.
+
+    // Predecrement I (if it's not begin) so that it isn't invalidated by
+    // sinking.
+    ProcessedBegin = I == MBB.begin();
+    if (!ProcessedBegin)
+      --I;
+
+    if (SinkInstruction(MI, SawStore))
+      ++NumSunk, MadeChange = true;
+
+    // If we just processed the first instruction in the block, we're done.
+  } while (!ProcessedBegin);
+
+  return MadeChange;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified
+/// machine instruction out of its current block into a successor, and sink it
+/// if so.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+  // Check if it's safe to move the instruction.
+  if (!MI->isSafeToMove(TII, SawStore))
+    return false;
+
+  // FIXME: This should include support for sinking instructions within the
+  // block they are currently in, to shorten the live ranges. We often get
+  // instructions sunk into the top of a large block, but it would be better to
+  // also sink them down before their first use in the block. This xform has to
+  // be careful not to *increase* register pressure though, e.g. sinking
+  // "x = y + z" down if it kills y and z would increase the live ranges of y
+  // and z only to shrink the live range of x.
+
+  // Loop over all the operands of the specified instruction. If there is
+  // anything we can't handle, bail out.
+  MachineBasicBlock *ParentBlock = MI->getParent();
+
+  // SuccToSinkTo - This is the successor to sink this instruction to, once we
+  // decide.
+  MachineBasicBlock *SuccToSinkTo = 0;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;  // Ignore non-register operands.
+
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      // If this is a physical register use, we can't move it. If it is a def,
+      // we can move it, but only if the def is dead.
+      if (MO.isUse() || !MO.isDead())
+        return false;
+    } else {
+      // Virtual register uses are always safe to sink.
+      if (MO.isUse()) continue;
+
+      // If it's not safe to move defs of the register class, then abort.
+      if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
+        return false;
+
+      // FIXME: This picks a successor to sink into based on having one
+      // successor that dominates all the uses. However, there are cases where
+      // sinking can happen but where the sink point isn't a successor. For
+      // example:
+      //   x = computation
+      //   if () {} else {}
+      //   use x
+      // the instruction could be sunk over the whole diamond for the
+      // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+      // after that.
+
+      // Virtual register defs can only be sunk if all their uses are in blocks
+      // dominated by one of the successors.
+      if (SuccToSinkTo) {
+        // If a previous operand picked a block to sink to, then this operand
+        // must be sinkable to the same block.
+        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+          return false;
+        continue;
+      }
+
+      // Otherwise, we should look at all the successors and decide which one
+      // we should sink to.
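+      // (The scan below settles for the first successor that works; it makes
+      // no attempt to rank several viable candidates against each other.)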
+      for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
+           E = ParentBlock->succ_end(); SI != E; ++SI) {
+        if (AllUsesDominatedByBlock(Reg, *SI)) {
+          SuccToSinkTo = *SI;
+          break;
+        }
+      }
+
+      // If we couldn't find a block to sink to, ignore this instruction.
+      if (SuccToSinkTo == 0)
+        return false;
+    }
+  }
+
+  // If there are no outputs, it must have side-effects.
+  if (SuccToSinkTo == 0)
+    return false;
+
+  // It's not safe to sink instructions to an EH landing pad. Control flow
+  // into a landing pad is implicitly defined.
+  if (SuccToSinkTo->isLandingPad())
+    return false;
+
+  // It is not possible to sink an instruction into its own block. This can
+  // happen with loops.
+  if (MI->getParent() == SuccToSinkTo)
+    return false;
+
+  DEBUG(cerr << "Sink instr " << *MI);
+  DEBUG(cerr << "to block " << *SuccToSinkTo);
+
+  // If the block has multiple predecessors, this would introduce computation
+  // on a path where it doesn't already exist. We could split the critical
+  // edge, but for now we just punt.
+  // FIXME: Split critical edges if not backedges.
+  if (SuccToSinkTo->pred_size() > 1) {
+    DEBUG(cerr << " *** PUNTING: Critical edge found\n");
+    return false;
+  }
+
+  // Determine where to insert into. Skip phi nodes.
+  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+  while (InsertPos != SuccToSinkTo->end() &&
+         InsertPos->getOpcode() == TargetInstrInfo::PHI)
+    ++InsertPos;
+
+  // Move the instruction.
+  SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+                       ++MachineBasicBlock::iterator(MI));
+  return true;
+}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 0000000..be1396c
--- /dev/null
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,690 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
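+//
+// Typical invocations (sketch; any bitcode input works):
+//
+//   llc -verify-machineinstrs foo.bc
+//   LLVM_VERIFY_MACHINEINSTRS=verify.log llc foo.bc
+//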
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include <fstream> + +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN MachineVerifier : public MachineFunctionPass { + static char ID; // Pass ID, replacement for typeid + + MachineVerifier(bool allowDoubleDefs = false) : + MachineFunctionPass(&ID), + allowVirtDoubleDefs(allowDoubleDefs), + allowPhysDoubleDefs(allowDoubleDefs), + OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) + {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnMachineFunction(MachineFunction &MF); + + const bool allowVirtDoubleDefs; + const bool allowPhysDoubleDefs; + + const char *const OutFileName; + std::ostream *OS; + const MachineFunction *MF; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + + unsigned foundErrors; + + typedef SmallVector<unsigned, 16> RegVector; + typedef DenseSet<unsigned> RegSet; + typedef DenseMap<unsigned, const MachineInstr*> RegMap; + + BitVector regsReserved; + RegSet regsLive; + RegVector regsDefined, regsImpDefined, regsDead, regsKilled; + + // Add Reg and any sub-registers to RV + void addRegWithSubRegs(RegVector &RV, unsigned Reg) { + RV.push_back(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + RV.push_back(*R); + } + + // Does RS contain any super-registers of Reg? + bool anySuperRegisters(const RegSet &RS, unsigned Reg) { + for (const unsigned *R = TRI->getSuperRegisters(Reg); *R; R++) + if (RS.count(*R)) + return true; + return false; + } + + struct BBInfo { + // Is this MBB reachable from the MF entry point? + bool reachable; + + // Vregs that must be live in because they are used without being + // defined. Map value is the user. + RegMap vregsLiveIn; + + // Vregs that must be dead in because they are defined without being + // killed first. Map value is the defining instruction. + RegMap vregsDeadIn; + + // Regs killed in MBB. They may be defined again, and will then be in both + // regsKilled and regsLiveOut. + RegSet regsKilled; + + // Regs defined in MBB and live out. Note that vregs passing through may + // be live out without being mentioned here. + RegSet regsLiveOut; + + // Vregs that pass through MBB untouched. This set is disjoint from + // regsKilled and regsLiveOut. + RegSet vregsPassed; + + BBInfo() : reachable(false) {} + + // Add register to vregsPassed if it belongs there. Return true if + // anything changed. + bool addPassed(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) + return false; + return vregsPassed.insert(Reg).second; + } + + // Same for a full set. + bool addPassed(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addPassed(*I)) + changed = true; + return changed; + } + + // Live-out registers are either in regsLiveOut or vregsPassed. 
+ bool isLiveOut(unsigned Reg) const { + return regsLiveOut.count(Reg) || vregsPassed.count(Reg); + } + }; + + // Extra register info per MBB. + DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; + + bool isReserved(unsigned Reg) { + return Reg < regsReserved.size() && regsReserved[Reg]; + } + + void visitMachineFunctionBefore(); + void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); + void visitMachineInstrBefore(const MachineInstr *MI); + void visitMachineOperand(const MachineOperand *MO, unsigned MONum); + void visitMachineInstrAfter(const MachineInstr *MI); + void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); + void visitMachineFunctionAfter(); + + void report(const char *msg, const MachineFunction *MF); + void report(const char *msg, const MachineBasicBlock *MBB); + void report(const char *msg, const MachineInstr *MI); + void report(const char *msg, const MachineOperand *MO, unsigned MONum); + + void markReachable(const MachineBasicBlock *MBB); + void calcMaxRegsPassed(); + void calcMinRegsPassed(); + void checkPHIOps(const MachineBasicBlock *MBB); + }; +} + +char MachineVerifier::ID = 0; +static RegisterPass<MachineVerifier> +MachineVer("machineverifier", "Verify generated machine code"); +static const PassInfo *const MachineVerifyID = &MachineVer; + +FunctionPass * +llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) +{ + return new MachineVerifier(allowPhysDoubleDefs); +} + +bool +MachineVerifier::runOnMachineFunction(MachineFunction &MF) +{ + std::ofstream OutFile; + if (OutFileName) { + OutFile.open(OutFileName, std::ios::out | std::ios::app); + OS = &OutFile; + } else { + OS = cerr.stream(); + } + + foundErrors = 0; + + this->MF = &MF; + TM = &MF.getTarget(); + TRI = TM->getRegisterInfo(); + MRI = &MF.getRegInfo(); + + visitMachineFunctionBefore(); + for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); + MFI!=MFE; ++MFI) { + visitMachineBasicBlockBefore(MFI); + for (MachineBasicBlock::const_iterator MBBI = MFI->begin(), + MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + visitMachineInstrBefore(MBBI); + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) + visitMachineOperand(&MBBI->getOperand(I), I); + visitMachineInstrAfter(MBBI); + } + visitMachineBasicBlockAfter(MFI); + } + visitMachineFunctionAfter(); + + if (OutFileName) + OutFile.close(); + + if (foundErrors) { + cerr << "\nStopping with " << foundErrors << " machine code errors.\n"; + exit(1); + } + + return false; // no changes +} + +void +MachineVerifier::report(const char *msg, const MachineFunction *MF) +{ + assert(MF); + *OS << "\n"; + if (!foundErrors++) + MF->print(OS); + *OS << "*** Bad machine code: " << msg << " ***\n" + << "- function: " << MF->getFunction()->getName() << "\n"; +} + +void +MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) +{ + assert(MBB); + report(msg, MBB->getParent()); + *OS << "- basic block: " << MBB->getBasicBlock()->getName() + << " " << (void*)MBB + << " (#" << MBB->getNumber() << ")\n"; +} + +void +MachineVerifier::report(const char *msg, const MachineInstr *MI) +{ + assert(MI); + report(msg, MI->getParent()); + *OS << "- instruction: "; + MI->print(OS, TM); +} + +void +MachineVerifier::report(const char *msg, + const MachineOperand *MO, unsigned MONum) +{ + assert(MO); + report(msg, MO->getParent()); + *OS << "- operand " << MONum << ": "; + MO->print(*OS, TM); + *OS << "\n"; +} + +void +MachineVerifier::markReachable(const MachineBasicBlock *MBB) +{ + BBInfo &MInfo = MBBInfoMap[MBB]; + if 
(!MInfo.reachable) {
+    MInfo.reachable = true;
+    for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+         SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+      markReachable(*SuI);
+  }
+}
+
+void
+MachineVerifier::visitMachineFunctionBefore()
+{
+  regsReserved = TRI->getReservedRegs(*MF);
+  markReachable(&MF->front());
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
+{
+  regsLive.clear();
+  for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+       E = MBB->livein_end(); I != E; ++I) {
+    if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+      report("MBB live-in list contains non-physical register", MBB);
+      continue;
+    }
+    regsLive.insert(*I);
+    for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+      regsLive.insert(*R);
+  }
+  regsKilled.clear();
+  regsDefined.clear();
+  regsImpDefined.clear();
+}
+
+void
+MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI)
+{
+  const TargetInstrDesc &TI = MI->getDesc();
+  if (MI->getNumExplicitOperands() < TI.getNumOperands()) {
+    report("Too few operands", MI);
+    *OS << TI.getNumOperands() << " operands expected, but "
+        << MI->getNumExplicitOperands() << " given.\n";
+  }
+  if (!TI.isVariadic()) {
+    if (MI->getNumExplicitOperands() > TI.getNumOperands()) {
+      report("Too many operands", MI);
+      *OS << TI.getNumOperands() << " operands expected, but "
+          << MI->getNumExplicitOperands() << " given.\n";
+    }
+  }
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum)
+{
+  const MachineInstr *MI = MO->getParent();
+  const TargetInstrDesc &TI = MI->getDesc();
+
+  // The first TI.NumDefs operands must be explicit register defines.
+  if (MONum < TI.getNumDefs()) {
+    if (!MO->isReg())
+      report("Explicit definition must be a register", MO, MONum);
+    else if (!MO->isDef())
+      report("Explicit definition marked as use", MO, MONum);
+    else if (MO->isImplicit())
+      report("Explicit definition marked as implicit", MO, MONum);
+  }
+
+  switch (MO->getType()) {
+  case MachineOperand::MO_Register: {
+    const unsigned Reg = MO->getReg();
+    if (!Reg)
+      return;
+
+    // Check Live Variables.
+    if (MO->isUse()) {
+      if (MO->isKill()) {
+        addRegWithSubRegs(regsKilled, Reg);
+      } else {
+        // A two-address instruction modifying a reg is treated as kill+def.
+        unsigned defIdx;
+        if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
+            MI->getOperand(defIdx).getReg() == Reg)
+          addRegWithSubRegs(regsKilled, Reg);
+      }
+      // Explicit use of a dead register.
+      if (!MO->isImplicit() && !regsLive.count(Reg)) {
+        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          // Reserved registers may be used even when 'dead'.
+          if (!isReserved(Reg))
+            report("Using an undefined physical register", MO, MONum);
+        } else {
+          BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+          // We don't know which virtual registers are live in, so only
+          // complain if the vreg was killed in this MBB. Otherwise keep track
+          // of vregs that must be live in. PHI instructions are handled
+          // separately.
+          if (MInfo.regsKilled.count(Reg))
+            report("Using a killed virtual register", MO, MONum);
+          else if (MI->getOpcode() != TargetInstrInfo::PHI)
+            MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+        }
+      }
+    } else {
+      // Register defined.
+      // TODO: verify that earlyclobber ops are not used.
+      if (MO->isImplicit())
+        addRegWithSubRegs(regsImpDefined, Reg);
+      else
+        addRegWithSubRegs(regsDefined, Reg);
+
+      if (MO->isDead())
+        addRegWithSubRegs(regsDead, Reg);
+    }
+
+    // Check register classes.
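+    // For instance, an operand whose descriptor demands a 32-bit GPR class
+    // must not carry an 8-bit physical register, and a virtual register's
+    // class must equal or derive from the demanded class (class names vary
+    // by target).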
+ if (MONum < TI.getNumOperands() && !MO->isImplicit()) { + const TargetOperandInfo &TOI = TI.OpInfo[MONum]; + unsigned SubIdx = MO->getSubReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + unsigned sr = Reg; + if (SubIdx) { + unsigned s = TRI->getSubReg(Reg, SubIdx); + if (!s) { + report("Invalid subregister index for physical register", + MO, MONum); + return; + } + sr = s; + } + if (TOI.RegClass) { + const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (!DRC->contains(sr)) { + report("Illegal physical register for instruction", MO, MONum); + *OS << TRI->getName(sr) << " is not a " + << DRC->getName() << " register.\n"; + } + } + } else { + // Virtual register. + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (SubIdx) { + if (RC->subregclasses_begin()+SubIdx >= RC->subregclasses_end()) { + report("Invalid subregister index for virtual register", MO, MONum); + return; + } + RC = *(RC->subregclasses_begin()+SubIdx); + } + if (TOI.RegClass) { + const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (RC != DRC && !RC->hasSuperClass(DRC)) { + report("Illegal virtual register for instruction", MO, MONum); + *OS << "Expected a " << DRC->getName() << " register, but got a " + << RC->getName() << " register\n"; + } + } + } + } + break; + } + // Can PHI instrs refer to MBBs not in the CFG? X86 and ARM do. + // case MachineOperand::MO_MachineBasicBlock: + // if (MI->getOpcode() == TargetInstrInfo::PHI) { + // if (!MO->getMBB()->isSuccessor(MI->getParent())) + // report("PHI operand is not in the CFG", MO, MONum); + // } + // break; + default: + break; + } +} + +void +MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) +{ + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + set_union(MInfo.regsKilled, regsKilled); + set_subtract(regsLive, regsKilled); + regsKilled.clear(); + + for (RegVector::const_iterator I = regsDefined.begin(), + E = regsDefined.end(); I != E; ++I) { + if (regsLive.count(*I)) { + if (TargetRegisterInfo::isPhysicalRegister(*I)) { + // We allow double defines to physical registers with live + // super-registers. + if (!allowPhysDoubleDefs && !isReserved(*I) && + !anySuperRegisters(regsLive, *I)) { + report("Redefining a live physical register", MI); + *OS << "Register " << TRI->getName(*I) + << " was defined but already live.\n"; + } + } else { + if (!allowVirtDoubleDefs) { + report("Redefining a live virtual register", MI); + *OS << "Virtual register %reg" << *I + << " was defined but already live.\n"; + } + } + } else if (TargetRegisterInfo::isVirtualRegister(*I) && + !MInfo.regsKilled.count(*I)) { + // Virtual register defined without being killed first must be dead on + // entry. + MInfo.vregsDeadIn.insert(std::make_pair(*I, MI)); + } + } + + set_union(regsLive, regsDefined); regsDefined.clear(); + set_union(regsLive, regsImpDefined); regsImpDefined.clear(); + set_subtract(regsLive, regsDead); regsDead.clear(); +} + +void +MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) +{ + MBBInfoMap[MBB].regsLiveOut = regsLive; + regsLive.clear(); +} + +// Calculate the largest possible vregsPassed sets. These are the registers that +// can pass through an MBB live, but may not be live every time. It is assumed +// that all vregsPassed sets are empty before the call. +void +MachineVerifier::calcMaxRegsPassed() +{ + // First push live-out regs to successors' vregsPassed. Remember the MBBs that + // have any vregsPassed. 
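+  // This is an iterative dataflow fixed point: seed each successor with our
+  // live-outs, then keep propagating vregsPassed along CFG edges until no
+  // set grows any further.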
+ DenseSet<const MachineBasicBlock*> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock &MBB(*MFI); + BBInfo &MInfo = MBBInfoMap[&MBB]; + if (!MInfo.reachable) + continue; + for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(), + SuE = MBB.succ_end(); SuI != SuE; ++SuI) { + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.regsLiveOut)) + todo.insert(*SuI); + } + } + + // Iteratively push vregsPassed to successors. This will converge to the same + // final state regardless of DenseSet iteration order. + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), + SuE = MBB->succ_end(); SuI != SuE; ++SuI) { + if (*SuI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.vregsPassed)) + todo.insert(*SuI); + } + } +} + +// Calculate the minimum vregsPassed set. These are the registers that always +// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has +// been called earlier. +void +MachineVerifier::calcMinRegsPassed() +{ + DenseSet<const MachineBasicBlock*> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) + todo.insert(MFI); + + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + + // Remove entries from vRegsPassed that are not live out from all + // reachable predecessors. + RegSet dead; + for (RegSet::iterator I = MInfo.vregsPassed.begin(), + E = MInfo.vregsPassed.end(); I != E; ++I) { + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + BBInfo &PrInfo = MBBInfoMap[*PrI]; + if (PrInfo.reachable && !PrInfo.isLiveOut(*I)) { + dead.insert(*I); + break; + } + } + } + // If any regs removed, we need to recheck successors. + if (!dead.empty()) { + set_subtract(MInfo.vregsPassed, dead); + todo.insert(MBB->succ_begin(), MBB->succ_end()); + } + } +} + +// Check PHI instructions at the beginning of MBB. It is assumed that +// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid. +void +MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) +{ + for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); + BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { + DenseSet<const MachineBasicBlock*> seen; + + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI->getOperand(i).getReg(); + const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB(); + if (!Pre->isSuccessor(MBB)) + continue; + seen.insert(Pre); + BBInfo &PrInfo = MBBInfoMap[Pre]; + if (PrInfo.reachable && !PrInfo.isLiveOut(Reg)) + report("PHI operand is not live-out from predecessor", + &BBI->getOperand(i), i); + } + + // Did we see all predecessors? + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (!seen.count(*PrI)) { + report("Missing PHI operand", BBI); + *OS << "MBB #" << (*PrI)->getNumber() + << " is a predecessor according to the CFG.\n"; + } + } + } +} + +void +MachineVerifier::visitMachineFunctionAfter() +{ + calcMaxRegsPassed(); + + // With the maximal set of vregsPassed we can verify dead-in registers. 
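+  // (A vreg that was defined without first being killed must be dead on
+  // entry, i.e. not live-out of any reachable predecessor; otherwise it is
+  // reported as a redefinition below.)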
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Skip unreachable MBBs. + if (!MInfo.reachable) + continue; + + for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), + PrE = MFI->pred_end(); PrI != PrE; ++PrI) { + BBInfo &PrInfo = MBBInfoMap[*PrI]; + if (!PrInfo.reachable) + continue; + + // Verify physical live-ins. EH landing pads have magic live-ins so we + // ignore them. + if (!MFI->isLandingPad()) { + for (MachineBasicBlock::const_livein_iterator I = MFI->livein_begin(), + E = MFI->livein_end(); I != E; ++I) { + if (TargetRegisterInfo::isPhysicalRegister(*I) && + !isReserved (*I) && !PrInfo.isLiveOut(*I)) { + report("Live-in physical register is not live-out from predecessor", + MFI); + *OS << "Register " << TRI->getName(*I) + << " is not live-out from MBB #" << (*PrI)->getNumber() + << ".\n"; + } + } + } + + + // Verify dead-in virtual registers. + if (!allowVirtDoubleDefs) { + for (RegMap::iterator I = MInfo.vregsDeadIn.begin(), + E = MInfo.vregsDeadIn.end(); I != E; ++I) { + // DeadIn register must be in neither regsLiveOut or vregsPassed of + // any predecessor. + if (PrInfo.isLiveOut(I->first)) { + report("Live-in virtual register redefined", I->second); + *OS << "Register %reg" << I->first + << " was live-out from predecessor MBB #" + << (*PrI)->getNumber() << ".\n"; + } + } + } + } + } + + calcMinRegsPassed(); + + // With the minimal set of vregsPassed we can verify live-in virtual + // registers, including PHI instructions. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Skip unreachable MBBs. + if (!MInfo.reachable) + continue; + + checkPHIOps(MFI); + + for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), + PrE = MFI->pred_end(); PrI != PrE; ++PrI) { + BBInfo &PrInfo = MBBInfoMap[*PrI]; + if (!PrInfo.reachable) + continue; + + for (RegMap::iterator I = MInfo.vregsLiveIn.begin(), + E = MInfo.vregsLiveIn.end(); I != E; ++I) { + if (!PrInfo.isLiveOut(I->first)) { + report("Used virtual register is not live-in", I->second); + *OS << "Register %reg" << I->first + << " is not live-out from predecessor MBB #" + << (*PrI)->getNumber() + << ".\n"; + } + } + } + } +} diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile new file mode 100644 index 0000000..4ab3e3c --- /dev/null +++ b/lib/CodeGen/Makefile @@ -0,0 +1,22 @@ +##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMCodeGen +PARALLEL_DIRS = SelectionDAG AsmPrinter +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + +# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL +# in this directory. Disable -pedantic for this broken compiler. 
+ifneq ($(HUGE_VAL_SANITY),yes) +CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts)) +endif + diff --git a/lib/CodeGen/OcamlGC.cpp b/lib/CodeGen/OcamlGC.cpp new file mode 100644 index 0000000..f7bc9f3 --- /dev/null +++ b/lib/CodeGen/OcamlGC.cpp @@ -0,0 +1,38 @@ +//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering for the llvm.gc* intrinsics compatible with +// Objective Caml 3.10.0, which uses a liveness-accurate static stack map. +// +// The frametable emitter is in OcamlGCPrinter.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/Support/Compiler.h" + +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN OcamlGC : public GCStrategy { + public: + OcamlGC(); + }; +} + +static GCRegistry::Add<OcamlGC> +X("ocaml", "ocaml 3.10-compatible GC"); + +void llvm::linkOcamlGC() { } + +OcamlGC::OcamlGC() { + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; +} diff --git a/lib/CodeGen/PBQP.cpp b/lib/CodeGen/PBQP.cpp new file mode 100644 index 0000000..562300f --- /dev/null +++ b/lib/CodeGen/PBQP.cpp @@ -0,0 +1,1395 @@ +//===---------------- PBQP.cpp --------- PBQP Solver ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Developed by: Bernhard Scholz +// The University of Sydney +// http://www.it.usyd.edu.au/~scholz +//===----------------------------------------------------------------------===// + +#include "PBQP.h" +#include "llvm/Config/alloca.h" +#include <limits> +#include <cassert> +#include <cstring> + +namespace llvm { + +/************************************************************************** + * Data Structures + **************************************************************************/ + +/* edge of PBQP graph */ +typedef struct adjnode { + struct adjnode *prev, /* doubly chained list */ + *succ, + *reverse; /* reverse edge */ + int adj; /* adj. node */ + PBQPMatrix *costs; /* cost matrix of edge */ + + bool tc_valid; /* flag whether following fields are valid */ + int *tc_safe_regs; /* safe registers */ + int tc_impact; /* impact */ +} adjnode; + +/* bucket node */ +typedef struct bucketnode { + struct bucketnode *prev; /* doubly chained list */ + struct bucketnode *succ; + int u; /* node */ +} bucketnode; + +/* data structure of partitioned boolean quadratic problem */ +struct pbqp { + int num_nodes; /* number of nodes */ + int max_deg; /* maximal degree of a node */ + bool solved; /* flag that indicates whether PBQP has been solved yet */ + bool optimal; /* flag that indicates whether PBQP is optimal */ + PBQPNum min; + bool changed; /* flag whether graph has changed in simplification */ + + /* node fields */ + PBQPVector **node_costs; /* cost vectors of nodes */ + int *node_deg; /* node degree of nodes */ + int *solution; /* solution for node */ + adjnode **adj_list; /* adj. 
list */ + bucketnode **bucket_ptr; /* bucket pointer of a node */ + + /* node stack */ + int *stack; /* stack of nodes */ + int stack_ptr; /* stack pointer */ + + /* bucket fields */ + bucketnode **bucket_list; /* bucket list */ + + int num_r0; /* counters for number statistics */ + int num_ri; + int num_rii; + int num_rn; + int num_rn_special; +}; + +bool isInf(PBQPNum n) { return n == std::numeric_limits<PBQPNum>::infinity(); } + +/***************************************************************************** + * allocation/de-allocation of pbqp problem + ****************************************************************************/ + +/* allocate new partitioned boolean quadratic program problem */ +pbqp *alloc_pbqp(int num_nodes) +{ + pbqp *this_; + int u; + + assert(num_nodes > 0); + + /* allocate memory for pbqp data structure */ + this_ = (pbqp *)malloc(sizeof(pbqp)); + + /* Initialize pbqp fields */ + this_->num_nodes = num_nodes; + this_->solved = false; + this_->optimal = true; + this_->min = 0.0; + this_->max_deg = 0; + this_->changed = false; + this_->num_r0 = 0; + this_->num_ri = 0; + this_->num_rii = 0; + this_->num_rn = 0; + this_->num_rn_special = 0; + + /* initialize/allocate stack fields of pbqp */ + this_->stack = (int *) malloc(sizeof(int)*num_nodes); + this_->stack_ptr = 0; + + /* initialize/allocate node fields of pbqp */ + this_->adj_list = (adjnode **) malloc(sizeof(adjnode *)*num_nodes); + this_->node_deg = (int *) malloc(sizeof(int)*num_nodes); + this_->solution = (int *) malloc(sizeof(int)*num_nodes); + this_->bucket_ptr = (bucketnode **) malloc(sizeof(bucketnode **)*num_nodes); + this_->node_costs = (PBQPVector**) malloc(sizeof(PBQPVector*) * num_nodes); + for(u=0;u<num_nodes;u++) { + this_->solution[u]=-1; + this_->adj_list[u]=NULL; + this_->node_deg[u]=0; + this_->bucket_ptr[u]=NULL; + this_->node_costs[u]=NULL; + } + + /* initialize bucket list */ + this_->bucket_list = NULL; + + return this_; +} + +/* free pbqp problem */ +void free_pbqp(pbqp *this_) +{ + int u; + int deg; + adjnode *adj_ptr,*adj_next; + bucketnode *bucket,*bucket_next; + + assert(this_ != NULL); + + /* free node cost fields */ + for(u=0;u < this_->num_nodes;u++) { + delete this_->node_costs[u]; + } + free(this_->node_costs); + + /* free bucket list */ + for(deg=0;deg<=this_->max_deg;deg++) { + for(bucket=this_->bucket_list[deg];bucket!=NULL;bucket=bucket_next) { + this_->bucket_ptr[bucket->u] = NULL; + bucket_next = bucket-> succ; + free(bucket); + } + } + free(this_->bucket_list); + + /* free adj. list */ + assert(this_->adj_list != NULL); + for(u=0;u < this_->num_nodes; u++) { + for(adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = adj_next) { + adj_next = adj_ptr -> succ; + if (u < adj_ptr->adj) { + assert(adj_ptr != NULL); + delete adj_ptr->costs; + } + if (adj_ptr -> tc_safe_regs != NULL) { + free(adj_ptr -> tc_safe_regs); + } + free(adj_ptr); + } + } + free(this_->adj_list); + + /* free other node fields */ + free(this_->node_deg); + free(this_->solution); + free(this_->bucket_ptr); + + /* free stack */ + free(this_->stack); + + /* free pbqp data structure itself */ + free(this_); +} + + +/**************************************************************************** + * adj. node routines + ****************************************************************************/ + +/* find data structure of adj. 
node of a given node */ +static +adjnode *find_adjnode(pbqp *this_,int u,int v) +{ + adjnode *adj_ptr; + + assert (this_ != NULL); + assert (u >= 0 && u < this_->num_nodes); + assert (v >= 0 && v < this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + if (adj_ptr->adj == v) { + return adj_ptr; + } + } + return NULL; +} + +/* allocate a new data structure for adj. node */ +static +adjnode *alloc_adjnode(pbqp *this_,int u, PBQPMatrix *costs) +{ + adjnode *p; + + assert(this_ != NULL); + assert(costs != NULL); + assert(u >= 0 && u < this_->num_nodes); + + p = (adjnode *)malloc(sizeof(adjnode)); + assert(p != NULL); + + p->adj = u; + p->costs = costs; + + p->tc_valid= false; + p->tc_safe_regs = NULL; + p->tc_impact = 0; + + return p; +} + +/* insert adjacence node to adj. list */ +static +void insert_adjnode(pbqp *this_, int u, adjnode *adj_ptr) +{ + + assert(this_ != NULL); + assert(adj_ptr != NULL); + assert(u >= 0 && u < this_->num_nodes); + + /* if adjacency list of node is not empty -> update + first node of the list */ + if (this_ -> adj_list[u] != NULL) { + assert(this_->adj_list[u]->prev == NULL); + this_->adj_list[u] -> prev = adj_ptr; + } + + /* update doubly chained list pointers of pointers */ + adj_ptr -> succ = this_->adj_list[u]; + adj_ptr -> prev = NULL; + + /* update adjacency list pointer of node u */ + this_->adj_list[u] = adj_ptr; +} + +/* remove entry in an adj. list */ +static +void remove_adjnode(pbqp *this_, int u, adjnode *adj_ptr) +{ + assert(this_!= NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(this_->adj_list != NULL); + assert(adj_ptr != NULL); + + if (adj_ptr -> prev == NULL) { + this_->adj_list[u] = adj_ptr -> succ; + } else { + adj_ptr -> prev -> succ = adj_ptr -> succ; + } + + if (adj_ptr -> succ != NULL) { + adj_ptr -> succ -> prev = adj_ptr -> prev; + } + + if(adj_ptr->reverse != NULL) { + adjnode *rev = adj_ptr->reverse; + rev->reverse = NULL; + } + + if (adj_ptr -> tc_safe_regs != NULL) { + free(adj_ptr -> tc_safe_regs); + } + + free(adj_ptr); +} + +/***************************************************************************** + * node functions + ****************************************************************************/ + +/* get degree of a node */ +static +int get_deg(pbqp *this_,int u) +{ + adjnode *adj_ptr; + int deg = 0; + + assert(this_ != NULL); + assert(u >= 0 && u < this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + deg ++; + } + return deg; +} + +/* reinsert node */ +static +void reinsert_node(pbqp *this_,int u) +{ + adjnode *adj_u, + *adj_v; + + assert(this_!= NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_u = this_ -> adj_list[u]; adj_u != NULL; adj_u = adj_u -> succ) { + int v = adj_u -> adj; + adj_v = alloc_adjnode(this_,u,adj_u->costs); + insert_adjnode(this_,v,adj_v); + } +} + +/* remove node */ +static +void remove_node(pbqp *this_,int u) +{ + adjnode *adj_ptr; + + assert(this_!= NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + remove_adjnode(this_,adj_ptr->adj,adj_ptr -> reverse); + } +} + +/***************************************************************************** + * edge functions + ****************************************************************************/ + +/* insert 
edge to graph */
+/* (does not check whether the edge already exists in the graph) */
+static
+void insert_edge(pbqp *this_, int u, int v, PBQPMatrix *costs)
+{
+  adjnode *adj_u,
+          *adj_v;
+
+  /* create adjacency entry for u */
+  adj_u = alloc_adjnode(this_,v,costs);
+  insert_adjnode(this_,u,adj_u);
+
+  /* create adjacency entry for v */
+  adj_v = alloc_adjnode(this_,u,costs);
+  insert_adjnode(this_,v,adj_v);
+
+  /* create link for reverse edge */
+  adj_u -> reverse = adj_v;
+  adj_v -> reverse = adj_u;
+}
+
+/* delete edge */
+static
+void delete_edge(pbqp *this_,int u,int v)
+{
+  adjnode *adj_ptr;
+  adjnode *rev;
+
+  assert(this_ != NULL);
+  assert( u >= 0 && u < this_->num_nodes);
+  assert( v >= 0 && v < this_->num_nodes);
+
+  adj_ptr=find_adjnode(this_,u,v);
+  assert(adj_ptr != NULL);
+  assert(adj_ptr->reverse != NULL);
+
+  delete adj_ptr -> costs;
+
+  rev = adj_ptr->reverse;
+  remove_adjnode(this_,u,adj_ptr);
+  remove_adjnode(this_,v,rev);
+}
+
+/*****************************************************************************
+ * cost functions
+ ****************************************************************************/
+
+/* Note: cost(u,v) = transpose(cost(v,u)), so there is no need to store two
+   matrices for the edge pair (u,v) and (v,u). We only store the matrix for
+   the case u < v; for the other case the stored matrix is transposed on
+   demand.
+*/
+
+/* add costs to cost vector of a node */
+void add_pbqp_nodecosts(pbqp *this_,int u, PBQPVector *costs)
+{
+  assert(this_ != NULL);
+  assert(costs != NULL);
+  assert(u >= 0 && u <= this_->num_nodes);
+
+  if (!this_->node_costs[u]) {
+    this_->node_costs[u] = new PBQPVector(*costs);
+  } else {
+    *this_->node_costs[u] += *costs;
+  }
+}
+
+/* get cost matrix ptr */
+static
+PBQPMatrix *get_costmatrix_ptr(pbqp *this_, int u, int v)
+{
+  adjnode *adj_ptr;
+  PBQPMatrix *m = NULL;
+
+  assert (this_ != NULL);
+  assert (u >= 0 && u < this_->num_nodes);
+  assert (v >= 0 && v < this_->num_nodes);
+
+  adj_ptr = find_adjnode(this_,u,v);
+
+  if (adj_ptr != NULL) {
+    m = adj_ptr -> costs;
+  }
+
+  return m;
+}
+
+/* get a copy of the cost matrix for edge (u,v) */
+/* Note: the stored matrix is transposed when u > v,
+   so the result is always oriented as cost(u,v).
+*/
+static
+PBQPMatrix *pbqp_get_costmatrix(pbqp *this_, int u, int v)
+{
+  adjnode *adj_ptr = find_adjnode(this_,u,v);
+
+  if (adj_ptr != NULL) {
+    if ( u < v) {
+      return new PBQPMatrix(*adj_ptr->costs);
+    } else {
+      return new PBQPMatrix(adj_ptr->costs->transpose());
+    }
+  } else {
+    return NULL;
+  }
+}
+
+/* add costs to cost matrix of an edge */
+void add_pbqp_edgecosts(pbqp *this_,int u,int v, PBQPMatrix *costs)
+{
+  PBQPMatrix *adj_costs;
+
+  assert(this_!= NULL);
+  assert(costs != NULL);
+  assert(u >= 0 && u <= this_->num_nodes);
+  assert(v >= 0 && v <= this_->num_nodes);
+
+  /* does the edge u-v exist? */
+  if (u == v) {
+    PBQPVector *diag = new PBQPVector(costs->diagonalize());
+    add_pbqp_nodecosts(this_,v,diag);
+    delete diag;
+  } else if ((adj_costs = get_costmatrix_ptr(this_,u,v))!=NULL) {
+    if ( u < v) {
+      *adj_costs += *costs;
+    } else {
+      *adj_costs += costs->transpose();
+    }
+  } else {
+    adj_costs = new PBQPMatrix((u < v) ?
*costs : costs->transpose()); + insert_edge(this_,u,v,adj_costs); + } +} + +/* remove bucket from bucket list */ +static +void pbqp_remove_bucket(pbqp *this_, bucketnode *bucket) +{ + int u = bucket->u; + + assert(this_ != NULL); + assert(u >= 0 && u < this_->num_nodes); + assert(this_->bucket_list != NULL); + assert(this_->bucket_ptr[u] != NULL); + + /* update predecessor node in bucket list + (if no preceeding bucket exists, then + the bucket_list pointer needs to be + updated.) + */ + if (bucket->prev != NULL) { + bucket->prev-> succ = bucket->succ; + } else { + this_->bucket_list[this_->node_deg[u]] = bucket -> succ; + } + + /* update successor node in bucket list */ + if (bucket->succ != NULL) { + bucket->succ-> prev = bucket->prev; + } +} + +/********************************************************************************** + * pop functions + **********************************************************************************/ + +/* pop node of given degree */ +static +int pop_node(pbqp *this_,int deg) +{ + bucketnode *bucket; + int u; + + assert(this_ != NULL); + assert(deg >= 0 && deg <= this_->max_deg); + assert(this_->bucket_list != NULL); + + /* get first bucket of bucket list */ + bucket = this_->bucket_list[deg]; + assert(bucket != NULL); + + /* remove bucket */ + pbqp_remove_bucket(this_,bucket); + u = bucket->u; + free(bucket); + return u; +} + +/********************************************************************************** + * reorder functions + **********************************************************************************/ + +/* add bucket to bucketlist */ +static +void add_to_bucketlist(pbqp *this_,bucketnode *bucket, int deg) +{ + bucketnode *old_head; + + assert(bucket != NULL); + assert(this_ != NULL); + assert(deg >= 0 && deg <= this_->max_deg); + assert(this_->bucket_list != NULL); + + /* store node degree (for re-ordering purposes)*/ + this_->node_deg[bucket->u] = deg; + + /* put bucket to front of doubly chained list */ + old_head = this_->bucket_list[deg]; + bucket -> prev = NULL; + bucket -> succ = old_head; + this_ -> bucket_list[deg] = bucket; + if (bucket -> succ != NULL ) { + assert ( old_head -> prev == NULL); + old_head -> prev = bucket; + } +} + + +/* reorder node in bucket list according to + current node degree */ +static +void reorder_node(pbqp *this_, int u) +{ + int deg; + + assert(this_ != NULL); + assert(u>= 0 && u < this_->num_nodes); + assert(this_->bucket_list != NULL); + assert(this_->bucket_ptr[u] != NULL); + + /* get current node degree */ + deg = get_deg(this_,u); + + /* remove bucket from old bucket list only + if degree of node has changed. */ + if (deg != this_->node_deg[u]) { + pbqp_remove_bucket(this_,this_->bucket_ptr[u]); + add_to_bucketlist(this_,this_->bucket_ptr[u],deg); + } +} + +/* reorder adj. nodes of a node */ +static +void reorder_adjnodes(pbqp *this_,int u) +{ + adjnode *adj_ptr; + + assert(this_!= NULL); + assert(u >= 0 && u <= this_->num_nodes); + assert(this_->adj_list != NULL); + + for(adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + reorder_node(this_,adj_ptr->adj); + } +} + +/********************************************************************************** + * creation functions + **********************************************************************************/ + +/* create new bucket entry */ +/* consistency of the bucket list is not checked! 
*/ +static +void create_bucket(pbqp *this_,int u,int deg) +{ + bucketnode *bucket; + + assert(this_ != NULL); + assert(u >= 0 && u < this_->num_nodes); + assert(this_->bucket_list != NULL); + + bucket = (bucketnode *)malloc(sizeof(bucketnode)); + assert(bucket != NULL); + + bucket -> u = u; + this_->bucket_ptr[u] = bucket; + + add_to_bucketlist(this_,bucket,deg); +} + +/* create bucket list */ +static +void create_bucketlist(pbqp *this_) +{ + int u; + int max_deg; + int deg; + + assert(this_ != NULL); + assert(this_->bucket_list == NULL); + + /* determine max. degree of the nodes */ + max_deg = 2; /* at least of degree two! */ + for(u=0;u<this_->num_nodes;u++) { + deg = this_->node_deg[u] = get_deg(this_,u); + if (deg > max_deg) { + max_deg = deg; + } + } + this_->max_deg = max_deg; + + /* allocate bucket list */ + this_ -> bucket_list = (bucketnode **)malloc(sizeof(bucketnode *)*(max_deg + 1)); + memset(this_->bucket_list,0,sizeof(bucketnode *)*(max_deg + 1)); + assert(this_->bucket_list != NULL); + + /* insert nodes to the list */ + for(u=0;u<this_->num_nodes;u++) { + create_bucket(this_,u,this_->node_deg[u]); + } +} + +/***************************************************************************** + * PBQP simplification for trivial nodes + ****************************************************************************/ + +/* remove trivial node with cost vector length of one */ +static +void disconnect_trivialnode(pbqp *this_,int u) +{ + int v; + adjnode *adj_ptr, + *next; + PBQPMatrix *c_uv; + PBQPVector *c_v; + + assert(this_ != NULL); + assert(this_->node_costs != NULL); + assert(u >= 0 && u < this_ -> num_nodes); + assert(this_->node_costs[u]->getLength() == 1); + + /* add edge costs to node costs of adj. nodes */ + for(adj_ptr = this_->adj_list[u]; adj_ptr != NULL; adj_ptr = next){ + next = adj_ptr -> succ; + v = adj_ptr -> adj; + assert(v >= 0 && v < this_ -> num_nodes); + + /* convert matrix to cost vector offset for adj. node */ + c_uv = pbqp_get_costmatrix(this_,u,v); + c_v = new PBQPVector(c_uv->getRowAsVector(0)); + *this_->node_costs[v] += *c_v; + + /* delete edge & free vec/mat */ + delete c_v; + delete c_uv; + delete_edge(this_,u,v); + } +} + +/* find all trivial nodes and disconnect them */ +static +void eliminate_trivial_nodes(pbqp *this_) +{ + int u; + + assert(this_ != NULL); + assert(this_ -> node_costs != NULL); + + for(u=0;u < this_ -> num_nodes; u++) { + if (this_->node_costs[u]->getLength() == 1) { + disconnect_trivialnode(this_,u); + } + } +} + +/***************************************************************************** + * Normal form for PBQP + ****************************************************************************/ + +/* simplify a cost matrix. If the matrix + is independent, then simplify_matrix + returns true - otherwise false. In + vectors u and v the offset values of + the decomposition are stored. 
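+
+   Worked example (sketch): for the 2x2 matrix
+
+       4 6
+       5 7
+
+   the row minima (4,5) move into u, leaving (0,2) in both rows; the column
+   minima (0,2) of the remainder then move into v, leaving the zero matrix,
+   so the edge is independent and can be deleted.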
+*/
+
+static
+bool normalize_matrix(PBQPMatrix *m, PBQPVector *u, PBQPVector *v)
+{
+  assert( m != NULL);
+  assert( u != NULL);
+  assert( v != NULL);
+  assert( u->getLength() > 0);
+  assert( v->getLength() > 0);
+
+  assert(m->getRows() == u->getLength());
+  assert(m->getCols() == v->getLength());
+
+  /* determine u vector */
+  for(unsigned r = 0; r < m->getRows(); ++r) {
+    PBQPNum min = m->getRowMin(r);
+    (*u)[r] += min;
+    if (!isInf(min)) {
+      m->subFromRow(r, min);
+    } else {
+      m->setRow(r, 0);
+    }
+  }
+
+  /* determine v vector */
+  for(unsigned c = 0; c < m->getCols(); ++c) {
+    PBQPNum min = m->getColMin(c);
+    (*v)[c] += min;
+    if (!isInf(min)) {
+      m->subFromCol(c, min);
+    } else {
+      m->setCol(c, 0);
+    }
+  }
+
+  /* determine whether the matrix is
+     independent or not
+   */
+  return m->isZero();
+}
+
+/* simplify single edge */
+static
+void simplify_edge(pbqp *this_,int u,int v)
+{
+  PBQPMatrix *costs;
+  bool is_zero;
+
+  assert (this_ != NULL);
+  assert (u >= 0 && u <this_->num_nodes);
+  assert (v >= 0 && v <this_->num_nodes);
+  assert (u != v);
+
+  /* swap u and v if u > v in order to avoid unnecessary
+     transpositions of the cost matrix */
+
+  if (u > v) {
+    int swap = u;
+    u = v;
+    v = swap;
+  }
+
+  /* get the cost matrix and simplify it */
+  costs = get_costmatrix_ptr(this_,u,v);
+  is_zero=normalize_matrix(costs,this_->node_costs[u],this_->node_costs[v]);
+
+  /* delete edge */
+  if(is_zero){
+    delete_edge(this_,u,v);
+    this_->changed = true;
+  }
+}
+
+/* normalize cost matrices and remove
+   edges in the PBQP if they are independent,
+   i.e. can be decomposed into two
+   cost vectors.
+*/
+static
+void eliminate_independent_edges(pbqp *this_)
+{
+  int u,v;
+  adjnode *adj_ptr,*next;
+
+  assert(this_ != NULL);
+  assert(this_ -> adj_list != NULL);
+
+  this_->changed = false;
+  for(u=0;u < this_->num_nodes;u++) {
+    for (adj_ptr = this_ -> adj_list[u]; adj_ptr != NULL; adj_ptr = next) {
+      next = adj_ptr -> succ;
+      v = adj_ptr -> adj;
+      assert(v >= 0 && v < this_->num_nodes);
+      if (u < v) {
+        simplify_edge(this_,u,v);
+      }
+    }
+  }
+}
+
+
+/*****************************************************************************
+ * PBQP reduction rules
+ ****************************************************************************/
+
+/* RI reduction
+   This reduction rule is applied for nodes
+   of degree one. */
+
+static
+void apply_RI(pbqp *this_,int x)
+{
+  int y;
+  unsigned xlen,
+           ylen;
+  PBQPMatrix *c_yx;
+  PBQPVector *c_x, *delta;
+
+  assert(this_ != NULL);
+  assert(x >= 0 && x < this_->num_nodes);
+  assert(this_ -> adj_list[x] != NULL);
+  assert(this_ -> adj_list[x] -> succ == NULL);
+
+  /* get the adjacent node */
+  y = this_ -> adj_list[x] -> adj;
+  assert(y >= 0 && y < this_->num_nodes);
+
+  /* determine the lengths of the cost vectors for nodes x and y */
+  xlen = this_ -> node_costs[x]->getLength();
+  ylen = this_ -> node_costs[y]->getLength();
+
+  /* get cost vector c_x and matrix c_yx */
+  c_x = this_ -> node_costs[x];
+  c_yx = pbqp_get_costmatrix(this_,y,x);
+  assert (c_yx != NULL);
+
+  /* allocate delta vector */
+  delta = new PBQPVector(ylen);
+
+  /* compute delta vector */
+  for(unsigned i = 0; i < ylen; ++i) {
+    PBQPNum min = (*c_yx)[i][0] + (*c_x)[0];
+    for(unsigned j = 1; j < xlen; ++j) {
+      PBQPNum c = (*c_yx)[i][j] + (*c_x)[j];
+      if ( c < min )
+        min = c;
+    }
+    (*delta)[i] = min;
+  }
+
+  /* add delta vector */
+  *this_ -> node_costs[y] += *delta;
+
+  /* delete node x */
+  remove_node(this_,x);
+
+  /* reorder adj.
nodes of node x */ + reorder_adjnodes(this_,x); + + /* push node x on stack */ + assert(this_ -> stack_ptr < this_ -> num_nodes); + this_->stack[this_ -> stack_ptr++] = x; + + /* free vec/mat */ + delete c_yx; + delete delta; + + /* increment counter for number statistic */ + this_->num_ri++; +} + +/* RII reduction + This reduction rule is applied for nodes + of degree two. */ + +static +void apply_RII(pbqp *this_,int x) +{ + int y,z; + unsigned xlen,ylen,zlen; + adjnode *adj_yz; + + PBQPMatrix *c_yx, *c_zx; + PBQPVector *cx; + PBQPMatrix *delta; + + assert(this_ != NULL); + assert(x >= 0 && x < this_->num_nodes); + assert(this_ -> adj_list[x] != NULL); + assert(this_ -> adj_list[x] -> succ != NULL); + assert(this_ -> adj_list[x] -> succ -> succ == NULL); + + /* get adjacence matrix */ + y = this_ -> adj_list[x] -> adj; + z = this_ -> adj_list[x] -> succ -> adj; + assert(y >= 0 && y < this_->num_nodes); + assert(z >= 0 && z < this_->num_nodes); + + /* determine length of cost vectors for node x and y */ + xlen = this_ -> node_costs[x]->getLength(); + ylen = this_ -> node_costs[y]->getLength(); + zlen = this_ -> node_costs[z]->getLength(); + + /* get cost vector c_x and matrix c_yx */ + cx = this_ -> node_costs[x]; + c_yx = pbqp_get_costmatrix(this_,y,x); + c_zx = pbqp_get_costmatrix(this_,z,x); + assert(c_yx != NULL); + assert(c_zx != NULL); + + /* Colour Heuristic */ + if ( (adj_yz = find_adjnode(this_,y,z)) != NULL) { + adj_yz->tc_valid = false; + adj_yz->reverse->tc_valid = false; + } + + /* allocate delta matrix */ + delta = new PBQPMatrix(ylen, zlen); + + /* compute delta matrix */ + for(unsigned i=0;i<ylen;i++) { + for(unsigned j=0;j<zlen;j++) { + PBQPNum min = (*c_yx)[i][0] + (*c_zx)[j][0] + (*cx)[0]; + for(unsigned k=1;k<xlen;k++) { + PBQPNum c = (*c_yx)[i][k] + (*c_zx)[j][k] + (*cx)[k]; + if ( c < min ) { + min = c; + } + } + (*delta)[i][j] = min; + } + } + + /* add delta matrix */ + add_pbqp_edgecosts(this_,y,z,delta); + + /* delete node x */ + remove_node(this_,x); + + /* simplify cost matrix c_yz */ + simplify_edge(this_,y,z); + + /* reorder adj. nodes */ + reorder_adjnodes(this_,x); + + /* push node x on stack */ + assert(this_ -> stack_ptr < this_ -> num_nodes); + this_->stack[this_ -> stack_ptr++] = x; + + /* free vec/mat */ + delete c_yx; + delete c_zx; + delete delta; + + /* increment counter for number statistic */ + this_->num_rii++; + +} + +/* RN reduction */ +static +void apply_RN(pbqp *this_,int x) +{ + unsigned xlen; + + assert(this_ != NULL); + assert(x >= 0 && x < this_->num_nodes); + assert(this_ -> node_costs[x] != NULL); + + xlen = this_ -> node_costs[x] -> getLength(); + + /* after application of RN rule no optimality + can be guaranteed! */ + this_ -> optimal = false; + + /* push node x on stack */ + assert(this_ -> stack_ptr < this_ -> num_nodes); + this_->stack[this_ -> stack_ptr++] = x; + + /* delete node x */ + remove_node(this_,x); + + /* reorder adj. 
nodes of node x */ + reorder_adjnodes(this_,x); + + /* increment counter for number statistic */ + this_->num_rn++; +} + + +static +void compute_tc_info(pbqp *this_, adjnode *p) +{ + adjnode *r; + PBQPMatrix *m; + int x,y; + PBQPVector *c_x, *c_y; + int *row_inf_counts; + + assert(p->reverse != NULL); + + /* set flags */ + r = p->reverse; + p->tc_valid = true; + r->tc_valid = true; + + /* get edge */ + x = r->adj; + y = p->adj; + + /* get cost vectors */ + c_x = this_ -> node_costs[x]; + c_y = this_ -> node_costs[y]; + + /* get cost matrix */ + m = pbqp_get_costmatrix(this_, x, y); + + + /* allocate allowed set for edge (x,y) and (y,x) */ + if (p->tc_safe_regs == NULL) { + p->tc_safe_regs = (int *) malloc(sizeof(int) * c_x->getLength()); + } + + if (r->tc_safe_regs == NULL ) { + r->tc_safe_regs = (int *) malloc(sizeof(int) * c_y->getLength()); + } + + p->tc_impact = r->tc_impact = 0; + + row_inf_counts = (int *) alloca(sizeof(int) * c_x->getLength()); + + /* init arrays */ + p->tc_safe_regs[0] = 0; + row_inf_counts[0] = 0; + for(unsigned i = 1; i < c_x->getLength(); ++i){ + p->tc_safe_regs[i] = 1; + row_inf_counts[i] = 0; + } + + r->tc_safe_regs[0] = 0; + for(unsigned j = 1; j < c_y->getLength(); ++j){ + r->tc_safe_regs[j] = 1; + } + + for(unsigned j = 0; j < c_y->getLength(); ++j) { + int col_inf_counts = 0; + for (unsigned i = 0; i < c_x->getLength(); ++i) { + if (isInf((*m)[i][j])) { + ++col_inf_counts; + ++row_inf_counts[i]; + + p->tc_safe_regs[i] = 0; + r->tc_safe_regs[j] = 0; + } + } + if (col_inf_counts > p->tc_impact) { + p->tc_impact = col_inf_counts; + } + } + + for(unsigned i = 0; i < c_x->getLength(); ++i){ + if (row_inf_counts[i] > r->tc_impact) + { + r->tc_impact = row_inf_counts[i]; + } + } + + delete m; +} + +/* + * Checks whether node x can be locally coloured. + */ +static +int is_colorable(pbqp *this_,int x) +{ + adjnode *adj_ptr; + PBQPVector *c_x; + int result = 1; + int *allowed; + int num_allowed = 0; + unsigned total_impact = 0; + + assert(this_ != NULL); + assert(x >= 0 && x < this_->num_nodes); + assert(this_ -> node_costs[x] != NULL); + + c_x = this_ -> node_costs[x]; + + /* allocate allowed set */ + allowed = (int *)malloc(sizeof(int) * c_x->getLength()); + for(unsigned i = 0; i < c_x->getLength(); ++i){ + if (!isInf((*c_x)[i]) && i > 0) { + allowed[i] = 1; + ++num_allowed; + } else { + allowed[i] = 0; + } + } + + /* determine local minimum */ + for(adj_ptr=this_->adj_list[x] ;adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + if (!adj_ptr -> tc_valid) { + compute_tc_info(this_, adj_ptr); + } + + total_impact += adj_ptr->tc_impact; + + if (num_allowed > 0) { + for (unsigned i = 1; i < c_x->getLength(); ++i){ + if (allowed[i]){ + if (!adj_ptr->tc_safe_regs[i]){ + allowed[i] = 0; + --num_allowed; + if (num_allowed == 0) + break; + } + } + } + } + + if ( total_impact >= c_x->getLength() - 1 && num_allowed == 0 ) { + result = 0; + break; + } + } + free(allowed); + + return result; +} + +/* use briggs heuristic + note: this_ is not a general heuristic. it only is useful for + interference graphs. 
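+
+   To make the local colourability test concrete: a node is colourable when
+   at least one register survives intersection with every neighbour's set
+   of "safe" registers, or when the neighbours' combined impact cannot
+   exhaust the node's registers. A minimal standalone sketch of the first
+   half of that test (simplified; names are invented, and the real code
+   below also tracks tc_impact and infinite node costs):
+
+     #include <vector>
+     // safe[n][r] is true when register r is safe w.r.t. neighbour n;
+     // assumes numRegs >= 2, with index 0 reserved for the spill option.
+     bool locallyColorable(const std::vector<std::vector<bool> > &safe,
+                           unsigned numRegs) {
+       std::vector<bool> allowed(numRegs, true);
+       allowed[0] = false;
+       unsigned numAllowed = numRegs - 1;
+       for (unsigned n = 0; n < safe.size(); ++n)
+         for (unsigned r = 1; r < numRegs; ++r)
+           if (allowed[r] && !safe[n][r]) {
+             allowed[r] = false;
+             --numAllowed;
+           }
+       return numAllowed > 0;
+     }
+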
+ */ +int pop_colorablenode(pbqp *this_) +{ + int deg; + bucketnode *min_bucket=NULL; + PBQPNum min = std::numeric_limits<PBQPNum>::infinity(); + + /* select node where the number of colors is less than the node degree */ + for(deg=this_->max_deg;deg > 2;deg--) { + bucketnode *bucket; + for(bucket=this_->bucket_list[deg];bucket!= NULL;bucket = bucket -> succ) { + int u = bucket->u; + if (is_colorable(this_,u)) { + pbqp_remove_bucket(this_,bucket); + this_->num_rn_special++; + free(bucket); + return u; + } + } + } + + /* select node with minimal ratio between average node costs and degree of node */ + for(deg=this_->max_deg;deg >2; deg--) { + bucketnode *bucket; + for(bucket=this_->bucket_list[deg];bucket!= NULL;bucket = bucket -> succ) { + PBQPNum h; + int u; + + u = bucket->u; + assert(u>=0 && u < this_->num_nodes); + h = (*this_->node_costs[u])[0] / (PBQPNum) deg; + if (h < min) { + min_bucket = bucket; + min = h; + } + } + } + + /* return node and free bucket */ + if (min_bucket != NULL) { + int u; + + pbqp_remove_bucket(this_,min_bucket); + u = min_bucket->u; + free(min_bucket); + return u; + } else { + return -1; + } +} + + +/***************************************************************************** + * PBQP graph parsing + ****************************************************************************/ + +/* reduce pbqp problem (first phase) */ +static +void reduce_pbqp(pbqp *this_) +{ + int u; + + assert(this_ != NULL); + assert(this_->bucket_list != NULL); + + for(;;){ + + if (this_->bucket_list[1] != NULL) { + u = pop_node(this_,1); + apply_RI(this_,u); + } else if (this_->bucket_list[2] != NULL) { + u = pop_node(this_,2); + apply_RII(this_,u); + } else if ((u = pop_colorablenode(this_)) != -1) { + apply_RN(this_,u); + } else { + break; + } + } +} + +/***************************************************************************** + * PBQP back propagation + ****************************************************************************/ + +/* determine solution of a reduced node. Either + RI or RII was applied for this_ node. 
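+   Concretely: once every neighbour y of x has a fixed solution s(y), the
+   best choice for x is
+
+     solution(x) = argmin over i of ( c_x[i] + sum over adjacent y of
+                                      C_yx[s(y)][i] )
+
+   which is what the loop below computes by accumulating the selected
+   matrix rows into a copy of x's cost vector and taking minIndex().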
*/ +static +void determine_solution(pbqp *this_,int x) +{ + PBQPVector *v = new PBQPVector(*this_ -> node_costs[x]); + adjnode *adj_ptr; + + assert(this_ != NULL); + assert(x >= 0 && x < this_->num_nodes); + assert(this_ -> adj_list != NULL); + assert(this_ -> solution != NULL); + + for(adj_ptr=this_->adj_list[x] ;adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + int y = adj_ptr -> adj; + int y_sol = this_ -> solution[y]; + + PBQPMatrix *c_yx = pbqp_get_costmatrix(this_,y,x); + assert(y_sol >= 0 && y_sol < (int)this_->node_costs[y]->getLength()); + (*v) += c_yx->getRowAsVector(y_sol); + delete c_yx; + } + this_ -> solution[x] = v->minIndex(); + + delete v; +} + +/* back popagation phase of PBQP */ +static +void back_propagate(pbqp *this_) +{ + int i; + + assert(this_ != NULL); + assert(this_->stack != NULL); + assert(this_->stack_ptr < this_->num_nodes); + + for(i=this_ -> stack_ptr-1;i>=0;i--) { + int x = this_ -> stack[i]; + assert( x >= 0 && x < this_ -> num_nodes); + reinsert_node(this_,x); + determine_solution(this_,x); + } +} + +/* solve trivial nodes of degree zero */ +static +void determine_trivialsolution(pbqp *this_) +{ + int u; + PBQPNum delta; + + assert( this_ != NULL); + assert( this_ -> bucket_list != NULL); + + /* determine trivial solution */ + while (this_->bucket_list[0] != NULL) { + u = pop_node(this_,0); + + assert( u >= 0 && u < this_ -> num_nodes); + + this_->solution[u] = this_->node_costs[u]->minIndex(); + delta = (*this_->node_costs[u])[this_->solution[u]]; + this_->min = this_->min + delta; + + /* increment counter for number statistic */ + this_->num_r0++; + } +} + +/***************************************************************************** + * debug facilities + ****************************************************************************/ +static +void check_pbqp(pbqp *this_) +{ + int u,v; + PBQPMatrix *costs; + adjnode *adj_ptr; + + assert( this_ != NULL); + + for(u=0;u< this_->num_nodes; u++) { + assert (this_ -> node_costs[u] != NULL); + for(adj_ptr = this_ -> adj_list[u];adj_ptr != NULL; adj_ptr = adj_ptr -> succ) { + v = adj_ptr -> adj; + assert( v>= 0 && v < this_->num_nodes); + if (u < v ) { + costs = adj_ptr -> costs; + assert( costs->getRows() == this_->node_costs[u]->getLength() && + costs->getCols() == this_->node_costs[v]->getLength()); + } + } + } +} + +/***************************************************************************** + * PBQP solve routines + ****************************************************************************/ + +/* solve PBQP problem */ +void solve_pbqp(pbqp *this_) +{ + assert(this_ != NULL); + assert(!this_->solved); + + /* check vector & matrix dimensions */ + check_pbqp(this_); + + /* simplify PBQP problem */ + + /* eliminate trivial nodes, i.e. + nodes with cost vectors of length one. */ + eliminate_trivial_nodes(this_); + + /* eliminate edges with independent + cost matrices and normalize matrices */ + eliminate_independent_edges(this_); + + /* create bucket list for graph parsing */ + create_bucketlist(this_); + + /* reduce phase */ + reduce_pbqp(this_); + + /* solve trivial nodes */ + determine_trivialsolution(this_); + + /* back propagation phase */ + back_propagate(this_); + + this_->solved = true; +} + +/* get solution of a node */ +int get_pbqp_solution(pbqp *this_,int x) +{ + assert(this_ != NULL); + assert(this_->solution != NULL); + assert(this_ -> solved); + + return this_->solution[x]; +} + +/* is solution optimal? 
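+   (It is, unless the reduce phase had to fall back on the heuristic RN
+   rule, which clears the optimal flag.)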
*/ +bool is_pbqp_optimal(pbqp *this_) +{ + assert(this_ -> solved); + return this_->optimal; +} + +} + +/* end of pbqp.c */ diff --git a/lib/CodeGen/PBQP.h b/lib/CodeGen/PBQP.h new file mode 100644 index 0000000..5fd2c06 --- /dev/null +++ b/lib/CodeGen/PBQP.h @@ -0,0 +1,284 @@ +//===---------------- PBQP.cpp --------- PBQP Solver ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Developed by: Bernhard Scholz +// The University of Sydney +// http://www.it.usyd.edu.au/~scholz +//===----------------------------------------------------------------------===// + +// TODO: +// +// * Default to null costs on vector initialisation? +// * C++-ify the rest of the solver. + +#ifndef LLVM_CODEGEN_PBQPSOLVER_H +#define LLVM_CODEGEN_PBQPSOLVER_H + +#include <cassert> +#include <algorithm> +#include <functional> + +namespace llvm { + +//! \brief Floating point type to use in PBQP solver. +typedef double PBQPNum; + +//! \brief PBQP Vector class. +class PBQPVector { +public: + + //! \brief Construct a PBQP vector of the given size. + explicit PBQPVector(unsigned length) : + length(length), data(new PBQPNum[length]) { + std::fill(data, data + length, 0); + } + + //! \brief Copy construct a PBQP vector. + PBQPVector(const PBQPVector &v) : + length(v.length), data(new PBQPNum[length]) { + std::copy(v.data, v.data + length, data); + } + + ~PBQPVector() { delete[] data; } + + //! \brief Assignment operator. + PBQPVector& operator=(const PBQPVector &v) { + delete[] data; + length = v.length; + data = new PBQPNum[length]; + std::copy(v.data, v.data + length, data); + return *this; + } + + //! \brief Return the length of the vector + unsigned getLength() const throw () { + return length; + } + + //! \brief Element access. + PBQPNum& operator[](unsigned index) { + assert(index < length && "PBQPVector element access out of bounds."); + return data[index]; + } + + //! \brief Const element access. + const PBQPNum& operator[](unsigned index) const { + assert(index < length && "PBQPVector element access out of bounds."); + return data[index]; + } + + //! \brief Add another vector to this one. + PBQPVector& operator+=(const PBQPVector &v) { + assert(length == v.length && "PBQPVector length mismatch."); + std::transform(data, data + length, v.data, data, std::plus<PBQPNum>()); + return *this; + } + + //! \brief Subtract another vector from this one. + PBQPVector& operator-=(const PBQPVector &v) { + assert(length == v.length && "PBQPVector length mismatch."); + std::transform(data, data + length, v.data, data, std::minus<PBQPNum>()); + return *this; + } + + //! \brief Returns the index of the minimum value in this vector + unsigned minIndex() const { + return std::min_element(data, data + length) - data; + } + +private: + unsigned length; + PBQPNum *data; +}; + + +//! \brief PBQP Matrix class +class PBQPMatrix { +public: + + //! \brief Construct a PBQP Matrix with the given dimensions. + PBQPMatrix(unsigned rows, unsigned cols) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + std::fill(data, data + (rows * cols), 0); + } + + //! \brief Copy construct a PBQP matrix. + PBQPMatrix(const PBQPMatrix &m) : + rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) { + std::copy(m.data, m.data + (rows * cols), data); + } + + ~PBQPMatrix() { delete[] data; } + + //! 
\brief Assignment operator. + PBQPMatrix& operator=(const PBQPMatrix &m) { + delete[] data; + rows = m.rows; cols = m.cols; + data = new PBQPNum[rows * cols]; + std::copy(m.data, m.data + (rows * cols), data); + return *this; + } + + //! \brief Return the number of rows in this matrix. + unsigned getRows() const throw () { return rows; } + + //! \brief Return the number of cols in this matrix. + unsigned getCols() const throw () { return cols; } + + //! \brief Matrix element access. + PBQPNum* operator[](unsigned r) { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + //! \brief Matrix element access. + const PBQPNum* operator[](unsigned r) const { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + //! \brief Returns the given row as a vector. + PBQPVector getRowAsVector(unsigned r) const { + PBQPVector v(cols); + for (unsigned c = 0; c < cols; ++c) + v[c] = (*this)[r][c]; + return v; + } + + //! \brief Reset the matrix to the given value. + PBQPMatrix& reset(PBQPNum val = 0) { + std::fill(data, data + (rows * cols), val); + return *this; + } + + //! \brief Set a single row of this matrix to the given value. + PBQPMatrix& setRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds."); + std::fill(data + (r * cols), data + ((r + 1) * cols), val); + return *this; + } + + //! \brief Set a single column of this matrix to the given value. + PBQPMatrix& setCol(unsigned c, PBQPNum val) { + assert(c < cols && "Column out of bounds."); + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] = val; + return *this; + } + + //! \brief Matrix transpose. + PBQPMatrix transpose() const { + PBQPMatrix m(cols, rows); + for (unsigned r = 0; r < rows; ++r) + for (unsigned c = 0; c < cols; ++c) + m[c][r] = (*this)[r][c]; + return m; + } + + //! \brief Returns the diagonal of the matrix as a vector. + //! + //! Matrix must be square. + PBQPVector diagonalize() const { + assert(rows == cols && "Attempt to diagonalize non-square matrix."); + + PBQPVector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][r]; + return v; + } + + //! \brief Add the given matrix to this one. + PBQPMatrix& operator+=(const PBQPMatrix &m) { + assert(rows == m.rows && cols == m.cols && + "Matrix dimensions mismatch."); + std::transform(data, data + (rows * cols), m.data, data, + std::plus<PBQPNum>()); + return *this; + } + + //! \brief Returns the minimum of the given row + PBQPNum getRowMin(unsigned r) const { + assert(r < rows && "Row out of bounds"); + return *std::min_element(data + (r * cols), data + ((r + 1) * cols)); + } + + //! \brief Returns the minimum of the given column + PBQPNum getColMin(unsigned c) const { + PBQPNum minElem = (*this)[0][c]; + for (unsigned r = 1; r < rows; ++r) + if ((*this)[r][c] < minElem) minElem = (*this)[r][c]; + return minElem; + } + + //! \brief Subtracts the given scalar from the elements of the given row. + PBQPMatrix& subFromRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds"); + std::transform(data + (r * cols), data + ((r + 1) * cols), + data + (r * cols), + std::bind2nd(std::minus<PBQPNum>(), val)); + return *this; + } + + //! \brief Subtracts the given scalar from the elements of the given column. + PBQPMatrix& subFromCol(unsigned c, PBQPNum val) { + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] -= val; + return *this; + } + + //! \brief Returns true if this is a zero matrix. 
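+  //!
+  //! For the solver this doubles as the independence test: once
+  //! normalize_matrix has folded the row and column minima into the two
+  //! node cost vectors, a zero matrix means the edge contributes no cost
+  //! of its own and can be deleted.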
+  bool isZero() const {
+    return std::find_if(data, data + (rows * cols),
+                        std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
+                          data + (rows * cols);
+  }
+
+private:
+  unsigned rows, cols;
+  PBQPNum *data;
+};
+
+#define EPS (1E-8)
+
+#ifndef PBQP_TYPE
+#define PBQP_TYPE
+struct pbqp;
+typedef struct pbqp pbqp;
+#endif
+
+/*****************
+ * PBQP routines *
+ *****************/
+
+/* allocate pbqp problem */
+pbqp *alloc_pbqp(int num);
+
+/* add node costs */
+void add_pbqp_nodecosts(pbqp *this_,int u, PBQPVector *costs);
+
+/* add edge costs */
+void add_pbqp_edgecosts(pbqp *this_,int u,int v,PBQPMatrix *costs);
+
+/* solve PBQP problem */
+void solve_pbqp(pbqp *this_);
+
+/* get solution of a node */
+int get_pbqp_solution(pbqp *this_,int u);
+
+/* free PBQP */
+void free_pbqp(pbqp *this_);
+
+/* is optimal */
+bool is_pbqp_optimal(pbqp *this_);
+
+}
+#endif
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..c5c76fc
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,431 @@
+//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+
+namespace {
+  class VISIBILITY_HIDDEN PNE : public MachineFunctionPass {
+    MachineRegisterInfo *MRI; // Machine register information
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    PNE() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<LiveVariables>();
+      AU.addPreservedID(MachineLoopInfoID);
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+    /// in predecessor basic blocks.
+    ///
+    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+    void LowerAtomicPHINode(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in the
+    /// function. In particular, we want to map the number of uses of a virtual
+    /// register which is used in a PHI node. We map that to the BB the
+    /// vreg is coming from. This is used later to determine when the vreg
+    /// is killed in the BB.
+ /// + void analyzePHINodes(const MachineFunction& Fn); + + // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from + // SrcReg. This needs to be after any def or uses of SrcReg, but before + // any subsequent point where control flow might jump out of the basic + // block. + MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, + unsigned SrcReg); + + // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and + // also after any exception handling labels: in landing pads execution + // starts at the label, so any copies placed before it won't be executed! + MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + // Rather than assuming that EH labels come before other kinds of labels, + // just skip all labels. + while (I != MBB.end() && + (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel())) + ++I; + return I; + } + + typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair; + typedef std::map<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + + // Defs of PHI sources which are implicit_def. + SmallPtrSet<MachineInstr*, 4> ImpDefs; + }; +} + +char PNE::ID = 0; +static RegisterPass<PNE> +X("phi-node-elimination", "Eliminate PHI nodes for register allocation"); + +const PassInfo *const llvm::PHIEliminationID = &X; + +bool PNE::runOnMachineFunction(MachineFunction &Fn) { + MRI = &Fn.getRegInfo(); + + analyzePHINodes(Fn); + + bool Changed = false; + + // Eliminate PHI instructions by inserting copies into predecessor blocks. + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= EliminatePHINodes(Fn, *I); + + // Remove dead IMPLICIT_DEF instructions. + for (SmallPtrSet<MachineInstr*,4>::iterator I = ImpDefs.begin(), + E = ImpDefs.end(); I != E; ++I) { + MachineInstr *DefMI = *I; + unsigned DefReg = DefMI->getOperand(0).getReg(); + if (MRI->use_empty(DefReg)) + DefMI->eraseFromParent(); + } + + ImpDefs.clear(); + VRegPHIUseCount.clear(); + return Changed; +} + + +/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in +/// predecessor basic blocks. +/// +bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { + if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) + return false; // Quick exit for basic blocks without PHIs. + + // Get an iterator to the first instruction after the last PHI node (this may + // also be the end of the basic block). + MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin()); + + while (MBB.front().getOpcode() == TargetInstrInfo::PHI) + LowerAtomicPHINode(MBB, AfterPHIsIt); + + return true; +} + +/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node +/// are implicit_def's. +static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, + const MachineRegisterInfo *MRI) { + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { + unsigned SrcReg = MPhi->getOperand(i).getReg(); + const MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (!DefMI || DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + return false; + } + return true; +} + +// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg. +// This needs to be after any def or uses of SrcReg, but before any subsequent +// point where control flow might jump out of the basic block. +MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB, + unsigned SrcReg) { + // Handle the trivial case trivially. 
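+  // (An empty block has no defs, uses, or terminators to dodge, so the
+  // start of the block is as safe an insertion point as any.)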
+  if (MBB.empty())
+    return MBB.begin();
+
+  // If this basic block does not contain an invoke, then control flow always
+  // reaches the end of it, so place the copy there. The logic below works in
+  // this case too, but is more expensive.
+  if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
+    return MBB.getFirstTerminator();
+
+  // Discover any definitions/uses in this basic block.
+  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineInstr *DefUseMI = &*RI;
+    if (DefUseMI->getParent() == &MBB)
+      DefUsesInMBB.insert(DefUseMI);
+  }
+
+  MachineBasicBlock::iterator InsertPoint;
+  if (DefUsesInMBB.empty()) {
+    // No defs/uses. Insert the copy at the start of the basic block.
+    InsertPoint = MBB.begin();
+  } else if (DefUsesInMBB.size() == 1) {
+    // Insert the copy immediately after the definition/use.
+    InsertPoint = *DefUsesInMBB.begin();
+    ++InsertPoint;
+  } else {
+    // Insert the copy immediately after the last definition/use.
+    InsertPoint = MBB.end();
+    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+    ++InsertPoint;
+  }
+
+  // Make sure the copy goes after any phi nodes, however.
+  return SkipPHIsAndLabels(MBB, InsertPoint);
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+///
+void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator AfterPHIsIt) {
+  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+  MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+  unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+  unsigned DestReg = MPhi->getOperand(0).getReg();
+  bool isDead = MPhi->getOperand(0).isDead();
+
+  // Create a new register for the incoming PHI arguments.
+  MachineFunction &MF = *MBB.getParent();
+  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+  unsigned IncomingReg = 0;
+
+  // Insert a register to register copy at the top of the current block (but
+  // after any remaining phi nodes) which copies the new incoming register
+  // into the phi node destination.
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  if (isSourceDefinedByImplicitDef(MPhi, MRI))
+    // If all sources of a PHI node are implicit_def, just emit an
+    // implicit_def instead of a copy.
+    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+            TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg);
+  else {
+    IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+    TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
+  }
+
+  // Update live variable information if there is any.
+  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+  if (LV) {
+    MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+    if (IncomingReg) {
+      // Increment use count of the newly created virtual register.
+      LV->getVarInfo(IncomingReg).NumUses++;
+
+      // Add information to LiveVariables to know that the incoming value is
+      // killed. Note that because the value is defined in several places (once
+      // for each incoming block), the "def" block and instruction fields
+      // for the VarInfo are not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy); + } + + // Since we are going to be deleting the PHI node, if it is the last use of + // any registers, or if the value itself is dead, we need to move this + // information over to the new copy we just inserted. + LV->removeVirtualRegistersKilled(MPhi); + + // If the result is dead, update LV. + if (isDead) { + LV->addVirtualRegisterDead(DestReg, PHICopy); + LV->removeVirtualRegisterDead(DestReg, MPhi); + } + } + + // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) + --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i + 1).getMBB(), + MPhi->getOperand(i).getReg())]; + + // Now loop over all of the incoming arguments, changing them to copy into the + // IncomingReg register in the corresponding predecessor basic block. + SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto; + for (int i = NumSrcs - 1; i >= 0; --i) { + unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + "Machine PHI Operands must all be virtual registers!"); + + // If source is defined by an implicit def, there is no need to insert a + // copy. + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + ImpDefs.insert(DefMI); + continue; + } + + // Get the MachineBasicBlock equivalent of the BasicBlock that is the source + // path the PHI. + MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); + + // Check to make sure we haven't already emitted the copy for this block. + // This can happen because PHI nodes may have multiple entries for the same + // basic block. + if (!MBBsInsertedInto.insert(&opBlock)) + continue; // If the copy has already been emitted, we're done. + + // Find a safe location to insert the copy, this may be the first terminator + // in the block (or end()). + MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg); + + // Insert the copy. + TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC); + + // Now update live variable information if we have it. Otherwise we're done + if (!LV) continue; + + // We want to be able to insert a kill of the register if this PHI (aka, the + // copy we just inserted) is the last use of the source value. Live + // variable analysis conservatively handles this by saying that the value is + // live until the end of the block the PHI entry lives in. If the value + // really is dead at the PHI copy, there will be no successor blocks which + // have the value live-in. + // + // Check to see if the copy is the last use, and if so, update the live + // variables information so that it knows the copy source instruction kills + // the incoming value. + LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg); + + // Loop over all of the successors of the basic block, checking to see if + // the value is either live in the block, or if it is killed in the block. + // Also check to see if this register is in use by another PHI node which + // has not yet been eliminated. If so, it will be killed at an appropriate + // point later. + + // Is it used by any PHI instructions in this block? + bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0; + + std::vector<MachineBasicBlock*> OpSuccBlocks; + + // Otherwise, scan successors, including the BB the PHI node lives in. 
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), + E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + + // Is it alive in this successor? + unsigned SuccIdx = SuccMBB->getNumber(); + if (InRegVI.AliveBlocks.test(SuccIdx)) { + ValueIsLive = true; + break; + } + + OpSuccBlocks.push_back(SuccMBB); + } + + // Check to see if this value is live because there is a use in a successor + // that kills it. + if (!ValueIsLive) { + switch (OpSuccBlocks.size()) { + case 1: { + MachineBasicBlock *MBB = OpSuccBlocks[0]; + for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) + if (InRegVI.Kills[i]->getParent() == MBB) { + ValueIsLive = true; + break; + } + break; + } + case 2: { + MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1]; + for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) + if (InRegVI.Kills[i]->getParent() == MBB1 || + InRegVI.Kills[i]->getParent() == MBB2) { + ValueIsLive = true; + break; + } + break; + } + default: + std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); + for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) + if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), + InRegVI.Kills[i]->getParent())) { + ValueIsLive = true; + break; + } + } + } + + // Okay, if we now know that the value is not live out of the block, we can + // add a kill marker in this block saying that it kills the incoming value! + if (!ValueIsLive) { + // In our final twist, we have to decide which instruction kills the + // register. In most cases this is the copy, however, the first + // terminator instruction at the end of the block may also use the value. + // In this case, we should mark *it* as being the killing block, not the + // copy. + MachineBasicBlock::iterator KillInst = prior(InsertPos); + MachineBasicBlock::iterator Term = opBlock.getFirstTerminator(); + if (Term != opBlock.end()) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + + // Check that no other terminators use values. +#ifndef NDEBUG + for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end(); + ++TI) { + assert(!TI->readsRegister(SrcReg) && + "Terminator instructions cannot use virtual registers unless" + "they are the first terminator in a block!"); + } +#endif + } + + // Finally, mark it killed. + LV->addVirtualRegisterKilled(SrcReg, KillInst); + + // This vreg no longer lives all of the way through opBlock. + unsigned opBlockNum = opBlock.getNumber(); + InRegVI.AliveBlocks.reset(opBlockNum); + } + } + + // Really delete the PHI instruction now! + MF.DeleteMachineInstr(MPhi); + ++NumAtomic; +} + +/// analyzePHINodes - Gather information about the PHI nodes in here. In +/// particular, we want to map the number of uses of a virtual register which is +/// used in a PHI node. We map that to the BB the vreg is coming from. This is +/// used later to determine when the vreg is killed in the BB. 
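+/// For example, if %v7 flows into two PHI nodes from BB3 and into one from
+/// BB5, VRegPHIUseCount[(BB3, %v7)] ends up as 2 and
+/// VRegPHIUseCount[(BB5, %v7)] as 1 (names invented for illustration).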
+/// +void PNE::analyzePHINodes(const MachineFunction& Fn) { + for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); + I != E; ++I) + for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) + ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(), + BBI->getOperand(i).getReg())]; +} diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp new file mode 100644 index 0000000..f67eb79 --- /dev/null +++ b/lib/CodeGen/Passes.cpp @@ -0,0 +1,54 @@ +//===-- Passes.cpp - Target independent code generation passes ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines interfaces to access the target independent code +// generation passes provided by the LLVM backend. +// +//===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +//===---------------------------------------------------------------------===// +/// +/// RegisterRegAlloc class - Track the registration of register allocators. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterRegAlloc::Registry; + + +//===---------------------------------------------------------------------===// +/// +/// RegAlloc command line options. +/// +//===---------------------------------------------------------------------===// +static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, + RegisterPassParser<RegisterRegAlloc> > +RegAlloc("regalloc", + cl::init(&createLinearScanRegisterAllocator), + cl::desc("Register allocator to use: (default = linearscan)")); + + +//===---------------------------------------------------------------------===// +/// +/// createRegisterAllocator - choose the appropriate register allocator. +/// +//===---------------------------------------------------------------------===// +FunctionPass *llvm::createRegisterAllocator() { + RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); + + if (!Ctor) { + Ctor = RegAlloc; + RegisterRegAlloc::setDefault(RegAlloc); + } + + return Ctor(); +} diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp new file mode 100644 index 0000000..de774685 --- /dev/null +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -0,0 +1,941 @@ +//===----- SchedulePostRAList.cpp - list scheduler ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. 
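+//
+// As an informal sketch of the driving loop (names invented; the real
+// implementation below also manages a pending queue, hazard detection,
+// and noop insertion):
+//
+//   while (!available.empty()) {
+//     SUnit *su = available.pop();   // highest-priority ready node
+//     if (legalToSchedule(su)) {
+//       emit(su);                    // append to the schedule
+//       releaseSuccessors(su);       // may make more nodes available
+//     }
+//   }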
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static cl::opt<bool>
+EnableAntiDepBreaking("break-anti-dependencies",
+                      cl::desc("Break post-RA scheduling anti-dependencies"),
+                      cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnablePostRAHazardAvoidance("avoid-hazards",
+                      cl::desc("Enable simple hazard-avoidance"),
+                      cl::init(true), cl::Hidden);
+
+namespace {
+  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
+  public:
+    static char ID;
+    PostRAScheduler() : MachineFunctionPass(&ID) {}
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    const char *getPassName() const {
+      return "Post RA top-down list latency scheduler";
+    }
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+  };
+  char PostRAScheduler::ID = 0;
+
+  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
+    /// AvailableQueue - The priority queue to use for the available SUnits.
+    ///
+    LatencyPriorityQueue AvailableQueue;
+
+    /// PendingQueue - This contains all of the instructions whose operands have
+    /// been issued, but their results are not ready yet (due to the latency of
+    /// the operation). Once the operands become available, the instruction is
+    /// added to the AvailableQueue.
+    std::vector<SUnit*> PendingQueue;
+
+    /// Topo - A topological ordering for SUnits.
+    ScheduleDAGTopologicalSort Topo;
+
+    /// AllocatableSet - The set of allocatable registers.
+    /// We'll be ignoring anti-dependencies on non-allocatable registers,
+    /// because they may not be safe to break.
+    const BitVector AllocatableSet;
+
+    /// HazardRec - The hazard recognizer to use.
+    ScheduleHazardRecognizer *HazardRec;
+
+    /// Classes - For live regs that are only used in one register class in a
+    /// live range, the register class. If the register is not live, the
+    /// corresponding value is null. If the register is live but used in
+    /// multiple register classes, the corresponding value is -1 casted to a
+    /// pointer.
+    const TargetRegisterClass *
+      Classes[TargetRegisterInfo::FirstVirtualRegister];
+
+    /// RegRefs - Map registers to all their references within a live range.
+    std::multimap<unsigned, MachineOperand *> RegRefs;
+
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+    /// or ~0u if the register is not live.
+    unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom up), or ~0u if the register is live.
+ unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; + + public: + SchedulePostRATDList(MachineFunction &MF, + const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + ScheduleHazardRecognizer *HR) + : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), + AllocatableSet(TRI->getAllocatableSet(MF)), + HazardRec(HR) {} + + ~SchedulePostRATDList() { + delete HazardRec; + } + + /// StartBlock - Initialize register live-range state for scheduling in + /// this block. + /// + void StartBlock(MachineBasicBlock *BB); + + /// Schedule - Schedule the instruction range using list scheduling. + /// + void Schedule(); + + /// Observe - Update liveness information to account for the current + /// instruction, which will not be scheduled. + /// + void Observe(MachineInstr *MI, unsigned Count); + + /// FinishBlock - Clean up register live-range state. + /// + void FinishBlock(); + + private: + void PrescanInstruction(MachineInstr *MI); + void ScanInstruction(MachineInstr *MI, unsigned Count); + void ReleaseSucc(SUnit *SU, SDep *SuccEdge); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); + bool BreakAntiDependencies(); + }; + + /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses + /// a coarse classification and attempts to avoid that instructions of + /// a given class aren't grouped too densely together. + class SimpleHazardRecognizer : public ScheduleHazardRecognizer { + /// Class - A simple classification for SUnits. + enum Class { + Other, Load, Store + }; + + /// Window - The Class values of the most recently issued + /// instructions. + Class Window[8]; + + /// getClass - Classify the given SUnit. + Class getClass(const SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayLoad()) + return Load; + if (TID.mayStore()) + return Store; + return Other; + } + + /// Step - Rotate the existing entries in Window and insert the + /// given class value in position as the most recent. + void Step(Class C) { + std::copy(Window+1, array_endof(Window), Window); + Window[array_lengthof(Window)-1] = C; + } + + public: + SimpleHazardRecognizer() : Window() {} + + virtual HazardType getHazardType(SUnit *SU) { + Class C = getClass(SU); + if (C == Other) + return NoHazard; + unsigned Score = 0; + for (unsigned i = 0; i != array_lengthof(Window); ++i) + if (Window[i] == C) + Score += i + 1; + if (Score > array_lengthof(Window) * 2) + return Hazard; + return NoHazard; + } + + virtual void EmitInstruction(SUnit *SU) { + Step(getClass(SU)); + } + + virtual void AdvanceCycle() { + Step(Other); + } + }; +} + +/// isSchedulingBoundary - Test if the given instruction should be +/// considered a scheduling boundary. This primarily includes labels +/// and terminators. +/// +static bool isSchedulingBoundary(const MachineInstr *MI, + const MachineFunction &MF) { + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Don't attempt to schedule around any instruction that modifies + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. 
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); + if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore())) + return true; + + return false; +} + +bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { + DOUT << "PostRAScheduler\n"; + + const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? + new SimpleHazardRecognizer : + new ScheduleHazardRecognizer(); + + SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR); + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + // Initialize register live-range state for scheduling in this block. + Scheduler.StartBlock(MBB); + + // Schedule each sequence of instructions not interrupted by a label + // or anything else that effectively needs to shut down scheduling. + MachineBasicBlock::iterator Current = MBB->end(); + unsigned Count = MBB->size(), CurrentCount = Count; + for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { + MachineInstr *MI = prior(I); + if (isSchedulingBoundary(MI, Fn)) { + Scheduler.Run(MBB, I, Current, CurrentCount); + Scheduler.EmitSchedule(); + Current = MI; + CurrentCount = Count - 1; + Scheduler.Observe(MI, CurrentCount); + } + I = MI; + --Count; + } + assert(Count == 0 && "Instruction count mismatch!"); + assert((MBB->begin() == Current || CurrentCount != 0) && + "Instruction count mismatch!"); + Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.EmitSchedule(); + + // Clean up register live-range state. + Scheduler.FinishBlock(); + } + + return true; +} + +/// StartBlock - Initialize register live-range state for scheduling in +/// this block. +/// +void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { + // Call the superclass. + ScheduleDAGInstrs::StartBlock(BB); + + // Clear out the register class data. + std::fill(Classes, array_endof(Classes), + static_cast<const TargetRegisterClass *>(0)); + + // Initialize the indices to indicate that no registers are live. + std::fill(KillIndices, array_endof(KillIndices), ~0u); + std::fill(DefIndices, array_endof(DefIndices), BB->size()); + + // Determine the live-out physregs for this block. + if (!BB->empty() && BB->back().getDesc().isReturn()) + // In a return block, examine the function live-out regs. + for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), + E = MRI.liveout_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + else + // In a non-return block, examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. 
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + + // Consider callee-saved registers as live-out, since we're running after + // prologue/epilogue insertion so there's no way to add additional + // saved registers. + // + // TODO: If the callee saves and restores these, then we can potentially + // use them between the save and the restore. To do that, we could scan + // the exit blocks to see which of these registers are defined. + // Alternatively, callee-saved registers that aren't saved and restored + // could be marked live-in in every block. + for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } +} + +/// Schedule - Schedule the instruction range using list scheduling. +/// +void SchedulePostRATDList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + // Build the scheduling graph. + BuildSchedGraph(); + + if (EnableAntiDepBreaking) { + if (BreakAntiDependencies()) { + // We made changes. Update the dependency graph. + // Theoretically we could update the graph in place: + // When a live range is changed to use a different register, remove + // the def's anti-dependence *and* output-dependence edges due to + // that register, and add new anti-dependence and output-dependence + // edges based on the next live range of the register. + SUnits.clear(); + EntrySU = SUnit(); + ExitSU = SUnit(); + BuildSchedGraph(); + } + } + + AvailableQueue.initNodes(SUnits); + + ListScheduleTopDown(); + + AvailableQueue.releaseState(); +} + +/// Observe - Update liveness information to account for the current +/// instruction, which will not be scheduled. +/// +void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + // Any register which was defined within the previous scheduling region + // may have been rescheduled and its lifetime may overlap with registers + // in ways not reflected in our current liveness state. For each such + // register, adjust the liveness state to be conservatively correct. + for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) + if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { + assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); + // Mark this register to be non-renamable. + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + // Move the def index to the end of the previous region, to reflect + // that the def could theoretically have been scheduled at the end. + DefIndices[Reg] = InsertPosIndex; + } + + PrescanInstruction(MI); + ScanInstruction(MI, Count); +} + +/// FinishBlock - Clean up register live-range state. +/// +void SchedulePostRATDList::FinishBlock() { + RegRefs.clear(); + + // Call the superclass. + ScheduleDAGInstrs::FinishBlock(); +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. 
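+/// The step chosen is the predecessor edge that maximizes
+/// getDepth() + getLatency(); ties prefer anti-dependence edges, since
+/// those are the ones BreakAntiDependencies can try to remove.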
+static SDep *CriticalPathStep(SUnit *SU) { + SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + return Next; +} + +void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) { + // Scan the register operands for this instruction and update + // Classes and RegRefs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + const TargetRegisterClass *NewRC = + getInstrOperandRegClass(TRI, MI->getDesc(), i); + + // For now, only allow the register to be changed if its register + // class is consistent across all uses. + if (!Classes[Reg] && NewRC) + Classes[Reg] = NewRC; + else if (!NewRC || Classes[Reg] != NewRC) + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + // Now check for aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + // If an alias of the reg is used during the live range, give up. + // Note that this allows us to skip checking if AntiDepReg + // overlaps with any of the aliases, among other things. + unsigned AliasReg = *Alias; + if (Classes[AliasReg]) { + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + } + } + + // If we're still willing to consider this register, note the reference. + if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) + RegRefs.insert(std::make_pair(Reg, &MO)); + } +} + +void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, + unsigned Count) { + // Update liveness. + // Proceding upwards, registers that are defed but not used in this + // instruction are now dead. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + DefIndices[Reg] = Count; + KillIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + Classes[Reg] = 0; + RegRefs.erase(Reg); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + unsigned SubregReg = *Subreg; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = ~0u; + Classes[SubregReg] = 0; + RegRefs.erase(SubregReg); + } + // Conservatively mark super-registers as unusable. 
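+    // (A def of Reg leaves any wider register containing it only partially
+    // defined, so the wider register is no longer safe to rename.)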
+    for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+         *Super; ++Super) {
+      unsigned SuperReg = *Super;
+      Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+    }
+  }
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    if (!MO.isUse()) continue;
+
+    const TargetRegisterClass *NewRC =
+      getInstrOperandRegClass(TRI, MI->getDesc(), i);
+
+    // For now, only allow the register to be changed if its register
+    // class is consistent across all uses.
+    if (!Classes[Reg] && NewRC)
+      Classes[Reg] = NewRC;
+    else if (!NewRC || Classes[Reg] != NewRC)
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+    RegRefs.insert(std::make_pair(Reg, &MO));
+
+    // It wasn't previously live but now it is; this is a kill.
+    if (KillIndices[Reg] == ~0u) {
+      KillIndices[Reg] = Count;
+      DefIndices[Reg] = ~0u;
+      assert(((KillIndices[Reg] == ~0u) !=
+              (DefIndices[Reg] == ~0u)) &&
+             "Kill and Def maps aren't consistent for Reg!");
+    }
+    // Repeat, for all aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      if (KillIndices[AliasReg] == ~0u) {
+        KillIndices[AliasReg] = Count;
+        DefIndices[AliasReg] = ~0u;
+      }
+    }
+  }
+}
+
+/// BreakAntiDependencies - Identify anti-dependencies along the critical path
+/// of the ScheduleDAG and break them by renaming registers.
+///
+bool SchedulePostRATDList::BreakAntiDependencies() {
+  // The code below assumes that there is at least one instruction,
+  // so just duck out immediately if the block is empty.
+  if (SUnits.empty()) return false;
+
+  // Find the node at the bottom of the critical path.
+  SUnit *Max = 0;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+      Max = SU;
+  }
+
+  DOUT << "Critical path has total latency "
+       << (Max->getDepth() + Max->Latency) << "\n";
+
+  // Track progress along the critical path through the SUnit graph as we walk
+  // the instructions.
+  SUnit *CriticalPathSU = Max;
+  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+  // Consider this pattern:
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  //   A = ...
+  //   ... = A
+  // There are three anti-dependencies here, and without special care,
+  // we'd break all of them using the same register:
+  //   A = ...
+  //   ... = A
+  //   B = ...
+  //   ... = B
+  //   B = ...
+  //   ... = B
+  //   B = ...
+  //   ... = B
+  // because at each anti-dependence, B is the first register that
+  // isn't A which is free. This re-introduces anti-dependencies
+  // at all but one of the original anti-dependencies that we were
+  // trying to break. To avoid this, keep track of the most recent
+  // register that each register was replaced with, and avoid using
+  // it to repair an anti-dependence on the same register.
+  // This lets us produce this:
+  //   A = ...
+  //   ... = A
+  //   B = ...
+  //   ... = B
+  //   C = ...
+  //   ... = C
+  //   B = ...
+  //   ... = B
+  // This still has an anti-dependence on B, but at least it isn't on the
+  // original critical path.
+  //
+  // TODO: If we tracked more than one register here, we could potentially
+  // fix that remaining critical edge too. This is a little more involved,
+  // because unlike the most recent register, less recent registers should
+  // still be considered, though only if no other registers are available.
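+  // LastNewReg[R] records which register most recently replaced R, so the
+  // loop below can skip it when repairing another anti-dependence on R.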
+ unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; + + // Attempt to break anti-dependence edges on the critical path. Walk the + // instructions from the bottom up, tracking information about liveness + // as we go to help determine which registers are available. + bool Changed = false; + unsigned Count = InsertPosIndex - 1; + for (MachineBasicBlock::iterator I = InsertPos, E = Begin; + I != E; --Count) { + MachineInstr *MI = --I; + + // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as + // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF + // is left behind appearing to clobber the super-register, while the + // subregister needs to remain live. So we just ignore them. + if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + continue; + + // Check if this instruction has a dependence on the critical path that + // is an anti-dependence that we may be able to break. If it is, set + // AntiDepReg to the non-zero register associated with the anti-dependence. + // + // We limit our attention to the critical path as a heuristic to avoid + // breaking anti-dependence edges that aren't going to significantly + // impact the overall schedule. There are a limited number of registers + // and we want to save them for the important edges. + // + // TODO: Instructions with multiple defs could have multiple + // anti-dependencies. The current code here only knows how to break one + // edge per instruction. Note that we'd have to be able to break all of + // the anti-dependencies in an instruction in order to be effective. + unsigned AntiDepReg = 0; + if (MI == CriticalPathMI) { + if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { + SUnit *NextSU = Edge->getSUnit(); + + // Only consider anti-dependence edges. + if (Edge->getKind() == SDep::Anti) { + AntiDepReg = Edge->getReg(); + assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); + // Don't break anti-dependencies on non-allocatable registers. + if (!AllocatableSet.test(AntiDepReg)) + AntiDepReg = 0; + else { + // If the SUnit has other dependencies on the SUnit that it + // anti-depends on, don't bother breaking the anti-dependency + // since those edges would prevent such units from being + // scheduled past each other regardless. + // + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. + for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), + PE = CriticalPathSU->Preds.end(); P != PE; ++P) + if (P->getSUnit() == NextSU ? + (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : + (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + AntiDepReg = 0; + break; + } + } + } + CriticalPathSU = NextSU; + CriticalPathMI = CriticalPathSU->getInstr(); + } else { + // We've reached the end of the critical path. + CriticalPathSU = 0; + CriticalPathMI = 0; + } + } + + PrescanInstruction(MI); + + // If this instruction has a use of AntiDepReg, breaking it + // is invalid. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (MO.isUse() && AntiDepReg == Reg) { + AntiDepReg = 0; + break; + } + } + + // Determine AntiDepReg's register class, if it is live and is + // consistently used within a single class. + const TargetRegisterClass *RC = AntiDepReg != 0 ? 
+      Classes[AntiDepReg] : 0;
+    assert((AntiDepReg == 0 || RC != NULL) &&
+           "Register should be live if it's causing an anti-dependence!");
+    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+      AntiDepReg = 0;
+
+    // Look for a suitable register to use to break the anti-dependence.
+    //
+    // TODO: Instead of picking the first free register, consider which might
+    // be the best.
+    if (AntiDepReg != 0) {
+      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+           RE = RC->allocation_order_end(MF); R != RE; ++R) {
+        unsigned NewReg = *R;
+        // Don't replace a register with itself.
+        if (NewReg == AntiDepReg) continue;
+        // Don't replace a register with one that was recently used to repair
+        // an anti-dependence with this AntiDepReg, because that would
+        // re-introduce that anti-dependence.
+        if (NewReg == LastNewReg[AntiDepReg]) continue;
+        // If NewReg is dead and NewReg's most recent def is not before
+        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+        assert(((KillIndices[AntiDepReg] == ~0u) !=
+                (DefIndices[AntiDepReg] == ~0u)) &&
+               "Kill and Def maps aren't consistent for AntiDepReg!");
+        assert(((KillIndices[NewReg] == ~0u) !=
+                (DefIndices[NewReg] == ~0u)) &&
+               "Kill and Def maps aren't consistent for NewReg!");
+        if (KillIndices[NewReg] == ~0u &&
+            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
+            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
+          DOUT << "Breaking anti-dependence edge on "
+               << TRI->getName(AntiDepReg)
+               << " with " << RegRefs.count(AntiDepReg) << " references"
+               << " using " << TRI->getName(NewReg) << "!\n";
+
+          // Update the references to the old register to refer to the new
+          // register.
+          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+                    std::multimap<unsigned, MachineOperand *>::iterator>
+             Range = RegRefs.equal_range(AntiDepReg);
+          for (std::multimap<unsigned, MachineOperand *>::iterator
+               Q = Range.first, QE = Range.second; Q != QE; ++Q)
+            Q->second->setReg(NewReg);
+
+          // We just went back in time and modified history; the
+          // liveness information for the anti-dependence reg is now
+          // inconsistent. Set the state as if it were dead.
+          Classes[NewReg] = Classes[AntiDepReg];
+          DefIndices[NewReg] = DefIndices[AntiDepReg];
+          KillIndices[NewReg] = KillIndices[AntiDepReg];
+          assert(((KillIndices[NewReg] == ~0u) !=
+                  (DefIndices[NewReg] == ~0u)) &&
+                 "Kill and Def maps aren't consistent for NewReg!");
+
+          Classes[AntiDepReg] = 0;
+          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+          KillIndices[AntiDepReg] = ~0u;
+          assert(((KillIndices[AntiDepReg] == ~0u) !=
+                  (DefIndices[AntiDepReg] == ~0u)) &&
+                 "Kill and Def maps aren't consistent for AntiDepReg!");
+
+          RegRefs.erase(AntiDepReg);
+          Changed = true;
+          LastNewReg[AntiDepReg] = NewReg;
+          break;
+        }
+      }
+    }
+
+    ScanInstruction(MI, Count);
+  }
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+  --SuccSU->NumPredsLeft;
+
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft < 0) {
+    cerr << "*** Scheduling failed! ***\n";
+    SuccSU->dump(this);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+
+  // Compute how many cycles it will be before this actually becomes
+  // available. This is the max of the start time of all predecessors plus
+  // their latencies.
+  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+
+  // If all the node's predecessors are scheduled, this node is ready
+  // to be scheduled. Ignore the special ExitSU node.
+  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+    PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    ReleaseSucc(SU, &*I);
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(this));
+
+  Sequence.push_back(SU);
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
+
+  ReleaseSuccessors(SU);
+  SU->isScheduled = true;
+  AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+
+  // Release any successors of the special Entry node.
+  ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to the Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.empty()) {
+      AvailableQueue.push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+
+  // While the Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back. Schedule the node.
+  std::vector<SUnit*> NotReady;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+    // Check to see if any of the pending instructions are ready to issue. If
+    // so, add them to the available queue.
+    unsigned MinDepth = ~0u;
+    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+      if (PendingQueue[i]->getDepth() <= CurCycle) {
+        AvailableQueue.push(PendingQueue[i]);
+        PendingQueue[i]->isAvailable = true;
+        PendingQueue[i] = PendingQueue.back();
+        PendingQueue.pop_back();
+        --i; --e;
+      } else if (PendingQueue[i]->getDepth() < MinDepth)
+        MinDepth = PendingQueue[i]->getDepth();
+    }
+
+    // If there are no instructions available, don't try to issue anything, and
+    // don't advance the hazard recognizer.
+    if (AvailableQueue.empty()) {
+      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
+      continue;
+    }
+
+    SUnit *FoundSUnit = 0;
+
+    bool HasNoopHazards = false;
+    while (!AvailableQueue.empty()) {
+      SUnit *CurSUnit = AvailableQueue.pop();
+
+      ScheduleHazardRecognizer::HazardType HT =
+        HazardRec->getHazardType(CurSUnit);
+      if (HT == ScheduleHazardRecognizer::NoHazard) {
+        FoundSUnit = CurSUnit;
+        break;
+      }
+
+      // Remember if this is a noop hazard.
+      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+      NotReady.push_back(CurSUnit);
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    if (!NotReady.empty()) {
+      AvailableQueue.push_all(NotReady);
+      NotReady.clear();
+    }
+
+    // If we found a node to schedule, do it now.
+    if (FoundSUnit) {
+      ScheduleNodeTopDown(FoundSUnit, CurCycle);
+      HazardRec->EmitInstruction(FoundSUnit);
+
+      // If this is a pseudo-op node, we don't want to increment the current
+      // cycle.
+      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
+        ++CurCycle;
+    } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall, but no other problem; just
+      // advance the current cycle and try again.
+      DOUT << "*** Advancing cycle, no work to do\n";
+      HazardRec->AdvanceCycle();
+      ++NumStalls;
+      ++CurCycle;
+    } else {
+      // Otherwise, we have no instructions to issue and we have instructions
+      // that will fault if we don't do this right. This is the case for
+      // processors without pipeline interlocks and other cases.
+      DOUT << "*** Emitting noop\n";
+      HazardRec->EmitNoop();
+      Sequence.push_back(0);   // NULL here means noop
+      ++NumNoops;
+      ++CurCycle;
+    }
+  }
+
+#ifndef NDEBUG
+  VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler() {
+  return new PostRAScheduler();
+}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
new file mode 100644
index 0000000..97d4728
--- /dev/null
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -0,0 +1,1485 @@
+//===-- PreAllocSplitting.cpp - Pre-allocation Interval Splitting Pass. ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level pre-register allocation
+// live interval splitting pass. It finds live interval barriers, i.e.
+// instructions which will kill all physical registers in certain register
+// classes, and splits all live intervals which cross the barrier.
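+// For example, a barrier that clobbers every register in a class forces any
+// virtual register of that class which is live across it to be spilled and
+// restored (or rematerialized) around the barrier instruction.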
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-alloc-split"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumSplits, "Number of intervals split");
+STATISTIC(NumRemats, "Number of intervals split by rematerialization");
+STATISTIC(NumFolds, "Number of intervals split with spill folding");
+STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
+STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
+STATISTIC(NumDeadSpills, "Number of dead spills removed");
+
+namespace {
+  class VISIBILITY_HIDDEN PreAllocSplitting : public MachineFunctionPass {
+    MachineFunction *CurrMF;
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo* TRI;
+    MachineFrameInfo *MFI;
+    MachineRegisterInfo *MRI;
+    LiveIntervals *LIs;
+    LiveStacks *LSs;
+    VirtRegMap *VRM;
+
+    // Barrier - Current barrier being processed.
+    MachineInstr *Barrier;
+
+    // BarrierMBB - Basic block where the barrier resides.
+    MachineBasicBlock *BarrierMBB;
+
+    // BarrierIdx - Current barrier index.
+    unsigned BarrierIdx;
+
+    // CurrLI - Current live interval being split.
+    LiveInterval *CurrLI;
+
+    // CurrSLI - Current stack slot live interval.
+    LiveInterval *CurrSLI;
+
+    // CurrSValNo - Current val# for the stack slot live interval.
+    VNInfo *CurrSValNo;
+
+    // IntervalSSMap - A map from live interval to spill slots.
+    DenseMap<unsigned, int> IntervalSSMap;
+
+    // Def2SpillMap - A map from a def instruction index to spill index.
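+    // A later barrier over the same interval can then reuse the earlier
+    // spill instead of inserting a redundant one (see IsAvailableInStack).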
+    DenseMap<unsigned, unsigned> Def2SpillMap;
+
+  public:
+    static char ID;
+    PreAllocSplitting() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveIntervals>();
+      AU.addPreserved<LiveIntervals>();
+      AU.addRequired<LiveStacks>();
+      AU.addPreserved<LiveStacks>();
+      AU.addPreserved<RegisterCoalescer>();
+      if (StrongPHIElim)
+        AU.addPreservedID(StrongPHIEliminationID);
+      else
+        AU.addPreservedID(PHIEliminationID);
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addRequired<VirtRegMap>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreserved<VirtRegMap>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      IntervalSSMap.clear();
+      Def2SpillMap.clear();
+    }
+
+    virtual const char *getPassName() const {
+      return "Pre-Register Allocation Live Interval Splitting";
+    }
+
+    /// print - Implement the dump method.
+    virtual void print(std::ostream &O, const Module* M = 0) const {
+      LIs->print(O, M);
+    }
+
+    void print(std::ostream *O, const Module* M = 0) const {
+      if (O) print(*O, M);
+    }
+
+  private:
+    MachineBasicBlock::iterator
+      findNextEmptySlot(MachineBasicBlock*, MachineInstr*,
+                        unsigned&);
+
+    MachineBasicBlock::iterator
+      findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
+                     SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+
+    MachineBasicBlock::iterator
+      findRestorePoint(MachineBasicBlock*, MachineInstr*, unsigned,
+                       SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+
+    int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
+
+    bool IsAvailableInStack(MachineBasicBlock*, unsigned, unsigned, unsigned,
+                            unsigned&, int&) const;
+
+    void UpdateSpillSlotInterval(VNInfo*, unsigned, unsigned);
+
+    bool SplitRegLiveInterval(LiveInterval*);
+
+    bool SplitRegLiveIntervals(const TargetRegisterClass **,
+                               SmallPtrSet<LiveInterval*, 8>&);
+
+    bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB,
+                        MachineBasicBlock* BarrierMBB);
+    bool Rematerialize(unsigned vreg, VNInfo* ValNo,
+                       MachineInstr* DefMI,
+                       MachineBasicBlock::iterator RestorePt,
+                       unsigned RestoreIdx,
+                       SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
+                            MachineInstr* DefMI,
+                            MachineInstr* Barrier,
+                            MachineBasicBlock* MBB,
+                            int& SS,
+                            SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    MachineInstr* FoldRestore(unsigned vreg,
+                              const TargetRegisterClass* RC,
+                              MachineInstr* Barrier,
+                              MachineBasicBlock* MBB,
+                              int SS,
+                              SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    void RenumberValno(VNInfo* VN);
+    void ReconstructLiveInterval(LiveInterval* LI);
+    bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split);
+    unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs,
+                                  unsigned Reg, int FrameIndex, bool& TwoAddr);
+    VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use,
+                                   MachineBasicBlock* MBB, LiveInterval* LI,
+                                   SmallPtrSet<MachineInstr*, 4>& Visited,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                                   DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                                 DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                                    DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                                   bool IsTopLevel, bool IsIntraBlock);
+    VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use,
+                                   MachineBasicBlock* MBB, LiveInterval* LI,
+                                   SmallPtrSet<MachineInstr*, 4>& Visited,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                                   DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                                 DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                                    DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                                   bool IsTopLevel, bool IsIntraBlock);
+};
+} // end anonymous namespace
+
+char PreAllocSplitting::ID = 0;
+
+static RegisterPass<PreAllocSplitting>
+X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
+
+const PassInfo *const llvm::PreAllocSplittingID = &X;
+
+
+/// findNextEmptySlot - Find a gap after the given machine instruction in the
+/// instruction index map. If there isn't one, return end().
+MachineBasicBlock::iterator
+PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
+                                     unsigned &SpotIndex) {
+  MachineBasicBlock::iterator MII = MI;
+  if (++MII != MBB->end()) {
+    unsigned Index = LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
+    if (Index) {
+      SpotIndex = Index;
+      return MII;
+    }
+  }
+  return MBB->end();
+}
+
+/// findSpillPoint - Find a gap as far away from the given MI as possible
+/// that's suitable for spilling the current live interval. The index must be
+/// before any defs and uses of the live interval register in the mbb. Return
+/// begin() if none is found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
+                                  MachineInstr *DefMI,
+                                  SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
+                                  unsigned &SpillIndex) {
+  MachineBasicBlock::iterator Pt = MBB->begin();
+
+  MachineBasicBlock::iterator MII = MI;
+  MachineBasicBlock::iterator EndPt = DefMI
+    ? MachineBasicBlock::iterator(DefMI) : MBB->begin();
+
+  while (MII != EndPt && !RefsInMBB.count(MII) &&
+         MII->getOpcode() != TRI->getCallFrameSetupOpcode())
+    --MII;
+  if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+
+  while (MII != EndPt && !RefsInMBB.count(MII)) {
+    unsigned Index = LIs->getInstructionIndex(MII);
+
+    // We can't insert the spill between the barrier (a call) and its
+    // corresponding call frame setup.
+    if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
+      while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) {
+        --MII;
+        if (MII == EndPt) {
+          return Pt;
+        }
+      }
+      continue;
+    } else if (LIs->hasGapBeforeInstr(Index)) {
+      Pt = MII;
+      SpillIndex = LIs->findGapBeforeInstr(Index, true);
+    }
+
+    if (RefsInMBB.count(MII))
+      return Pt;
+
+    --MII;
+  }
+
+  return Pt;
+}
+
+/// findRestorePoint - Find a gap in the instruction index map that's suitable
+/// for restoring the current live interval value. The index must be before any
+/// uses of the live interval register in the mbb. Return end() if none is
+/// found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
+                                    unsigned LastIdx,
+                                    SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
+                                    unsigned &RestoreIndex) {
+  // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
+  // begin index accordingly.
+  MachineBasicBlock::iterator Pt = MBB->end();
+  MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator();
+
+  // We start at the call, so walk forward until we find the call frame
+  // teardown, since we can't insert restores before that. Bail if we
+  // encounter a use during this time.
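+  // (On x86, for example, the call frame teardown is the ADJCALLSTACKUP
+  // pseudo that follows the call; the walk below skips past it before
+  // looking for a restore gap.)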
+  MachineBasicBlock::iterator MII = MI;
+  if (MII == EndPt) return Pt;
+
+  while (MII != EndPt && !RefsInMBB.count(MII) &&
+         MII->getOpcode() != TRI->getCallFrameDestroyOpcode())
+    ++MII;
+  if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+  ++MII;
+
+  // FIXME: Limit the number of instructions to examine to reduce
+  // compile time?
+  while (MII != EndPt) {
+    unsigned Index = LIs->getInstructionIndex(MII);
+    if (Index > LastIdx)
+      break;
+    unsigned Gap = LIs->findGapBeforeInstr(Index);
+
+    // We can't insert a restore between the barrier (a call) and its
+    // corresponding call frame teardown.
+    if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+      do {
+        if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+        ++MII;
+      } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
+    } else if (Gap) {
+      Pt = MII;
+      RestoreIndex = Gap;
+    }
+
+    if (RefsInMBB.count(MII))
+      return Pt;
+
+    ++MII;
+  }
+
+  return Pt;
+}
+
+/// CreateSpillStackSlot - Create a stack slot for the live interval being
+/// split. If the live interval was previously split, just reuse the same
+/// slot.
+int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
+                                            const TargetRegisterClass *RC) {
+  int SS;
+  DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+  if (I != IntervalSSMap.end()) {
+    SS = I->second;
+  } else {
+    SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+    IntervalSSMap[Reg] = SS;
+  }
+
+  // Create live interval for stack slot.
+  CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+  if (CurrSLI->hasAtLeastOneValue())
+    CurrSValNo = CurrSLI->getValNumInfo(0);
+  else
+    CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+  return SS;
+}
+
+/// IsAvailableInStack - Return true if register is available in a split stack
+/// slot at the specified index.
+bool
+PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
+                                      unsigned Reg, unsigned DefIndex,
+                                      unsigned RestoreIndex,
+                                      unsigned &SpillIndex, int& SS) const {
+  if (!DefMBB)
+    return false;
+
+  DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+  if (I == IntervalSSMap.end())
+    return false;
+  DenseMap<unsigned, unsigned>::iterator II = Def2SpillMap.find(DefIndex);
+  if (II == Def2SpillMap.end())
+    return false;
+
+  // If the last spill of the def is in the same mbb as the barrier mbb (where
+  // the restore will be), make sure it's not below the intended restore index.
+  // FIXME: Undo the previous spill?
+  assert(LIs->getMBBFromIndex(II->second) == DefMBB);
+  if (DefMBB == BarrierMBB && II->second >= RestoreIndex)
+    return false;
+
+  SS = I->second;
+  SpillIndex = II->second;
+  return true;
+}
+
+/// UpdateSpillSlotInterval - Given the specified val# of the register live
+/// interval being split, and the spill and restore indices, update the live
+/// interval of the spill stack slot.
+void
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
+                                           unsigned RestoreIndex) {
+  assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
+         "Expect restore in the barrier mbb");
+
+  MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex);
+  if (MBB == BarrierMBB) {
+    // Intra-block spill + restore. We are done.
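+    // The stack slot is live precisely from the spill to the restore, so a
+    // single range [SpillIndex, RestoreIndex) on the current stack valno
+    // suffices.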
+    LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo);
+    CurrSLI->addRange(SLR);
+    return;
+  }
+
+  SmallPtrSet<MachineBasicBlock*, 4> Processed;
+  unsigned EndIdx = LIs->getMBBEndIdx(MBB);
+  LiveRange SLR(SpillIndex, EndIdx+1, CurrSValNo);
+  CurrSLI->addRange(SLR);
+  Processed.insert(MBB);
+
+  // Start from the spill mbb and figure out the extent of the spill slot's
+  // live interval.
+  SmallVector<MachineBasicBlock*, 4> WorkList;
+  const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex);
+  if (LR->end > EndIdx)
+    // If the live range extends beyond the end of the mbb, add successors to
+    // the work list.
+    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+           SE = MBB->succ_end(); SI != SE; ++SI)
+      WorkList.push_back(*SI);
+
+  while (!WorkList.empty()) {
+    MachineBasicBlock *MBB = WorkList.back();
+    WorkList.pop_back();
+    if (Processed.count(MBB))
+      continue;
+    unsigned Idx = LIs->getMBBStartIdx(MBB);
+    LR = CurrLI->getLiveRangeContaining(Idx);
+    if (LR && LR->valno == ValNo) {
+      EndIdx = LIs->getMBBEndIdx(MBB);
+      if (Idx <= RestoreIndex && RestoreIndex < EndIdx) {
+        // Spill slot live interval stops at the restore.
+        LiveRange SLR(Idx, RestoreIndex, CurrSValNo);
+        CurrSLI->addRange(SLR);
+      } else if (LR->end > EndIdx) {
+        // Live range extends beyond end of mbb, process successors.
+        LiveRange SLR(Idx, EndIdx+1, CurrSValNo);
+        CurrSLI->addRange(SLR);
+        for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+               SE = MBB->succ_end(); SI != SE; ++SI)
+          WorkList.push_back(*SI);
+      } else {
+        LiveRange SLR(Idx, LR->end, CurrSValNo);
+        CurrSLI->addRange(SLR);
+      }
+      Processed.insert(MBB);
+    }
+  }
+}
+
+/// PerformPHIConstruction - From properly set up use and def lists, use a PHI
+/// construction algorithm to compute the ranges and valnos for an interval.
+VNInfo*
+PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
+                                       MachineBasicBlock* MBB, LiveInterval* LI,
+                                       SmallPtrSet<MachineInstr*, 4>& Visited,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                                 DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                                    DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                                       bool IsTopLevel, bool IsIntraBlock) {
+  // Return memoized result if it's available.
+  if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI))
+    return NewVNs[UseI];
+  else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI))
+    return NewVNs[UseI];
+  else if (!IsIntraBlock && LiveOut.count(MBB))
+    return LiveOut[MBB];
+
+  // Check if our block contains any uses or defs.
+  bool ContainsDefs = Defs.count(MBB);
+  bool ContainsUses = Uses.count(MBB);
+
+  VNInfo* RetVNI = 0;
+
+  // Enumerate the cases of use/def containing blocks.
+  if (!ContainsDefs && !ContainsUses) {
+    return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+                                          NewVNs, LiveOut, Phis,
+                                          IsTopLevel, IsIntraBlock);
+  } else if (ContainsDefs && !ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+
+    // Search for the def in this block. If we don't find it before the
+    // instruction we care about, go to the fallback case. Note that that
+    // should never happen: this cannot be intrablock, so use should
+    // always be an end() iterator.
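+    // (Since this block has defs but no uses, the walk below can only stop
+    // at a def, whose VNInfo is then extended to the end of the block.)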
+    assert(UseI == MBB->end() && "No use marked in intrablock");
+
+    MachineBasicBlock::iterator Walker = UseI;
+    --Walker;
+    while (Walker != MBB->begin()) {
+      if (BlockDefs.count(Walker))
+        break;
+      --Walker;
+    }
+
+    // Once we've found it, extend its VNInfo to our instruction.
+    unsigned DefIndex = LIs->getInstructionIndex(Walker);
+    DefIndex = LiveIntervals::getDefIndex(DefIndex);
+    unsigned EndIndex = LIs->getMBBEndIdx(MBB);
+
+    RetVNI = NewVNs[Walker];
+    LI->addRange(LiveRange(DefIndex, EndIndex+1, RetVNI));
+  } else if (!ContainsDefs && ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+    // Search for the use in this block that precedes the instruction we care
+    // about, going to the fallback case if we don't find it.
+    if (UseI == MBB->begin())
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                            Uses, NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
+    MachineBasicBlock::iterator Walker = UseI;
+    --Walker;
+    bool found = false;
+    while (Walker != MBB->begin()) {
+      if (BlockUses.count(Walker)) {
+        found = true;
+        break;
+      }
+      --Walker;
+    }
+
+    // Must check begin() too.
+    if (!found) {
+      if (BlockUses.count(Walker))
+        found = true;
+      else
+        return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                              Uses, NewVNs, LiveOut, Phis,
+                                              IsTopLevel, IsIntraBlock);
+    }
+
+    unsigned UseIndex = LIs->getInstructionIndex(Walker);
+    UseIndex = LiveIntervals::getUseIndex(UseIndex);
+    unsigned EndIndex = 0;
+    if (IsIntraBlock) {
+      EndIndex = LIs->getInstructionIndex(UseI);
+      EndIndex = LiveIntervals::getUseIndex(EndIndex);
+    } else
+      EndIndex = LIs->getMBBEndIdx(MBB);
+
+    // Now, recursively phi construct the VNInfo for the use we found,
+    // and then extend it to include the instruction we care about.
+    RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+                                    NewVNs, LiveOut, Phis, false, true);
+
+    LI->addRange(LiveRange(UseIndex, EndIndex+1, RetVNI));
+
+    // FIXME: Need to set kills properly for inter-block stuff.
+    if (LI->isKill(RetVNI, UseIndex)) LI->removeKill(RetVNI, UseIndex);
+    if (IsIntraBlock)
+      LI->addKill(RetVNI, EndIndex);
+  } else if (ContainsDefs && ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+    // This case is basically a merging of the two preceding cases, with the
+    // special note that checking for defs must take precedence over checking
+    // for uses, because of two-address instructions.
+
+    if (UseI == MBB->begin())
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+                                            NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
+    MachineBasicBlock::iterator Walker = UseI;
+    --Walker;
+    bool foundDef = false;
+    bool foundUse = false;
+    while (Walker != MBB->begin()) {
+      if (BlockDefs.count(Walker)) {
+        foundDef = true;
+        break;
+      } else if (BlockUses.count(Walker)) {
+        foundUse = true;
+        break;
+      }
+      --Walker;
+    }
+
+    // Must check begin() too.
+    if (!foundDef && !foundUse) {
+      if (BlockDefs.count(Walker))
+        foundDef = true;
+      else if (BlockUses.count(Walker))
+        foundUse = true;
+      else
+        return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                              Uses, NewVNs, LiveOut, Phis,
+                                              IsTopLevel, IsIntraBlock);
+    }
+
+    unsigned StartIndex = LIs->getInstructionIndex(Walker);
+    StartIndex = foundDef ? LiveIntervals::getDefIndex(StartIndex) :
+                            LiveIntervals::getUseIndex(StartIndex);
+    unsigned EndIndex = 0;
+    if (IsIntraBlock) {
+      EndIndex = LIs->getInstructionIndex(UseI);
+      EndIndex = LiveIntervals::getUseIndex(EndIndex);
+    } else
+      EndIndex = LIs->getMBBEndIdx(MBB);
+
+    if (foundDef)
+      RetVNI = NewVNs[Walker];
+    else
+      RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+                                      NewVNs, LiveOut, Phis, false, true);
+
+    LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI));
+
+    if (foundUse && LI->isKill(RetVNI, StartIndex))
+      LI->removeKill(RetVNI, StartIndex);
+    if (IsIntraBlock) {
+      LI->addKill(RetVNI, EndIndex);
+    }
+  }
+
+  // Memoize results so we don't have to recompute them.
+  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+  else {
+    if (!NewVNs.count(UseI))
+      NewVNs[UseI] = RetVNI;
+    Visited.insert(UseI);
+  }
+
+  return RetVNI;
+}
+
+/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path.
+///
+VNInfo*
+PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI,
+                                       MachineBasicBlock* MBB, LiveInterval* LI,
+                                       SmallPtrSet<MachineInstr*, 4>& Visited,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                                 DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                                    DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                                       bool IsTopLevel, bool IsIntraBlock) {
+  // NOTE: Because this is the fallback case from other cases, we do NOT
+  // assume that we are not intrablock here.
+  if (Phis.count(MBB)) return Phis[MBB];
+
+  unsigned StartIndex = LIs->getMBBStartIdx(MBB);
+  VNInfo *RetVNI = Phis[MBB] = LI->getNextValue(~0U, /*FIXME*/ 0,
+                                                LIs->getVNInfoAllocator());
+  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+
+  // If there are no uses or defs between our starting point and the
+  // beginning of the block, then recursively perform phi construction
+  // on our predecessors.
+  DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs;
+  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+    VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI,
+                                              Visited, Defs, Uses, NewVNs,
+                                              LiveOut, Phis, false, false);
+    if (Incoming != 0)
+      IncomingVNs[*PI] = Incoming;
+  }
+
+  if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill) {
+    VNInfo* OldVN = RetVNI;
+    VNInfo* NewVN = IncomingVNs.begin()->second;
+    VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
+    if (MergedVN == OldVN) std::swap(OldVN, NewVN);
+
+    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(),
+           LOE = LiveOut.end(); LOI != LOE; ++LOI)
+      if (LOI->second == OldVN)
+        LOI->second = MergedVN;
+    for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(),
+           NVE = NewVNs.end(); NVI != NVE; ++NVI)
+      if (NVI->second == OldVN)
+        NVI->second = MergedVN;
+    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(),
+           PE = Phis.end(); PI != PE; ++PI)
+      if (PI->second == OldVN)
+        PI->second = MergedVN;
+    RetVNI = MergedVN;
+  } else {
+    // Otherwise, merge the incoming VNInfos with a phi join. Create a new
+    // VNInfo to represent the joined value.
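+    // Each incoming value dies at the end of its predecessor block, so it
+    // must be marked as a phi kill there before the join is recorded.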
+ for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = + IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { + I->second->hasPHIKill = true; + unsigned KillIndex = LIs->getMBBEndIdx(I->first); + if (!LiveInterval::isKill(I->second, KillIndex)) + LI->addKill(I->second, KillIndex); + } + } + + unsigned EndIndex = 0; + if (IsIntraBlock) { + EndIndex = LIs->getInstructionIndex(UseI); + EndIndex = LiveIntervals::getUseIndex(EndIndex); + } else + EndIndex = LIs->getMBBEndIdx(MBB); + LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI)); + if (IsIntraBlock) + LI->addKill(RetVNI, EndIndex); + + // Memoize results so we don't have to recompute them. + if (!IsIntraBlock) + LiveOut[MBB] = RetVNI; + else { + if (!NewVNs.count(UseI)) + NewVNs[UseI] = RetVNI; + Visited.insert(UseI); + } + + return RetVNI; +} + +/// ReconstructLiveInterval - Recompute a live interval from scratch. +void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { + BumpPtrAllocator& Alloc = LIs->getVNInfoAllocator(); + + // Clear the old ranges and valnos; + LI->clear(); + + // Cache the uses and defs of the register + typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap; + RegMap Defs, Uses; + + // Keep track of the new VNs we're creating. + DenseMap<MachineInstr*, VNInfo*> NewVNs; + SmallPtrSet<VNInfo*, 2> PhiVNs; + + // Cache defs, and create a new VNInfo for each def. + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + Defs[(*DI).getParent()].insert(&*DI); + + unsigned DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LiveIntervals::getDefIndex(DefIdx); + + VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); + + // If the def is a move, set the copy field. + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + if (DstReg == LI->reg) + NewVN->copy = &*DI; + + NewVNs[&*DI] = NewVN; + } + + // Cache uses as a separate pass from actually processing them. + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), + UE = MRI->use_end(); UI != UE; ++UI) + Uses[(*UI).getParent()].insert(&*UI); + + // Now, actually process every use and use a phi construction algorithm + // to walk from it to its reaching definitions, building VNInfos along + // the way. + DenseMap<MachineBasicBlock*, VNInfo*> LiveOut; + DenseMap<MachineBasicBlock*, VNInfo*> Phis; + SmallPtrSet<MachineInstr*, 4> Visited; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), + UE = MRI->use_end(); UI != UE; ++UI) { + PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, true, true); + } + + // Add ranges for dead defs + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + unsigned DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LiveIntervals::getDefIndex(DefIdx); + + if (LI->liveAt(DefIdx)) continue; + + VNInfo* DeadVN = NewVNs[&*DI]; + LI->addRange(LiveRange(DefIdx, DefIdx+1, DeadVN)); + LI->addKill(DeadVN, DefIdx); + } +} + +/// RenumberValno - Split the given valno out into a new vreg, allowing it to +/// be allocated to a different register. This function creates a new vreg, +/// copies the valno and its live ranges over to the new vreg's interval, +/// removes them from the old interval, and rewrites all uses and defs of +/// the original reg to the new vreg within those ranges. 
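+/// Renumbering bails out if any valno in the connected set has a PHI kill,
+/// since those values cannot be separated from their joins.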
+void PreAllocSplitting::RenumberValno(VNInfo* VN) {
+  SmallVector<VNInfo*, 4> Stack;
+  SmallVector<VNInfo*, 4> VNsToCopy;
+  Stack.push_back(VN);
+
+  // Walk through and copy the valno we care about, and any other valnos
+  // that are two-address redefinitions of the one we care about. These
+  // will need to be rewritten as well. We also check for safety of the
+  // renumbering here, by making sure that none of the valnos involved has
+  // phi kills.
+  while (!Stack.empty()) {
+    VNInfo* OldVN = Stack.back();
+    Stack.pop_back();
+
+    // Bail out if we ever encounter a valno that has a PHI kill. We can't
+    // renumber these.
+    if (OldVN->hasPHIKill) return;
+
+    VNsToCopy.push_back(OldVN);
+
+    // Locate two-address redefinitions.
+    for (SmallVector<unsigned, 4>::iterator KI = OldVN->kills.begin(),
+           KE = OldVN->kills.end(); KI != KE; ++KI) {
+      MachineInstr* MI = LIs->getInstructionFromIndex(*KI);
+      unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg);
+      if (DefIdx == ~0U) continue;
+      if (MI->isRegTiedToUseOperand(DefIdx)) {
+        VNInfo* NextVN =
+          CurrLI->findDefinedVNInfo(LiveIntervals::getDefIndex(*KI));
+        if (NextVN == OldVN) continue;
+        Stack.push_back(NextVN);
+      }
+    }
+  }
+
+  // Create the new vreg.
+  unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg));
+
+  // Create the new live interval.
+  LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg);
+
+  for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE =
+         VNsToCopy.end(); OI != OE; ++OI) {
+    VNInfo* OldVN = *OI;
+
+    // Copy the valno over.
+    VNInfo* NewVN = NewLI.getNextValue(OldVN->def, OldVN->copy,
+                                       LIs->getVNInfoAllocator());
+    NewLI.copyValNumInfo(NewVN, OldVN);
+    NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
+
+    // Remove the valno from the old interval.
+    CurrLI->removeValNo(OldVN);
+  }
+
+  // Rewrite defs and uses. This is done in two stages to avoid invalidating
+  // the reg_iterator.
+  SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange;
+
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand& MO = I.getOperand();
+    unsigned InstrIdx = LIs->getInstructionIndex(&*I);
+
+    if ((MO.isUse() && NewLI.liveAt(LiveIntervals::getUseIndex(InstrIdx))) ||
+        (MO.isDef() && NewLI.liveAt(LiveIntervals::getDefIndex(InstrIdx))))
+      OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
+  }
+
+  for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I =
+         OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) {
+    MachineInstr* Inst = I->first;
+    unsigned OpIdx = I->second;
+    MachineOperand& MO = Inst->getOperand(OpIdx);
+    MO.setReg(NewVReg);
+  }
+
+  // Grow the VirtRegMap, since we've created a new vreg.
+  VRM->grow();
+
+  // The renumbered vreg shares a stack slot with the old register.
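+  // (Reusing the mapping means a spill of either vreg stores to the same
+  // slot; we only copy the entry when one already exists.)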
+  if (IntervalSSMap.count(CurrLI->reg))
+    IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
+
+  NumRenumbers++;
+}
+
+bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
+                                      MachineInstr* DefMI,
+                                      MachineBasicBlock::iterator RestorePt,
+                                      unsigned RestoreIdx,
+                                      SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+  MachineBasicBlock& MBB = *RestorePt->getParent();
+
+  MachineBasicBlock::iterator KillPt = BarrierMBB->end();
+  unsigned KillIdx = 0;
+  if (ValNo->def == ~0U || DefMI->getParent() == BarrierMBB)
+    KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
+  else
+    KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
+
+  if (KillPt == DefMI->getParent()->end())
+    return false;
+
+  TII->reMaterialize(MBB, RestorePt, vreg, DefMI);
+  LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
+
+  ReconstructLiveInterval(CurrLI);
+  unsigned RematIdx = LIs->getInstructionIndex(prior(RestorePt));
+  RematIdx = LiveIntervals::getDefIndex(RematIdx);
+  RenumberValno(CurrLI->findDefinedVNInfo(RematIdx));
+
+  ++NumSplits;
+  ++NumRemats;
+  return true;
+}
+
+MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
+                                           const TargetRegisterClass* RC,
+                                           MachineInstr* DefMI,
+                                           MachineInstr* Barrier,
+                                           MachineBasicBlock* MBB,
+                                           int& SS,
+                                           SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+  MachineBasicBlock::iterator Pt = MBB->begin();
+
+  // Bail if there are no references in the MBB to anchor the fold point.
+  if (RefsInMBB.empty())
+    return 0;
+
+  MachineBasicBlock::iterator FoldPt = Barrier;
+  while (&*FoldPt != DefMI && FoldPt != MBB->begin() &&
+         !RefsInMBB.count(FoldPt))
+    --FoldPt;
+
+  int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg, false);
+  if (OpIdx == -1)
+    return 0;
+
+  SmallVector<unsigned, 1> Ops;
+  Ops.push_back(OpIdx);
+
+  if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+    return 0;
+
+  DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg);
+  if (I != IntervalSSMap.end()) {
+    SS = I->second;
+  } else {
+    SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+  }
+
+  MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
+                                             FoldPt, Ops, SS);
+
+  if (FMI) {
+    LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+    FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+    ++NumFolds;
+
+    IntervalSSMap[vreg] = SS;
+    CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+    if (CurrSLI->hasAtLeastOneValue())
+      CurrSValNo = CurrSLI->getValNumInfo(0);
+    else
+      CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+  }
+
+  return FMI;
+}
+
+MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
+                                             const TargetRegisterClass* RC,
+                                             MachineInstr* Barrier,
+                                             MachineBasicBlock* MBB,
+                                             int SS,
+                                             SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+  if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds)
+    return 0;
+
+  // Bail if there are no references in the MBB to anchor the fold point.
+  if (RefsInMBB.empty())
+    return 0;
+
+  // Can't fold a restore between a call stack setup and teardown.
+  MachineBasicBlock::iterator FoldPt = Barrier;
+
+  // Advance from barrier to call frame teardown.
+  while (FoldPt != MBB->getFirstTerminator() &&
+         FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+    if (RefsInMBB.count(FoldPt))
+      return 0;
+
+    ++FoldPt;
+  }
+
+  if (FoldPt == MBB->getFirstTerminator())
+    return 0;
+  else
+    ++FoldPt;
+
+  // Now find the restore point.
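+  // Walk forward to the first reference to the vreg in this block, again
+  // refusing to stop inside a call frame setup/teardown region.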
+  while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) {
+    if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+      while (FoldPt != MBB->getFirstTerminator() &&
+             FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+        if (RefsInMBB.count(FoldPt))
+          return 0;
+
+        ++FoldPt;
+      }
+
+      if (FoldPt == MBB->getFirstTerminator())
+        return 0;
+    }
+
+    ++FoldPt;
+  }
+
+  if (FoldPt == MBB->getFirstTerminator())
+    return 0;
+
+  int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true);
+  if (OpIdx == -1)
+    return 0;
+
+  SmallVector<unsigned, 1> Ops;
+  Ops.push_back(OpIdx);
+
+  if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+    return 0;
+
+  MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
+                                             FoldPt, Ops, SS);
+
+  if (FMI) {
+    LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+    FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+    ++NumRestoreFolds;
+  }
+
+  return FMI;
+}
+
+/// SplitRegLiveInterval - Split (spill and restore) the given live interval
+/// so it would not cross the barrier that's being processed. Shrink wrap
+/// (minimize) the live interval to the last uses.
+bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
+  CurrLI = LI;
+
+  // Find the live range where the current interval crosses the barrier.
+  LiveInterval::iterator LR =
+    CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
+  VNInfo *ValNo = LR->valno;
+
+  if (ValNo->def == ~1U) {
+    // Defined by a dead def? How can this be?
+    assert(0 && "Val# is defined by a dead def?");
+    abort();
+  }
+
+  MachineInstr *DefMI = (ValNo->def != ~0U)
+    ? LIs->getInstructionFromIndex(ValNo->def) : NULL;
+
+  // If this would create a new join point, do not split.
+  if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent()))
+    return false;
+
+  // Find all references in the barrier mbb.
+  SmallPtrSet<MachineInstr*, 4> RefsInMBB;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineInstr *RefMI = &*I;
+    if (RefMI->getParent() == BarrierMBB)
+      RefsInMBB.insert(RefMI);
+  }
+
+  // Find a point to restore the value after the barrier.
+  unsigned RestoreIndex = 0;
+  MachineBasicBlock::iterator RestorePt =
+    findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex);
+  if (RestorePt == BarrierMBB->end())
+    return false;
+
+  if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
+    if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt,
+                      RestoreIndex, RefsInMBB))
+      return true;
+
+  // Add a spill either before the barrier or after the definition.
+  MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
+  const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
+  unsigned SpillIndex = 0;
+  MachineInstr *SpillMI = NULL;
+  int SS = -1;
+  if (ValNo->def == ~0U) {
+    // If it's defined by a phi, we must split just before the barrier.
+    if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
+                             BarrierMBB, SS, RefsInMBB))) {
+      SpillIndex = LIs->getInstructionIndex(SpillMI);
+    } else {
+      MachineBasicBlock::iterator SpillPt =
+        findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex);
+      if (SpillPt == BarrierMBB->begin())
+        return false; // No gap to insert spill.
+      // Add spill.
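+      // (The value comes from a phi, so the store below is emitted with
+      // isKill set: the register is dead once it has been spilled.)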
+ + SS = CreateSpillStackSlot(CurrLI->reg, RC); + TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC); + SpillMI = prior(SpillPt); + LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); + } + } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def, + RestoreIndex, SpillIndex, SS)) { + // If it's already split, just restore the value. There is no need to spill + // the def again. + if (!DefMI) + return false; // Def is dead. Do nothing. + + if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, + BarrierMBB, SS, RefsInMBB))) { + SpillIndex = LIs->getInstructionIndex(SpillMI); + } else { + // Check if it's possible to insert a spill after the def MI. + MachineBasicBlock::iterator SpillPt; + if (DefMBB == BarrierMBB) { + // Add spill after the def and the last use before the barrier. + SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, + RefsInMBB, SpillIndex); + if (SpillPt == DefMBB->begin()) + return false; // No gap to insert spill. + } else { + SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex); + if (SpillPt == DefMBB->end()) + return false; // No gap to insert spill. + } + // Add spill. The store instruction kills the register if def is before + // the barrier in the barrier block. + SS = CreateSpillStackSlot(CurrLI->reg, RC); + TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, + DefMBB == BarrierMBB, SS, RC); + SpillMI = prior(SpillPt); + LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); + } + } + + // Remember def instruction index to spill index mapping. + if (DefMI && SpillMI) + Def2SpillMap[ValNo->def] = SpillIndex; + + // Add restore. + bool FoldedRestore = false; + if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier, + BarrierMBB, SS, RefsInMBB)) { + RestorePt = LMI; + RestoreIndex = LIs->getInstructionIndex(RestorePt); + FoldedRestore = true; + } else { + TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC); + MachineInstr *LoadMI = prior(RestorePt); + LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex); + } + + // Update spill stack slot live interval. + UpdateSpillSlotInterval(ValNo, LIs->getUseIndex(SpillIndex)+1, + LIs->getDefIndex(RestoreIndex)); + + ReconstructLiveInterval(CurrLI); + + if (!FoldedRestore) { + unsigned RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); + RestoreIdx = LiveIntervals::getDefIndex(RestoreIdx); + RenumberValno(CurrLI->findDefinedVNInfo(RestoreIdx)); + } + + ++NumSplits; + return true; +} + +/// SplitRegLiveIntervals - Split all register live intervals that cross the +/// barrier that's being processed. +bool +PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, + SmallPtrSet<LiveInterval*, 8>& Split) { + // First find all the virtual registers whose live intervals are intercepted + // by the current barrier. + SmallVector<LiveInterval*, 8> Intervals; + for (const TargetRegisterClass **RC = RCs; *RC; ++RC) { + // FIXME: If it's not safe to move any instruction that defines the barrier + // register class, then it means there are some special dependencies which + // codegen is not modelling. Ignore these barriers for now. + if (!TII->isSafeToMoveRegClassDefs(*RC)) + continue; + std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC); + for (unsigned i = 0, e = VRs.size(); i != e; ++i) { + unsigned Reg = VRs[i]; + if (!LIs->hasInterval(Reg)) + continue; + LiveInterval *LI = &LIs->getInterval(Reg); + if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg)) + // Virtual register live interval is intercepted by the barrier. 
+        // We should split and shrink wrap its interval if possible.
+        Intervals.push_back(LI);
+    }
+  }
+
+  // Process the affected live intervals.
+  bool Change = false;
+  while (!Intervals.empty()) {
+    if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit)
+      break;
+    LiveInterval *LI = Intervals.back();
+    Intervals.pop_back();
+    bool result = SplitRegLiveInterval(LI);
+    if (result) Split.insert(LI);
+    Change |= result;
+  }
+
+  return Change;
+}
+
+unsigned PreAllocSplitting::getNumberOfNonSpills(
+                                  SmallPtrSet<MachineInstr*, 4>& MIs,
+                                  unsigned Reg, int FrameIndex,
+                                  bool& FeedsTwoAddr) {
+  unsigned NonSpills = 0;
+  for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end();
+       UI != UE; ++UI) {
+    int StoreFrameIndex;
+    unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+    if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
+      NonSpills++;
+
+    int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
+    if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
+      FeedsTwoAddr = true;
+  }
+
+  return NonSpills;
+}
+
+/// removeDeadSpills - After doing splitting, filter through all intervals we've
+/// split, and see if any of the spills are unnecessary. If so, remove them.
+bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
+  bool changed = false;
+
+  // Walk over all of the live intervals that were touched by the splitter,
+  // and see if we can do any DCE and/or folding.
+  for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(),
+         LE = split.end(); LI != LE; ++LI) {
+    DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount;
+
+    // First, collect all the uses of the vreg, and sort them by their
+    // reaching definition (VNInfo).
+    for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
+           UE = MRI->use_end(); UI != UE; ++UI) {
+      unsigned index = LIs->getInstructionIndex(&*UI);
+      index = LiveIntervals::getUseIndex(index);
+
+      const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
+      VNUseCount[LR->valno].insert(&*UI);
+    }
+
+    // Now, take the definitions (VNInfo's) one at a time and try to DCE
+    // and/or fold them away.
+    for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(),
+           VE = (*LI)->vni_end(); VI != VE; ++VI) {
+
+      if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit)
+        return changed;
+
+      VNInfo* CurrVN = *VI;
+
+      // We don't currently try to handle definitions with PHI kills, because
+      // it would involve processing more than one VNInfo at once.
+      if (CurrVN->hasPHIKill) continue;
+
+      // We also don't try to handle the results of PHI joins, since there's
+      // no defining instruction to analyze.
+      unsigned DefIdx = CurrVN->def;
+      if (DefIdx == ~0U || DefIdx == ~1U) continue;
+
+      // We're only interested in eliminating cruft introduced by the splitter,
+      // which is of the form load-use or load-use-store. First, check that the
+      // definition is a load, and remember what stack slot we loaded it from.
+      MachineInstr* DefMI = LIs->getInstructionFromIndex(DefIdx);
+      int FrameIndex;
+      if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
+
+      // If the definition has no uses at all, just DCE it.
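+      // (Such a reload was emitted for a value that turned out to have no
+      // consumers, so both the instruction and its valno can go.)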
+ if (VNUseCount[CurrVN].size() == 0) { + LIs->RemoveMachineInstrFromMaps(DefMI); + (*LI)->removeValNo(CurrVN); + DefMI->eraseFromParent(); + VNUseCount.erase(CurrVN); + NumDeadSpills++; + changed = true; + continue; + } + + // Second, get the number of non-store uses of the definition, as well as + // a flag indicating whether it feeds into a later two-address definition. + bool FeedsTwoAddr = false; + unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN], + (*LI)->reg, FrameIndex, + FeedsTwoAddr); + + // If there's one non-store use and it doesn't feed a two-addr, then + // this is a load-use-store case that we can try to fold. + if (NonSpillCount == 1 && !FeedsTwoAddr) { + // Start by finding the non-store use MachineInstr. + SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin(); + int StoreFrameIndex; + unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); + while (UI != VNUseCount[CurrVN].end() && + (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) { + ++UI; + if (UI != VNUseCount[CurrVN].end()) + StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); + } + if (UI == VNUseCount[CurrVN].end()) continue; + + MachineInstr* use = *UI; + + // Attempt to fold it away! + int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false); + if (OpIdx == -1) continue; + SmallVector<unsigned, 1> Ops; + Ops.push_back(OpIdx); + if (!TII->canFoldMemoryOperand(use, Ops)) continue; + + MachineInstr* NewMI = + TII->foldMemoryOperand(*use->getParent()->getParent(), + use, Ops, FrameIndex); + + if (!NewMI) continue; + + // Update relevant analyses. + LIs->RemoveMachineInstrFromMaps(DefMI); + LIs->ReplaceMachineInstrInMaps(use, NewMI); + (*LI)->removeValNo(CurrVN); + + DefMI->eraseFromParent(); + MachineBasicBlock* MBB = use->getParent(); + NewMI = MBB->insert(MBB->erase(use), NewMI); + VNUseCount[CurrVN].erase(use); + + // Remove deleted instructions. Note that we need to remove them from + // the VNInfo->use map as well, just to be safe. + for (SmallPtrSet<MachineInstr*, 4>::iterator II = + VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end(); + II != IE; ++II) { + for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator + VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE; + ++VNI) + if (VNI->first != CurrVN) + VNI->second.erase(*II); + LIs->RemoveMachineInstrFromMaps(*II); + (*II)->eraseFromParent(); + } + + VNUseCount.erase(CurrVN); + + for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator + VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI) + if (VI->second.erase(use)) + VI->second.insert(NewMI); + + NumDeadSpills++; + changed = true; + continue; + } + + // If there's more than one non-store instruction, we can't profitably + // fold it, so bail. + if (NonSpillCount) continue; + + // Otherwise, this is a load-store case, so DCE them. 
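+      // Every remaining use is a store of the reloaded value back into the
+      // same stack slot, so the reload and all of the stores are dead.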
+      for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
+             VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
+           UI != UE; ++UI) {
+        LIs->RemoveMachineInstrFromMaps(*UI);
+        (*UI)->eraseFromParent();
+      }
+
+      VNUseCount.erase(CurrVN);
+
+      LIs->RemoveMachineInstrFromMaps(DefMI);
+      (*LI)->removeValNo(CurrVN);
+      DefMI->eraseFromParent();
+      NumDeadSpills++;
+      changed = true;
+    }
+  }
+
+  return changed;
+}
+
+bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
+                                       MachineBasicBlock* DefMBB,
+                                       MachineBasicBlock* BarrierMBB) {
+  if (DefMBB == BarrierMBB)
+    return false;
+
+  if (LR->valno->hasPHIKill)
+    return false;
+
+  unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
+  if (LR->end < MBBEnd)
+    return false;
+
+  MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>();
+  if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB))
+    return true;
+
+  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+  SmallPtrSet<MachineBasicBlock*, 4> Visited;
+  typedef std::pair<MachineBasicBlock*,
+                    MachineBasicBlock::succ_iterator> ItPair;
+  SmallVector<ItPair, 4> Stack;
+  Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin()));
+
+  while (!Stack.empty()) {
+    ItPair P = Stack.back();
+    Stack.pop_back();
+
+    MachineBasicBlock* PredMBB = P.first;
+    MachineBasicBlock::succ_iterator S = P.second;
+
+    if (S == PredMBB->succ_end())
+      continue;
+    else if (Visited.count(*S)) {
+      Stack.push_back(std::make_pair(PredMBB, ++S));
+      continue;
+    } else
+      Stack.push_back(std::make_pair(PredMBB, S+1));
+
+    MachineBasicBlock* MBB = *S;
+    Visited.insert(MBB);
+
+    if (MBB == BarrierMBB)
+      return true;
+
+    MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB);
+    MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB);
+    MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom();
+    while (MDTN) {
+      if (MDTN == DefMDTN)
+        return true;
+      else if (MDTN == BarrierMDTN)
+        break;
+      MDTN = MDTN->getIDom();
+    }
+
+    MBBEnd = LIs->getMBBEndIdx(MBB);
+    if (LR->end > MBBEnd)
+      Stack.push_back(std::make_pair(MBB, MBB->succ_begin()));
+  }
+
+  return false;
+}
+
+
+bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
+  CurrMF = &MF;
+  TM = &MF.getTarget();
+  TRI = TM->getRegisterInfo();
+  TII = TM->getInstrInfo();
+  MFI = MF.getFrameInfo();
+  MRI = &MF.getRegInfo();
+  LIs = &getAnalysis<LiveIntervals>();
+  LSs = &getAnalysis<LiveStacks>();
+  VRM = &getAnalysis<VirtRegMap>();
+
+  bool MadeChange = false;
+
+  // Make sure blocks are numbered in order.
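+  // (Earlier passes may leave gaps in the block numbering; renumbering
+  // makes the numbers dense again before the depth-first walk below.)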
+  MF.RenumberBlocks();
+
+  MachineBasicBlock *Entry = MF.begin();
+  SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+  SmallPtrSet<LiveInterval*, 8> Split;
+
+  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    BarrierMBB = *DFI;
+    for (MachineBasicBlock::iterator I = BarrierMBB->begin(),
+           E = BarrierMBB->end(); I != E; ++I) {
+      Barrier = &*I;
+      const TargetRegisterClass **BarrierRCs =
+        Barrier->getDesc().getRegClassBarriers();
+      if (!BarrierRCs)
+        continue;
+      BarrierIdx = LIs->getInstructionIndex(Barrier);
+      MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split);
+    }
+  }
+
+  MadeChange |= removeDeadSpills(Split);
+
+  return MadeChange;
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..9e7ad67
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,679 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+
+static RegisterPass<PEI>
+X("prologepilog", "Prologue/Epilogue Insertion");
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+
+  // Get MachineModuleInfo so that we can track the construction of the
+  // frame.
+  if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
+    Fn.getFrameInfo()->setMachineModuleInfo(MMI);
+
+  // Allow the target machine to make some adjustments to the function
+  // (e.g. UsedPhysRegs) before calculateCalleeSavedRegisters.
+  TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+  // Scan the function for modified callee saved registers and insert spill
+  // code for any callee saved registers that are modified.  Also calculate
+  // the MaxCallFrameSize and HasCalls variables for the function's frame
+  // information and eliminate call frame pseudo instructions.
+  calculateCalleeSavedRegisters(Fn);
+
+  // Determine placement of CSR spill/restore code:
+  //  - with shrink wrapping, place spills and restores to tightly
+  //    enclose regions in the Machine CFG of the function where
+  //    they are used.
+  //  - without shrink wrapping (the default), place all spills in the
+  //    entry block, all restores in return blocks.
+  placeCSRSpillsAndRestores(Fn);
+
+  // Add the code to save and restore the callee saved registers.
+  insertCSRSpillsAndRestores(Fn);
+
+  // Allow the target machine to make final modifications to the function
+  // before the frame layout is finalized.
+  TRI->processFunctionBeforeFrameFinalized(Fn);
+
+  // Calculate actual frame offsets for all abstract stack objects...
+  calculateFrameObjectOffsets(Fn);
+
+  // Add prolog and epilog code to the function.  This function is required
+  // to align the stack frame as necessary for any stack variables or
+  // called functions.  Because of this, calculateCalleeSavedRegisters
+  // must be called before this function in order to set the HasCalls
+  // and MaxCallFrameSize variables.
+  insertPrologEpilogCode(Fn);
+
+  // Replace all MO_FrameIndex operands with physical register references
+  // and actual offsets.
+  //
+  replaceFrameIndices(Fn);
+
+  delete RS;
+  clearAllSets();
+  return true;
+}
+
+#if 0
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  if (ShrinkWrapping || ShrinkWrapFunc != "") {
+    AU.addRequired<MachineLoopInfo>();
+    AU.addRequired<MachineDominatorTree>();
+  }
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+#endif
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.  Also calculate the MaxCallFrameSize and HasCalls variables for
+/// the function's frame information and eliminate call frame pseudo
+/// instructions.
+///
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+
+  // Get the callee saved register list...
+  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+  // Get the function call frame set-up and tear-down instruction opcodes.
+  int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+
+  // These are used to keep track of the callee-save area.  Initialize them.
+  MinCSFrameIndex = INT_MAX;
+  MaxCSFrameIndex = 0;
+
+  // Early exit for targets which have no callee saved registers and no call
+  // frame setup/destroy pseudo instructions.
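+  // (On X86, for example, those pseudos are ADJCALLSTACKDOWN and
+  // ADJCALLSTACKUP; a target without them reports an opcode of -1.)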
+  if ((CSRegs == 0 || CSRegs[0] == 0) &&
+      FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+    return;
+
+  unsigned MaxCallFrameSize = 0;
+  bool HasCalls = false;
+
+  std::vector<MachineBasicBlock::iterator> FrameSDOps;
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+      if (I->getOpcode() == FrameSetupOpcode ||
+          I->getOpcode() == FrameDestroyOpcode) {
+        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+               " instructions should have a single immediate argument!");
+        unsigned Size = I->getOperand(0).getImm();
+        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+        HasCalls = true;
+        FrameSDOps.push_back(I);
+      }
+
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  FFI->setHasCalls(HasCalls);
+  FFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+  for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
+    MachineBasicBlock::iterator I = FrameSDOps[i];
+    // If call frames are not being included as part of the stack frame,
+    // and there is no dynamic allocation (therefore referencing frame slots
+    // off sp), leave the pseudo ops alone. We'll eliminate them later.
+    if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+      RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+  }
+
+  // Now figure out which *callee saved* registers are modified by the current
+  // function, thus needing to be saved and restored in the prolog/epilog.
+  //
+  const TargetRegisterClass* const *CSRegClasses =
+    RegInfo->getCalleeSavedRegClasses(&Fn);
+  std::vector<CalleeSavedInfo> CSI;
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+      // If the reg is modified, save it!
+      CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+    } else {
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+           *AliasSet; ++AliasSet) {  // Check alias registers too.
+        if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
+          CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+          break;
+        }
+      }
+    }
+  }
+
+  if (CSI.empty())
+    return;   // Early exit if no callee saved registers are modified!
+
+  unsigned NumFixedSpillSlots;
+  const std::pair<unsigned,int> *FixedSpillSlots =
+    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+  // Now that we know which registers need to be saved and restored, allocate
+  // stack slots for them.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RC = CSI[i].getRegClass();
+
+    // Check to see if this physreg must be spilled to a particular stack slot
+    // on this target.
+    const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
+    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+           FixedSlot->first != Reg)
+      ++FixedSlot;
+
+    int FrameIdx;
+    if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
+      // Nope, just spill it anywhere convenient.
+      unsigned Align = RC->getAlignment();
+      unsigned StackAlign = TFI->getStackAlignment();
+      // We may not be able to satisfy the desired alignment specification of
+      // the TargetRegisterClass if the stack alignment is smaller.
+      // Use the min.
+      Align = std::min(Align, StackAlign);
+      FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
+      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+    } else {
+      // Spill it to the stack where we must.
+      FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
+    }
+    CSI[i].setFrameIdx(FrameIdx);
+  }
+
+  FFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+  // Get callee saved register information.
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty())
+    return;
+
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  MachineBasicBlock::iterator I;
+
+  if (! ShrinkWrapThisFunction) {
+    // Spill using target interface.
+    I = EntryBlock->begin();
+    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        // Add the callee-saved register as live-in.
+        // It's killed at the spill.
+        EntryBlock->addLiveIn(CSI[i].getReg());
+
+        // Insert the spill to the stack frame.
+        TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
+                                CSI[i].getFrameIdx(), CSI[i].getRegClass());
+      }
+    }
+
+    // Restore using target interface.
+    for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+      MachineBasicBlock* MBB = ReturnBlocks[ri];
+      I = MBB->end(); --I;
+
+      // Skip over all terminator instructions, which are part of the return
+      // sequence.
+      MachineBasicBlock::iterator I2 = I;
+      while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+        I = I2;
+
+      bool AtStart = I == MBB->begin();
+      MachineBasicBlock::iterator BeforeI = I;
+      if (!AtStart)
+        --BeforeI;
+
+      // Restore all registers immediately before the return and any
+      // terminators that precede it.
+      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+          TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+                                   CSI[i].getFrameIdx(),
+                                   CSI[i].getRegClass());
+          assert(I != MBB->begin() &&
+                 "loadRegFromStackSlot didn't insert any code!");
+          // Insert in reverse order.  loadRegFromStackSlot can insert
+          // multiple instructions.
+          if (AtStart)
+            I = MBB->begin();
+          else {
+            I = BeforeI;
+            ++I;
+          }
+        }
+      }
+    }
+    return;
+  }
+
+  // Insert spills.
+  std::vector<CalleeSavedInfo> blockCSI;
+  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+         BE = CSRSave.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet save = BI->second;
+
+    if (save.empty())
+      continue;
+
+    blockCSI.clear();
+    for (CSRegSet::iterator RI = save.begin(),
+           RE = save.end(); RI != RE; ++RI) {
+      blockCSI.push_back(CSI[*RI]);
+    }
+    assert(blockCSI.size() > 0 &&
+           "Could not collect callee saved register info");
+
+    I = MBB->begin();
+
+    // When shrink wrapping, use stack slot stores/loads.
+    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+      // Add the callee-saved register as live-in.
+      // It's killed at the spill.
+      MBB->addLiveIn(blockCSI[i].getReg());
+
+      // Insert the spill to the stack frame.
+      TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
+                              true,
+                              blockCSI[i].getFrameIdx(),
+                              blockCSI[i].getRegClass());
+    }
+  }
+
+  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+         BE = CSRRestore.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet restore = BI->second;
+
+    if (restore.empty())
+      continue;
+
+    blockCSI.clear();
+    for (CSRegSet::iterator RI = restore.begin(),
+           RE = restore.end(); RI != RE; ++RI) {
+      blockCSI.push_back(CSI[*RI]);
+    }
+    assert(blockCSI.size() > 0 &&
+           "Could not find callee saved register info");
+
+    // If MBB is empty and needs restores, insert at the _beginning_.
+    if (MBB->empty()) {
+      I = MBB->begin();
+    } else {
+      I = MBB->end();
+      --I;
+
+      // Skip over all terminator instructions, which are part of the
+      // return sequence.
+      if (! I->getDesc().isTerminator()) {
+        ++I;
+      } else {
+        MachineBasicBlock::iterator I2 = I;
+        while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+          I = I2;
+      }
+    }
+
+    bool AtStart = I == MBB->begin();
+    MachineBasicBlock::iterator BeforeI = I;
+    if (!AtStart)
+      --BeforeI;
+
+    // Restore all registers immediately before the return and any
+    // terminators that precede it.
+    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+      TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
+                               blockCSI[i].getFrameIdx(),
+                               blockCSI[i].getRegClass());
+      assert(I != MBB->begin() &&
+             "loadRegFromStackSlot didn't insert any code!");
+      // Insert in reverse order.  loadRegFromStackSlot can insert
+      // multiple instructions.
+      if (AtStart)
+        I = MBB->begin();
+      else {
+        I = BeforeI;
+        ++I;
+      }
+    }
+  }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
+                  bool StackGrowsDown, int64_t &Offset,
+                  unsigned &MaxAlign) {
+  // If the stack grows down, we need to add the size to find the lowest
+  // address of the object.
+  if (StackGrowsDown)
+    Offset += FFI->getObjectSize(FrameIdx);
+
+  unsigned Align = FFI->getObjectAlignment(FrameIdx);
+
+  // If the alignment of this object is greater than that of the stack, then
+  // increase the stack alignment to match.
+  MaxAlign = std::max(MaxAlign, Align);
+
+  // Adjust to alignment boundary, e.g. Offset 13 with Align 8 rounds up
+  // to 16: (13 + 7) / 8 * 8 == 16.
+  Offset = (Offset + Align - 1) / Align * Align;
+
+  if (StackGrowsDown) {
+    FFI->setObjectOffset(FrameIdx, -Offset);  // Set the computed offset
+  } else {
+    FFI->setObjectOffset(FrameIdx, Offset);
+    Offset += FFI->getObjectSize(FrameIdx);
+  }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+
+  bool StackGrowsDown =
+    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+
+  // Loop over all of the stack objects, assigning sequential addresses...
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+  unsigned MaxAlign = FFI->getMaxAlignment();
+
+  // Start at the beginning of the local area.
+  // The Offset is the distance from the stack top in the direction
+  // of stack growth -- so it's always nonnegative.
+  int64_t Offset = TFI.getOffsetOfLocalArea();
+  if (StackGrowsDown)
+    Offset = -Offset;
+  assert(Offset >= 0
+         && "Local area offset should be in direction of stack growth");
+
+  // If there are fixed sized objects that are preallocated in the local area,
+  // non-fixed objects can't be allocated right at the start of the local area.
+  // We currently don't support filling in holes in between fixed sized
+  // objects, so we adjust 'Offset' to point to the end of the last fixed sized
+  // preallocated object.
+  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+    int64_t FixedOff;
+    if (StackGrowsDown) {
+      // The maximum distance from the stack pointer is at the lower address
+      // of the object -- which is given by the offset. For a down growing
+      // stack the offset is negative, so we negate it to get the distance.
+      FixedOff = -FFI->getObjectOffset(i);
+    } else {
+      // The maximum distance from the stack pointer is at the upper
+      // address of the object.
+      FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+    }
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+
+  // First assign frame offsets to stack objects that are used to spill
+  // callee saved registers.
+  if (StackGrowsDown) {
+    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+      Offset += FFI->getObjectSize(i);
+
+      unsigned Align = FFI->getObjectAlignment(i);
+      // If the alignment of this object is greater than that of the stack,
+      // then increase the stack alignment to match.
+      MaxAlign = std::max(MaxAlign, Align);
+      // Adjust to alignment boundary
+      Offset = (Offset+Align-1)/Align*Align;
+
+      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
+    }
+  } else {
+    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+    for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+      unsigned Align = FFI->getObjectAlignment(i);
+      // If the alignment of this object is greater than that of the stack,
+      // then increase the stack alignment to match.
+      MaxAlign = std::max(MaxAlign, Align);
+      // Adjust to alignment boundary
+      Offset = (Offset+Align-1)/Align*Align;
+
+      FFI->setObjectOffset(i, Offset);
+      Offset += FFI->getObjectSize(i);
+    }
+  }
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // frame pointer if a frame pointer is required.
+  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  if (RS && RegInfo->hasFP(Fn)) {
+    int SFI = RS->getScavengingFrameIndex();
+    if (SFI >= 0)
+      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+  }
+
+  // Make sure that the stack protector comes before the local variables on the
+  // stack.
+  if (FFI->getStackProtectorIndex() >= 0)
+    AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
+                      Offset, MaxAlign);
+
+  // Then assign frame offsets to stack objects that are not used to spill
+  // callee saved registers.
+  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+      continue;
+    if (RS && (int)i == RS->getScavengingFrameIndex())
+      continue;
+    if (FFI->isDeadObjectIndex(i))
+      continue;
+    if (FFI->getStackProtectorIndex() == (int)i)
+      continue;
+
+    AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
+  }
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // stack pointer.
+  if (RS && !RegInfo->hasFP(Fn)) {
+    int SFI = RS->getScavengingFrameIndex();
+    if (SFI >= 0)
+      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
+  }
+
+  // Round up the size to a multiple of the alignment, but only if there are
+  // calls or allocas in the function.  This ensures that any calls to
+  // subroutines have their stack frames suitably aligned.
+  // Also do this if we need runtime alignment of the stack.
In this case + // offsets will be relative to SP not FP; round up the stack size so this + // works. + if (!RegInfo->targetHandlesStackFrameRounding() && + (FFI->hasCalls() || FFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && + FFI->getObjectIndexEnd() != 0))) { + // If we have reserved argument space for call sites in the function + // immediately on entry to the current function, count it as part of the + // overall stack size. + if (RegInfo->hasReservedCallFrame(Fn)) + Offset += FFI->getMaxCallFrameSize(); + + unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + } + + // Update frame info to pretend that this is part of the stack... + FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea()); + + // Remember the required stack alignment in case targets need it to perform + // dynamic stack alignment. + FFI->setMaxAlignment(MaxAlign); +} + + +/// insertPrologEpilogCode - Scan the function for modified callee saved +/// registers, insert spill code for these callee saved registers, then add +/// prolog and epilog code to the function. +/// +void PEI::insertPrologEpilogCode(MachineFunction &Fn) { + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + // Add prologue to the function... + TRI->emitPrologue(Fn); + + // Add epilogue to restore the callee-save registers in each exiting block + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().getDesc().isReturn()) + TRI->emitEpilogue(Fn, *I); + } +} + + +/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical +/// register references and actual offsets. +/// +void PEI::replaceFrameIndices(MachineFunction &Fn) { + if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + + const TargetMachine &TM = Fn.getTarget(); + assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetFrameInfo *TFI = TM.getFrameInfo(); + bool StackGrowsDown = + TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + int SPAdj = 0; // SP offset due to call frame setup / destroy. + if (RS) RS->enterBasicBlock(BB); + + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + if (I->getOpcode() == TargetInstrInfo::DECLARE) { + // Ignore it. + ++I; + continue; + } + + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); + + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; + + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); + + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. 
+        else
+          I = next(PrevI);
+        continue;
+      }
+
+      MachineInstr *MI = I;
+      bool DoIncr = true;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+        if (MI->getOperand(i).isFI()) {
+          // Some instructions (e.g. inline asm instructions) can have
+          // multiple frame indices and/or cause eliminateFrameIndex
+          // to insert more than one instruction. We need the register
+          // scavenger to go through all of these instructions so that
+          // it can update its register information. We keep the
+          // iterator at the point before insertion so that we can
+          // revisit them in full.
+          bool AtBeginning = (I == BB->begin());
+          if (!AtBeginning) --I;
+
+          // If this instruction has a FrameIndex operand, we need to
+          // use that target machine register info object to eliminate
+          // it.
+          TRI.eliminateFrameIndex(MI, SPAdj, RS);
+
+          // Reset the iterator if we were at the beginning of the BB.
+          if (AtBeginning) {
+            I = BB->begin();
+            DoIncr = false;
+          }
+
+          MI = 0;
+          break;
+        }
+
+      if (DoIncr && I != BB->end()) ++I;
+
+      // Update register states.
+      if (RS && MI) RS->forward(MI);
+    }
+
+    assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+  }
+}
+
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 0000000..c158dd8
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,167 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+  class RegScavenger;
+  class MachineBasicBlock;
+
+  class PEI : public MachineFunctionPass {
+  public:
+    static char ID;
+    PEI() : MachineFunctionPass(&ID) {}
+
+    const char *getPassName() const {
+      return "Prolog/Epilog Insertion & Frame Finalization";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+    /// frame indexes with appropriate references.
+    ///
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+  private:
+    RegScavenger *RS;
+
+    // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+    // stack frame indexes.
+    unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+    // Analysis info for spill/restore placement.
+    // "CSR": "callee saved register".
+
+    // CSRegSet contains indices into the Callee Saved Register Info
+    // vector built by calculateCalleeSavedRegisters() and accessed
+    // via MF.getFrameInfo()->getCalleeSavedInfo().
+    typedef SparseBitVector<> CSRegSet;
+
+    // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+    // saved register indices.
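+    // For example, CSRSave[MBB] == { 0, 2 } (see below) would mean that the
+    // registers described by CSI[0] and CSI[2] are spilled on entry to MBB.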
+    typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+    // Set and maps for computing CSR spill/restore placement:
+    //  used in function                            (UsedCSRegs)
+    //  used in a basic block                       (CSRUsed)
+    //  anticipatable in a basic block              (Antic{In,Out})
+    //  available in a basic block                  (Avail{In,Out})
+    //  to be spilled at the entry to a basic block (CSRSave)
+    //  to be restored at the end of a basic block  (CSRRestore)
+    CSRegSet UsedCSRegs;
+    CSRegBlockMap CSRUsed;
+    CSRegBlockMap AnticIn, AnticOut;
+    CSRegBlockMap AvailIn, AvailOut;
+    CSRegBlockMap CSRSave;
+    CSRegBlockMap CSRRestore;
+
+    // Entry and return blocks of the current function.
+    MachineBasicBlock* EntryBlock;
+    SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+    // Map of MBBs to top level MachineLoops.
+    DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+    // Flag to control shrink wrapping per-function:
+    // we may choose to skip shrink wrapping for certain
+    // functions.
+    bool ShrinkWrapThisFunction;
+
+#ifndef NDEBUG
+    // Machine function handle.
+    MachineFunction* MF;
+
+    // Flag indicating that the current function
+    // has at least one "short" path in the machine
+    // CFG from the entry block to an exit block.
+    bool HasFastExitPath;
+#endif
+
+    bool calculateSets(MachineFunction &Fn);
+    bool calcAnticInOut(MachineBasicBlock* MBB);
+    bool calcAvailInOut(MachineBasicBlock* MBB);
+    void calculateAnticAvail(MachineFunction &Fn);
+    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+                              SmallVector<MachineBasicBlock*, 4>& blks);
+    bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+    bool calcSpillPlacements(MachineBasicBlock* MBB,
+                             SmallVector<MachineBasicBlock*, 4> &blks,
+                             CSRegBlockMap &prevSpills);
+    bool calcRestorePlacements(MachineBasicBlock* MBB,
+                               SmallVector<MachineBasicBlock*, 4> &blks,
+                               CSRegBlockMap &prevRestores);
+    void placeSpillsAndRestores(MachineFunction &Fn);
+    void placeCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateCalleeSavedRegisters(MachineFunction &Fn);
+    void insertCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+    void replaceFrameIndices(MachineFunction &Fn);
+    void insertPrologEpilogCode(MachineFunction &Fn);
+
+    // Initialize DFA sets, called before iterations.
+    void clearAnticAvailSets();
+    // Clear all sets constructed by shrink wrapping.
+    void clearAllSets();
+
+    // Initialize all shrink wrapping data.
+    void initShrinkWrappingInfo();
+
+    // Conveniences for dealing with machine loops.
+    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+    MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+    // Propagate CSRs used in MBB to all MBBs of loop LP.
+    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+    // Convenience for recognizing return blocks.
+    bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+    // Debugging methods.
+
+    // Mark this function as having fast exit paths.
+    void findFastExitPath();
+
+    // Verify placement of spills/restores.
+ void verifySpillRestorePlacement(); + + std::string getBasicBlockName(const MachineBasicBlock* MBB); + std::string stringifyCSRegSet(const CSRegSet& s); + void dumpSet(const CSRegSet& s); + void dumpUsed(MachineBasicBlock* MBB); + void dumpAllUsed(); + void dumpSets(MachineBasicBlock* MBB); + void dumpSets1(MachineBasicBlock* MBB); + void dumpAllSets(); + void dumpSRSets(); +#endif + + }; +} // End llvm namespace +#endif diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp new file mode 100644 index 0000000..b4c20e6 --- /dev/null +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -0,0 +1,92 @@ +//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PseudoSourceValue class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#include <map> +using namespace llvm; + +static ManagedStatic<PseudoSourceValue[4]> PSVs; + +const PseudoSourceValue *PseudoSourceValue::getStack() +{ return &(*PSVs)[0]; } +const PseudoSourceValue *PseudoSourceValue::getGOT() +{ return &(*PSVs)[1]; } +const PseudoSourceValue *PseudoSourceValue::getJumpTable() +{ return &(*PSVs)[2]; } +const PseudoSourceValue *PseudoSourceValue::getConstantPool() +{ return &(*PSVs)[3]; } + +static const char *const PSVNames[] = { + "Stack", + "GOT", + "JumpTable", + "ConstantPool" +}; + +PseudoSourceValue::PseudoSourceValue() : + Value(PointerType::getUnqual(Type::Int8Ty), PseudoSourceValueVal) {} + +void PseudoSourceValue::dump() const { + print(errs()); errs() << '\n'; +} + +void PseudoSourceValue::print(raw_ostream &OS) const { + OS << PSVNames[this - *PSVs]; +} + +namespace { + /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue + /// for holding FixedStack values, which must include a frame + /// index. 
+  class VISIBILITY_HIDDEN FixedStackPseudoSourceValue
+    : public PseudoSourceValue {
+    const int FI;
+  public:
+    explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {}
+
+    virtual bool isConstant(const MachineFrameInfo *MFI) const;
+
+    virtual void print(raw_ostream &OS) const {
+      OS << "FixedStack" << FI;
+    }
+  };
+}
+
+static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+  const PseudoSourceValue *&V = (*FSValues)[FI];
+  if (!V)
+    V = new FixedStackPseudoSourceValue(FI);
+  return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+  if (this == getStack())
+    return false;
+  if (this == getGOT() ||
+      this == getConstantPool() ||
+      this == getJumpTable())
+    return true;
+  assert(0 && "Unknown PseudoSourceValue!");
+  return false;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+  return MFI && MFI->isImmutableObjectIndex(FI);
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
new file mode 100644
index 0000000..64374ce
--- /dev/null
+++ b/lib/CodeGen/README.txt
@@ -0,0 +1,208 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+        mul lr, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        ldr r4, [sp, #+52]
+        mla r4, r3, lr, r4
+
+can be:
+
+        mul lr, r4, lr
+        mov r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+        mul r4, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+It also increases the likelihood that the store will become dead.
+
+//===---------------------------------------------------------------------===//
+
+I think we should have a "hasSideEffects" flag (which is automatically set for
+stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able
+to remat any instruction that has no side effects, if it can handle it and if
+profitable.
+
+For now, I'd suggest having the remat stuff work like this:
+
+1. I need to spill/reload this thing.
+2. Check to see if it has side effects.
+3. Check to see if it is simple enough: e.g. it only has one register
+destination and no register input.
+4. If so, clone the instruction, do the xform, etc.
+
+Advantages of this are:
+
+1. the .td file describes the behavior of the instructions, not the way the
+   algorithm should work.
+2. as remat gets smarter in the future, we shouldn't have to change the .td
+   files.
+3. it is easier to explain what the flag means in the .td file, because you
+   don't have to pull in the explanation of how the current remat algo works.
+
+Some potential added complexities:
+
+1. Some instructions have to be glued to their predecessor or successor: all
+   of the PC relative instructions and condition code setting instructions.
+   We could mark them as hasSideEffects, but that's not quite right. PC
+   relative loads from constantpools can be remat'ed, for example. But it
+   requires more than just cloning the instruction. Some instructions can be
+   remat'ed but expand to more than one instruction. The allocator will have
+   to make a decision.
+
+2. As stated in step 3 above, remat is not as simple as cloning in some cases.
+   The target will have to decide how to remat it. For example, an ARM 2-piece
+   constant generation instruction is remat'ed as a load from constantpool.
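+
+A rough sketch of the eligibility test in steps 1-3, assuming the proposed
+hasSideEffects flag existed on TargetInstrDesc (it is hypothetical, as is the
+helper name; cf. the existing hasUnmodeledSideEffects()):
+
+  // Decide whether a def can be rematted instead of spilled and reloaded:
+  // it must be side-effect free and "simple enough", i.e. exactly one
+  // register def and no register inputs.
+  static bool isSimpleRematCandidate(const MachineInstr *MI) {
+    if (MI->getDesc().hasSideEffects())  // the proposed .td-driven flag
+      return false;
+    unsigned NumRegDefs = 0;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.getReg())
+        continue;                        // ignore non-register operands
+      if (MO.isDef())
+        ++NumRegDefs;
+      else
+        return false;                    // has a register input
+    }
+    return NumRegDefs == 1;
+  }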
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+        ...
+        %reg1037 = ADDri %reg1039, 1
+        %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+    Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+    Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+        %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0>
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. register scavenger) to assign it a free register to allow
+reuse:
+        ldr r3, [sp, #+4]
+        add r3, r3, #3
+        ldr r2, [sp, #+8]
+        add r2, r2, #2
+        ldr r1, [sp, #+4]  <==
+        add r1, r1, #1
+        ldr r0, [sp, #+4]
+        add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts CSE'd expressions out of loops. Sometimes this can
+have negative side effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure; R1, R2, and R3 can be spilled. We
+need to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4  @ re-materialized
+load [i + R1]
+...
+R2 = X + 7  @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+
+//===---------------------------------------------------------------------===//
+
+It's not always a good idea to choose rematerialization over spilling. If all
+the load / store instructions would be folded then spilling is cheaper because
+it won't require new live intervals / registers. See 2003-05-31-LongShifts for
+an example.
+
+//===---------------------------------------------------------------------===//
+
+With a copying garbage collector, derived pointers must not be retained across
+collector safe points; the collector could move the objects and invalidate the
+derived pointer. This is bad enough in the first place, but safe points can
+crop up unpredictably. Consider:
+
+        %array = load { i32, [0 x %obj] }** %array_addr
+        %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+        %old = load %obj** %nth_el
+        %z = div i64 %x, %y
+        store %obj* %new, %obj** %nth_el
+
+If the i64 division is lowered to a libcall, then a safe point will (must)
+appear for the call site. If a collection occurs, %array and %nth_el no longer
+point into the correct object.
+
+The fix for this is to copy address calculations so that dependent pointers
+are never live across safe point boundaries. But the loads cannot be copied
+like this if there was an intervening store, so this may be hard to get right.
+
+Only a concurrent mutator can trigger a collection at the libcall safe point.
+So single-threaded programs do not have this requirement, even with a copying
+collector. Still, LLVM optimizations would probably undo a front-end's careful
+work.
+
+//===---------------------------------------------------------------------===//
+
+The ocaml frametable structure supports liveness information. It would be good
+to support it.
+
+//===---------------------------------------------------------------------===//
+
+The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be
+revisited. The check is there to work around a misuse of directives in inline
+assembly.
+
+//===---------------------------------------------------------------------===//
+
+It would be good to detect collector/target compatibility instead of silently
+doing the wrong thing.
+
+//===---------------------------------------------------------------------===//
+
+It would be really nice to be able to write patterns in .td files for copies,
+which would eliminate a bunch of explicit predicates on them (e.g. no side
+effects). Once this is in place, it would be even better to have tblgen
+synthesize the various copy insertion/inspection methods in TargetInstrInfo.
+
+//===---------------------------------------------------------------------===//
+
+Stack coloring improvements:
+
+1. Do proper LiveStackAnalysis on all stack objects including those which are
+   not spill slots.
+2. Reorder objects to fill in gaps between objects.
+   e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4
diff --git a/lib/CodeGen/RegAllocBigBlock.cpp b/lib/CodeGen/RegAllocBigBlock.cpp
new file mode 100644
index 0000000..91e4099
--- /dev/null
+++ b/lib/CodeGen/RegAllocBigBlock.cpp
@@ -0,0 +1,892 @@
+//===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RABigBlock class
+//
+//===----------------------------------------------------------------------===//
+
+// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+// allocator works on one basic block at a time, oblivious to others.
+// However, the algorithm used here is suited for long blocks of
+// instructions - registers are spilled by greedily choosing those holding
+// values that will not be needed for the longest amount of time. This works
+// particularly well for blocks with 10 or more times as many instructions
+// as machine registers, but can be used for general code.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: - automagically invoke linearscan for (groups of) small BBs?
+//       - break ties when picking regs?
(probably not worth it in a +// JIT context) +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumFolded, "Number of loads/stores folded into instructions"); + +static RegisterRegAlloc + bigBlockRegAlloc("bigblock", "Big-block register allocator", + createBigBlockRegisterAllocator); + +namespace { +/// VRegKeyInfo - Defines magic values required to use VirtRegs as DenseMap +/// keys. + struct VRegKeyInfo { + static inline unsigned getEmptyKey() { return -1U; } + static inline unsigned getTombstoneKey() { return -2U; } + static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; } + static unsigned getHashValue(const unsigned &Key) { return Key; } + }; + + +/// This register allocator is derived from RegAllocLocal.cpp. Like it, this +/// allocator works on one basic block at a time, oblivious to others. +/// However, the algorithm used here is suited for long blocks of +/// instructions - registers are spilled by greedily choosing those holding +/// values that will not be needed for the longest amount of time. This works +/// particularly well for blocks with 10 or more times as many instructions +/// as machine registers, but can be used for general code. +/// +/// TODO: - automagically invoke linearscan for (groups of) small BBs? +/// - break ties when picking regs? (probably not worth it in a +/// JIT context) +/// + class VISIBILITY_HIDDEN RABigBlock : public MachineFunctionPass { + public: + static char ID; + RABigBlock() : MachineFunctionPass(&ID) {} + private: + /// TM - For getting at TargetMachine info + /// + const TargetMachine *TM; + + /// MF - Our generic MachineFunction pointer + /// + MachineFunction *MF; + + /// RegInfo - For dealing with machine register info (aliases, folds + /// etc) + const TargetRegisterInfo *RegInfo; + + typedef SmallVector<unsigned, 2> VRegTimes; + + /// VRegReadTable - maps VRegs in a BB to the set of times they are read + /// + DenseMap<unsigned, VRegTimes*, VRegKeyInfo> VRegReadTable; + + /// VRegReadIdx - keeps track of the "current time" in terms of + /// positions in VRegReadTable + DenseMap<unsigned, unsigned , VRegKeyInfo> VRegReadIdx; + + /// StackSlotForVirtReg - Maps virtual regs to the frame index where these + /// values are spilled. + IndexedMap<unsigned, VirtReg2IndexFunctor> StackSlotForVirtReg; + + /// Virt2PhysRegMap - This map contains entries for each virtual register + /// that is currently available in a physical register. + IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap; + + /// PhysRegsUsed - This array is effectively a map, containing entries for + /// each physical register that currently has a value (ie, it is in + /// Virt2PhysRegMap). 
The value mapped to is the virtual register
+    /// corresponding to the physical register (the inverse of the
+    /// Virt2PhysRegMap), or 0. The value is set to 0 if this register is
+    /// pinned because it is used by a future instruction, and to -2 if it is
+    /// not allocatable. If the entry for a physical register is -1, then the
+    /// physical register is "not in the map".
+    ///
+    std::vector<int> PhysRegsUsed;
+
+    /// VirtRegModified - This bitset contains information about which virtual
+    /// registers need to be spilled back to memory when their registers are
+    /// scavenged. If a virtual register has simply been rematerialized, there
+    /// is no reason to spill it to memory when we need the register back.
+    ///
+    std::vector<int> VirtRegModified;
+
+    /// MBBLastInsnTime - the number of the last instruction in MBB
+    ///
+    int MBBLastInsnTime;
+
+    /// MBBCurTime - the number of the instruction currently being processed
+    ///
+    int MBBCurTime;
+
+    unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+      return Virt2PhysRegMap[VirtReg];
+    }
+
+    unsigned &getVirt2StackSlot(unsigned VirtReg) {
+      return StackSlotForVirtReg[VirtReg];
+    }
+
+    /// markVirtRegModified - Lets us flip bits in the VirtRegModified bitset
+    ///
+    void markVirtRegModified(unsigned Reg, bool Val = true) {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      Reg -= TargetRegisterInfo::FirstVirtualRegister;
+      if (VirtRegModified.size() <= Reg)
+        VirtRegModified.resize(Reg+1);
+      VirtRegModified[Reg] = Val;
+    }
+
+    /// isVirtRegModified - Lets us query the VirtRegModified bitset
+    ///
+    bool isVirtRegModified(unsigned Reg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
+             VirtRegModified.size() && "Illegal virtual register!");
+      return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
+    }
+
+  public:
+    /// getPassName - returns the BigBlock allocator's name
+    ///
+    virtual const char *getPassName() const {
+      return "BigBlock Register Allocator";
+    }
+
+    /// getAnalysisUsage - declares the required analyses
+    ///
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(PHIEliminationID);
+      AU.addRequiredID(TwoAddressInstructionPassID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// runOnMachineFunction - Register allocate the whole function
+    ///
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    ///
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+    /// FillVRegReadTable - Fill out the table of vreg read times given a BB
+    ///
+    void FillVRegReadTable(MachineBasicBlock &MBB);
+
+    /// areRegsEqual - This method returns true if the specified registers are
+    /// related to each other. To do this, it checks to see if they are equal
+    /// or if the first register is in the alias set of the second register.
+    ///
+    bool areRegsEqual(unsigned R1, unsigned R2) const {
+      if (R1 == R2) return true;
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+           *AliasSet; ++AliasSet) {
+        if (*AliasSet == R1) return true;
+      }
+      return false;
+    }
+
+    /// getStackSpaceFor - This returns the frame index of the specified
+    /// virtual register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// removePhysReg - This method marks the specified physical register as no
+    /// longer being in use.
+    ///
+    void removePhysReg(unsigned PhysReg);
+
+    /// spillVirtReg - This method spills the value specified by PhysReg into
+    /// the virtual register slot specified by VirtReg. It then updates the RA
+    /// data structures to indicate the fact that PhysReg is now available.
+    ///
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                      unsigned VirtReg, unsigned PhysReg);
+
+    /// spillPhysReg - This method spills the specified physical register into
+    /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+    /// true, then the request is ignored if the physical register does not
+    /// contain a virtual register.
+    ///
+    void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                      unsigned PhysReg, bool OnlyVirtRegs = false);
+
+    /// assignVirtToPhysReg - This method updates local state so that we know
+    /// that PhysReg is the proper container for VirtReg now. The physical
+    /// register must not be used for anything else when this is called.
+    ///
+    void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+    /// isPhysRegAvailable - Return true if the specified physical register is
+    /// free and available for use. This also includes checking to see if
+    /// aliased registers are all free...
+    ///
+    bool isPhysRegAvailable(unsigned PhysReg) const;
+
+    /// getFreeReg - Look to see if there is a free register available in the
+    /// specified register class. If not, return 0.
+    ///
+    unsigned getFreeReg(const TargetRegisterClass *RC);
+
+    /// chooseReg - Pick a physical register to hold the specified
+    /// virtual register by choosing the one which will be read furthest
+    /// in the future.
+    ///
+    unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                       unsigned VirtReg);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+    /// register use to refer to a physical register. This method may do this
+    /// in one of several ways: if the register is available in a physical
+    /// register already, it uses that physical register. If the value is not
+    /// in a physical register, and if there are physical registers available,
+    /// it loads it into a register. If register pressure is high, and it is
+    /// possible, it tries to fold the load of the virtual register into the
+    /// instruction itself. It avoids doing this if register pressure is low to
+    /// improve the chance that subsequent instructions can use the reloaded
+    /// value. This method returns the modified instruction.
+    ///
+    MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                unsigned OpNum);
+
+  };
+  char RABigBlock::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RABigBlock::getStackSpaceFor(unsigned VirtReg,
+                                 const TargetRegisterClass *RC) {
+  // Find the location Reg would belong...
+  int FrameIdx = getVirt2StackSlot(VirtReg);
+
+  if (FrameIdx)
+    return FrameIdx - 1;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                   RC->getAlignment());
+
+  // Assign the slot...
+  getVirt2StackSlot(VirtReg) = FrameIdx + 1;
+  return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RABigBlock::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;      // PhysReg no longer used
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RABigBlock::spillVirtReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I,
+                              unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " Must not have appropriate kill for the register or use exists beyond"
+         " the intended one.");
+  DOUT << "  Spilling register " << RegInfo->getName(PhysReg)
+       << " containing %reg" << VirtReg;
+
+  const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+
+  if (!isVirtRegModified(VirtReg))
+    DOUT << " which has not been modified, so no store necessary!";
+
+  // Otherwise, there is a virtual register corresponding to this physical
+  // register. We only need to spill it into its stack slot if it has been
+  // modified.
+  if (isVirtRegModified(VirtReg)) {
+    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DOUT << " to stack slot #" << FrameIndex;
+    TII->storeRegToStackSlot(MBB, I, PhysReg, true, FrameIndex, RC);
+    ++NumStores;   // Update statistics
+  }
+
+  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+
+  DOUT << "\n";
+  removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                              unsigned PhysReg, bool OnlyVirtRegs) {
+  if (PhysRegsUsed[PhysReg] != -1) {   // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+  } else {
+    // If the selected register aliases any other registers, we must make
+    // sure that one of the aliases isn't alive.
+    for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+         *AliasSet; ++AliasSet)
+      if (PhysRegsUsed[*AliasSet] != -1 &&    // Spill aliased register.
+          PhysRegsUsed[*AliasSet] != -2)      // If allocatable.
+        if (PhysRegsUsed[*AliasSet])
+          spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+  }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+  assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+  // Update information to note the fact that this register was just used, and
+  // it holds VirtReg.
+  PhysRegsUsed[PhysReg] = VirtReg;
+  getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const {
+  if (PhysRegsUsed[PhysReg] != -1) return false;
+
+  // If the selected register aliases any other allocated registers, it is
+  // not free!
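+  // (For example, on X86 AX is not available while EAX holds a value, since
+  // the two registers alias.)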
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? + return false; // Can't use this reg then. + return true; +} + + +/// getFreeReg - Look to see if there is a free register available in the +/// specified register class. If not, return 0. +/// +unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) { + // Get iterators defining the range of registers that are valid to allocate in + // this class, which also specifies the preferred allocation order. + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegAvailable(*RI)) { // Is reg unused? + assert(*RI != 0 && "Cannot use register!"); + return *RI; // Found an unused register! + } + return 0; +} + + +/// chooseReg - Pick a physical register to hold the specified +/// virtual register by choosing the one whose value will be read +/// furthest in the future. +/// +unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned VirtReg) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + // First check to see if we have a free register of the requested type... + unsigned PhysReg = getFreeReg(RC); + + // If we didn't find an unused register, find the one which will be + // read at the most distant point in time. + if (PhysReg == 0) { + unsigned delay=0, longest_delay=0; + VRegTimes* ReadTimes; + + unsigned curTime = MBBCurTime; + + // for all physical regs in the RC, + for(TargetRegisterClass::iterator pReg = RC->begin(); + pReg != RC->end(); ++pReg) { + // how long until they're read? + if(PhysRegsUsed[*pReg]>0) { // ignore non-allocatable regs + ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]]; + if(ReadTimes && !ReadTimes->empty()) { + unsigned& pt = VRegReadIdx[PhysRegsUsed[*pReg]]; + while(pt < ReadTimes->size() && (*ReadTimes)[pt] < curTime) { + ++pt; + } + + if(pt < ReadTimes->size()) + delay = (*ReadTimes)[pt] - curTime; + else + delay = MBBLastInsnTime + 1 - curTime; + } else { + // This register is only defined, but never + // read in this MBB. Therefore the next read + // happens after the end of this MBB + delay = MBBLastInsnTime + 1 - curTime; + } + + + if(delay > longest_delay) { + longest_delay = delay; + PhysReg = *pReg; + } + } + } + + if(PhysReg == 0) { // ok, now we're desperate. We couldn't choose + // a register to spill by looking through the + // read timetable, so now we just spill the + // first allocatable register we find. + + // for all physical regs in the RC, + for(TargetRegisterClass::iterator pReg = RC->begin(); + pReg != RC->end(); ++pReg) { + // if we find a register we can spill + if(PhysRegsUsed[*pReg]>=-1) + PhysReg = *pReg; // choose it to be spilled + } + } + + assert(PhysReg && "couldn't choose a register to spill :( "); + // TODO: assert that RC->contains(PhysReg) / handle aliased registers? + + // since we needed to look in the table we need to spill this register. + spillPhysReg(MBB, I, PhysReg); + } + + // assign the vreg to our chosen physical register + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; // and return it +} + + +/// reloadVirtReg - This method transforms an instruction with a virtual +/// register use to one that references a physical register. It does this as +/// follows: +/// +/// 1) If the register is already in a physical register, it uses it. 
+/// 2) Otherwise, if there is a free physical register, it uses that. +/// 3) Otherwise, it calls chooseReg() to get the physical register +/// holding the most distantly needed value, generating a spill in +/// the process. +/// +/// This method returns the modified instruction. +MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum) { + unsigned VirtReg = MI->getOperand(OpNum).getReg(); + const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo(); + + // If the virtual register is already available in a physical register, + // just update the instruction and return. + if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + MI->getOperand(OpNum).setReg(PR); + return MI; + } + + // Otherwise, if we have free physical registers available to hold the + // value, use them. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + unsigned PhysReg = getFreeReg(RC); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + + if (PhysReg) { // we have a free register, so use it. + assignVirtToPhysReg(VirtReg, PhysReg); + } else { // no free registers available. + // try to fold the spill into the instruction + SmallVector<unsigned, 1> Ops; + Ops.push_back(OpNum); + if(MachineInstr* FMI = TII->foldMemoryOperand(*MF, MI, Ops, FrameIndex)) { + ++NumFolded; + FMI->copyKillDeadInfo(MI); + return MBB.insert(MBB.erase(MI), FMI); + } + + // determine which of the physical registers we'll kill off, since we + // couldn't fold. + PhysReg = chooseReg(MBB, MI, VirtReg); + } + + // this virtual register is now unmodified (since we just reloaded it) + markVirtRegModified(VirtReg, false); + + DOUT << " Reloading %reg" << VirtReg << " into " + << RegInfo->getName(PhysReg) << "\n"; + + // Add move instruction(s) + TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); + ++NumLoads; // Update statistics + + MF->getRegInfo().setPhysRegUsed(PhysReg); + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + return MI; +} + +/// Fill out the vreg read timetable. Since ReadTime increases +/// monotonically, the individual readtime sets will be sorted +/// in ascending order. +void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII; + unsigned ReadTime; + + for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) { + MachineInstr *MI = MII; + + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // look for vreg reads.. + if (MO.isReg() && !MO.isDef() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + // ..and add them to the read table. + VRegTimes* &Times = VRegReadTable[MO.getReg()]; + if(!VRegReadTable[MO.getReg()]) { + Times = new VRegTimes; + VRegReadIdx[MO.getReg()] = 0; + } + Times->push_back(ReadTime); + } + } + + } + + MBBLastInsnTime = ReadTime; + + for(DenseMap<unsigned, VRegTimes*, VRegKeyInfo>::iterator Reads = VRegReadTable.begin(); + Reads != VRegReadTable.end(); ++Reads) { + if(Reads->second) { + DOUT << "Reads[" << Reads->first << "]=" << Reads->second->size() << "\n"; + } + } +} + +/// isReadModWriteImplicitKill - True if this is an implicit kill for a +/// read/mod/write register, i.e. update partial register. 
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + MO.isDef() && !MO.isDead()) + return true; + } + return false; +} + +/// isReadModWriteImplicitDef - True if this is an implicit def for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + !MO.isDef() && MO.isKill()) + return true; + } + return false; +} + + +void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII = MBB.begin(); + const TargetInstrInfo &TII = *TM->getInstrInfo(); + + DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); + + // If this is the first basic block in the machine function, add live-in + // registers as active. + if (&MBB == &*MF->begin()) { + for (MachineRegisterInfo::livein_iterator + I = MF->getRegInfo().livein_begin(), + E = MF->getRegInfo().livein_end(); I != E; ++I) { + unsigned Reg = I->first; + MF->getRegInfo().setPhysRegUsed(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*AliasSet); + } + } + } + } + + // Otherwise, sequentially allocate each instruction in the MBB. + MBBCurTime = -1; + while (MII != MBB.end()) { + MachineInstr *MI = MII++; + MBBCurTime++; + const TargetInstrDesc &TID = MI->getDesc(); + DEBUG(DOUT << "\nTime=" << MBBCurTime << " Starting RegAlloc of: " << *MI; + DOUT << " Regs have values: "; + for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + DOUT << "[" << RegInfo->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + DOUT << "\n"); + + SmallVector<unsigned, 8> Kills; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isKill()) { + if (!MO.isImplicit()) + Kills.push_back(MO.getReg()); + else if (!isReadModWriteImplicitKill(MI, MO.getReg())) + // These are extra physical register kills when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + Kills.push_back(MO.getReg()); + } + } + + // Get the used operands into registers. This has the potential to spill + // incoming values if we are out of registers. Note that we completely + // ignore physical register uses here. We assume that if an explicit + // physical register is referenced by the instruction, that it is guaranteed + // to be live-in, or the input is badly hosed. 
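+ // As an illustrative sketch (vreg numbers, registers and frame indexes
+ // made up, MI syntax simplified): a use like
+ //   %reg1026 = ADD32rr %reg1024, %reg1025
+ // has both source vregs reloaded by the loop below, e.g.
+ //   %EAX = MOV32rm <fi#0>
+ //   %ECX = MOV32rm <fi#1>
+ //   %reg1026 = ADD32rr %EAX, %ECX
+ // unless the reload can instead be folded into the instruction as a
+ // memory operand (see reloadVirtReg above).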
+ // + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // here we are looking for only used operands (never def&use) + if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + MI = reloadVirtReg(MBB, MI, i); + } + + // If this instruction is the last user of this register, kill the + // value, freeing the register being used, so it doesn't need to be + // spilled to memory. + // + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned VirtReg = Kills[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // If the virtual register was never materialized into a register, it + // might not be in the map, but it won't hurt to zero it out anyway. + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else { + assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && + "Silently clearing a virtual register?"); + } + + if (PhysReg) { + DOUT << " Last use of " << RegInfo->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(PhysReg); + for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DOUT << " Last use of " + << RegInfo->getName(*AliasSet) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(*AliasSet); + } + } + } + } + + // Loop over all of the operands of the instruction, spilling registers that + // are defined, and marking explicit destinations in the PhysRegsUsed map. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() && + TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*AliasSet); + } + } + } + } + + // Loop over the implicit defs, spilling them as well. 
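+ // A call is the typical source of implicit defs: it clobbers the
+ // target's caller-saved registers (e.g. EAX, ECX and EDX on x86-32,
+ // named here purely as an illustration), so any virtual register
+ // currently living in one of them must be spilled first.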
+ if (TID.getImplicitDefs()) { + for (const unsigned *ImplicitDefs = TID.getImplicitDefs(); + *ImplicitDefs; ++ImplicitDefs) { + unsigned Reg = *ImplicitDefs; + if (PhysRegsUsed[Reg] != -2) { + spillPhysReg(MBB, MI, Reg, true); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + } + MF->getRegInfo().setPhysRegUsed(Reg); + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*AliasSet); + } + } + } + } + + SmallVector<unsigned, 8> DeadDefs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDead()) + DeadDefs.push_back(MO.getReg()); + } + + // Okay, we have allocated all of the source operands and spilled any values + // that would be destroyed by defs of this instruction. Loop over the + // explicit defs and assign them to a register, spilling incoming values if + // we need to scavenge a register. + // + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = chooseReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + MI->getOperand(i).setReg(DestPhysReg); // Assign the output register + } + } + + // If this instruction defines any registers that are immediately dead, + // kill them now. + // + for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { + unsigned VirtReg = DeadDefs[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + assert(PhysReg != 0); + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } + + if (PhysReg) { + DOUT << " Register " << RegInfo->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it from live set\n"; + removePhysReg(PhysReg); + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DOUT << " Register " << RegInfo->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"; + removePhysReg(*AliasSet); + } + } + } + } + + // Finally, if this is a noop copy instruction, zap it. + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (TII.isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + SrcReg == DstReg) + MBB.erase(MI); + } + + MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + + // Spill all physical registers holding virtual registers now. 
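+ // This allocator keeps no values in registers across block boundaries:
+ // every vreg still resident in a physical register is written back to
+ // its stack slot before the terminator, and a successor block reloads
+ // it on first use.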
+ for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (unsigned VirtReg = PhysRegsUsed[i]) + spillVirtReg(MBB, MI, VirtReg, i); + else + removePhysReg(i); + } +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) { + DOUT << "Machine Function " << "\n"; + MF = &Fn; + TM = &Fn.getTarget(); + RegInfo = TM->getRegisterInfo(); + + PhysRegsUsed.assign(RegInfo->getNumRegs(), -1); + + // At various places we want to efficiently check to see whether a register + // is allocatable. To handle this, we mark all unallocatable registers as + // being pinned down, permanently. + { + BitVector Allocable = RegInfo->getAllocatableSet(Fn); + for (unsigned i = 0, e = Allocable.size(); i != e; ++i) + if (!Allocable[i]) + PhysRegsUsed[i] = -2; // Mark the reg unallocable. + } + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + Virt2PhysRegMap.grow(MF->getRegInfo().getLastVirtReg()); + StackSlotForVirtReg.grow(MF->getRegInfo().getLastVirtReg()); + VirtRegModified.resize(MF->getRegInfo().getLastVirtReg() - + TargetRegisterInfo::FirstVirtualRegister + 1, 0); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + // fill out the read timetable + FillVRegReadTable(*MBB); + // use it to allocate the BB + AllocateBasicBlock(*MBB); + // clear it + VRegReadTable.clear(); + } + + StackSlotForVirtReg.clear(); + PhysRegsUsed.clear(); + VirtRegModified.clear(); + Virt2PhysRegMap.clear(); + return true; +} + +FunctionPass *llvm::createBigBlockRegisterAllocator() { + return new RABigBlock(); +} + diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp new file mode 100644 index 0000000..ee118de --- /dev/null +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -0,0 +1,1535 @@ +//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a linear scan register allocator. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "Spiller.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <set>
+#include <queue>
+#include <memory>
+#include <cmath>
+#include <iostream>
+
+using namespace llvm;
+
+STATISTIC(NumIters , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce, "Number of copies coalesced");
+STATISTIC(NumDowngrade, "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+ cl::desc("Use new spilling heuristic"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PreSplitIntervals("pre-alloc-split",
+ cl::desc("Pre-register allocation live interval splitting"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+NewSpillFramework("new-spill-framework",
+ cl::desc("New spilling framework"),
+ cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+ createLinearScanRegisterAllocator);
+
+namespace {
+ struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass {
+ static char ID;
+ RALinScan() : MachineFunctionPass(&ID) {}
+
+ typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+ typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
+ private:
+ /// RelatedRegClasses - This structure is built the first time a function is
+ /// compiled, and keeps track of which register classes have registers that
+ /// belong to multiple classes or have aliases that are in other classes.
+ EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+ DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+ // NextReloadMap - For each register in the map, it maps to another
+ // register which is defined by a reload from the same stack slot and
+ // both reloads are in the same basic block.
+ DenseMap<unsigned, unsigned> NextReloadMap;
+
+ // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+ // un-favored for allocation.
+ SmallSet<unsigned, 8> DowngradedRegs;
+
+ // DowngradeMap - A map from virtual registers to physical registers being
+ // downgraded for the virtual registers.
+ DenseMap<unsigned, unsigned> DowngradeMap;
+
+ MachineFunction* mf_;
+ MachineRegisterInfo* mri_;
+ const TargetMachine* tm_;
+ const TargetRegisterInfo* tri_;
+ const TargetInstrInfo* tii_;
+ BitVector allocatableRegs_;
+ LiveIntervals* li_;
+ LiveStacks* ls_;
+ const MachineLoopInfo *loopInfo;
+
+ /// handled_ - Intervals are added to the handled_ set in the order of their
+ /// start value. This is used for backtracking.
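+ /// For example, when a spill forces a rollback to point t, every interval
+ /// in handled_ whose start is at or after t is popped and, unless it was
+ /// itself spilled, pushed back onto unhandled_ to be re-allocated.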
+ std::vector<LiveInterval*> handled_;
+
+ /// fixed_ - Intervals that correspond to machine registers.
+ ///
+ IntervalPtrs fixed_;
+
+ /// active_ - Intervals that are currently being processed, and which have a
+ /// live range active for the current point.
+ IntervalPtrs active_;
+
+ /// inactive_ - Intervals that are currently being processed, but which have
+ /// a hole at the current point.
+ IntervalPtrs inactive_;
+
+ typedef std::priority_queue<LiveInterval*,
+ SmallVector<LiveInterval*, 64>,
+ greater_ptr<LiveInterval> > IntervalHeap;
+ IntervalHeap unhandled_;
+
+ /// regUse_ - Tracks register usage.
+ SmallVector<unsigned, 32> regUse_;
+ SmallVector<unsigned, 32> regUseBackUp_;
+
+ /// vrm_ - Tracks register assignments.
+ VirtRegMap* vrm_;
+
+ std::auto_ptr<VirtRegRewriter> rewriter_;
+
+ std::auto_ptr<Spiller> spiller_;
+
+ public:
+ virtual const char* getPassName() const {
+ return "Linear Scan Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ // Make sure PassManager knows which analyses to make available
+ // to coalescing and which analyses coalescing invalidates.
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ if (PreSplitIntervals)
+ AU.addRequiredID(PreAllocSplittingID);
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - register allocate the whole function
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ /// linearScan - the linear scan algorithm
+ void linearScan();
+
+ /// initIntervalSets - initialize the interval sets.
+ ///
+ void initIntervalSets();
+
+ /// processActiveIntervals - expire old intervals and move non-overlapping
+ /// ones to the inactive list.
+ void processActiveIntervals(unsigned CurPoint);
+
+ /// processInactiveIntervals - expire old intervals and move overlapping
+ /// ones to the active list.
+ void processInactiveIntervals(unsigned CurPoint);
+
+ /// hasNextReloadInterval - Return the next live interval that's being
+ /// defined by a reload from the same SS as the specified one.
+ LiveInterval *hasNextReloadInterval(LiveInterval *cur);
+
+ /// DowngradeRegister - Downgrade a register for allocation.
+ void DowngradeRegister(LiveInterval *li, unsigned Reg);
+
+ /// UpgradeRegister - Upgrade a register for allocation.
+ void UpgradeRegister(unsigned Reg);
+
+ /// assignRegOrStackSlotAtInterval - assign a register if one
+ /// is available, or spill.
+ void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+ void updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC);
+
+ /// findIntervalsToSpill - Determine the intervals to spill for the
+ /// specified interval. It's passed the physical registers whose spill
+ /// weight is the lowest among all the registers whose live intervals
+ /// conflict with the interval.
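+ /// Up to three near-minimal candidates are compared (their spill weights
+ /// must be within a few percent of the minimum), and the one whose
+ /// conflicting defs and uses carry the least loop-depth weight is the
+ /// one actually spilled.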
+ void findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals);
+
+ /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
+ /// try to allocate the definition the same register as the source register
+ /// if the register is not defined during the live time of the interval.
+ /// This eliminates a copy. This is used to coalesce copies which were not
+ /// coalesced away before allocation either due to dest and src being in
+ /// different register classes or because the coalescer was overly
+ /// conservative.
+ unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
+
+ ///
+ /// Register usage / availability tracking helpers.
+ ///
+
+ void initRegUses() {
+ regUse_.resize(tri_->getNumRegs(), 0);
+ regUseBackUp_.resize(tri_->getNumRegs(), 0);
+ }
+
+ void finalizeRegUses() {
+#ifndef NDEBUG
+ // Verify all the registers are "freed".
+ bool Error = false;
+ for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
+ if (regUse_[i] != 0) {
+ cerr << tri_->getName(i) << " is still in use!\n";
+ Error = true;
+ }
+ }
+ if (Error)
+ abort();
+#endif
+ regUse_.clear();
+ regUseBackUp_.clear();
+ }
+
+ void addRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ ++regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
+ ++regUse_[*as];
+ }
+
+ void delRegUse(unsigned physReg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ assert(regUse_[physReg] != 0);
+ --regUse_[physReg];
+ for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
+ assert(regUse_[*as] != 0);
+ --regUse_[*as];
+ }
+ }
+
+ bool isRegAvail(unsigned physReg) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ return regUse_[physReg] == 0;
+ }
+
+ void backUpRegUses() {
+ regUseBackUp_ = regUse_;
+ }
+
+ void restoreRegUses() {
+ regUse_ = regUseBackUp_;
+ }
+
+ ///
+ /// Register handling helpers.
+ ///
+
+ /// getFreePhysReg - return a free physical register for this virtual
+ /// register interval if we have one, otherwise return 0.
+ unsigned getFreePhysReg(LiveInterval* cur);
+ unsigned getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned MaxInactiveCount,
+ SmallVector<unsigned, 256> &inactiveCounts,
+ bool SkipDGRegs);
+
+ /// assignVirt2StackSlot - assigns this virtual register to a
+ /// stack slot. Returns the stack slot.
+ int assignVirt2StackSlot(unsigned virtReg);
+
+ void ComputeRelatedRegClasses();
+
+ template <typename ItTy>
+ void printIntervals(const char* const str, ItTy i, ItTy e) const {
+ if (str) DOUT << str << " intervals:\n";
+ for (; i != e; ++i) {
+ DOUT << "\t" << *i->first << " -> ";
+ unsigned reg = i->first->reg;
+ if (TargetRegisterInfo::isVirtualRegister(reg)) {
+ reg = vrm_->getPhys(reg);
+ }
+ DOUT << tri_->getName(reg) << '\n';
+ }
+ }
+ };
+ char RALinScan::ID = 0;
+}
+
+static RegisterPass<RALinScan>
+X("linearscan-regalloc", "Linear Scan Register Allocator");
+
+bool validateRegAlloc(MachineFunction *mf, LiveIntervals *lis,
+ VirtRegMap *vrm) {
+
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+ bool allocationValid = true;
+
+
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+
+ LiveInterval *li = itr->second;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
+ continue;
+ }
+
+ if (vrm->hasPhys(li->reg)) {
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+
+ if (lis->hasInterval(vrm->getPhys(li->reg))) {
+ if (li->overlaps(lis->getInterval(vrm->getPhys(li->reg)))) {
+ std::cerr << "vreg " << li->reg << " overlaps its assigned preg "
+ << vrm->getPhys(li->reg) << "(" << tri->getName(vrm->getPhys(li->reg)) << ")\n";
+ }
+ }
+
+ TargetRegisterClass::iterator fReg =
+ std::find(trc->allocation_order_begin(*mf), trc->allocation_order_end(*mf),
+ vrm->getPhys(li->reg));
+
+ if (fReg == trc->allocation_order_end(*mf)) {
+ std::cerr << "preg " << vrm->getPhys(li->reg)
+ << "(" << tri->getName(vrm->getPhys(li->reg)) << ") is not in the allocation set for vreg "
+ << li->reg << "\n";
+ allocationValid &= false;
+ }
+ }
+ else {
+ std::cerr << "No preg for vreg " << li->reg << "\n";
+ // What about conflicting loads/stores?
+ continue;
+ }
+
+ for (LiveIntervals::iterator itr2 = next(itr); itr2 != end; ++itr2) {
+
+ LiveInterval *li2 = itr2->second;
+
+ if (li2->empty())
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li2->reg)) {
+ if (li->overlaps(*li2)) {
+ if (vrm->getPhys(li->reg) == li2->reg ||
+ tri->areAliases(vrm->getPhys(li->reg), li2->reg)) {
+ std::cerr << "vreg " << li->reg << " overlaps preg "
+ << li2->reg << "(" << tri->getName(li2->reg) << ") which aliases "
+ << vrm->getPhys(li->reg) << "(" << tri->getName(vrm->getPhys(li->reg)) << ")\n";
+ allocationValid &= false;
+ }
+ }
+ }
+ else {
+
+ if (!vrm->hasPhys(li2->reg)) {
+ continue;
+ }
+
+ if (li->overlaps(*li2)) {
+ if (vrm->getPhys(li->reg) == vrm->getPhys(li2->reg) ||
+ tri->areAliases(vrm->getPhys(li->reg), vrm->getPhys(li2->reg))) {
+ std::cerr << "vreg " << li->reg << " (preg " << vrm->getPhys(li->reg)
+ << ") overlaps vreg " << li2->reg << " (preg " << vrm->getPhys(li2->reg)
+ << ") and " << vrm->getPhys(li->reg) << " aliases " << vrm->getPhys(li2->reg) << "\n";
+ allocationValid &= false;
+ }
+ }
+ }
+ }
+
+ }
+
+ return allocationValid;
+
+}
+
+
+void RALinScan::ComputeRelatedRegClasses() {
+ // First pass, add all reg classes to the union, and determine at least one
+ // reg class that each register is in.
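+ // The net effect, illustrated on X86: GR16 and GR32 become related even
+ // though they share no registers, because AX aliases EAX and the second
+ // pass below unions classes connected by aliases. (The classes named
+ // here are illustrative.)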
+ bool HasAliases = false;
+ for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+ E = tri_->regclass_end(); RCI != E; ++RCI) {
+ RelatedRegClasses.insert(*RCI);
+ for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+ I != E; ++I) {
+ HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
+
+ const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+ if (PRC) {
+ // Already processed this register. Just make sure we know that
+ // multiple register classes share a register.
+ RelatedRegClasses.unionSets(PRC, *RCI);
+ } else {
+ PRC = *RCI;
+ }
+ }
+ }
+
+ // Second pass, now that we know conservatively what register classes each reg
+ // belongs to, add info about aliases. We don't need to do this for targets
+ // without register aliases.
+ if (HasAliases)
+ for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
+ I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+ I != E; ++I)
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
+ RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+}
+
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy,
+/// try to allocate the definition the same register as the source register
+/// if the register is not defined during the live time of the interval.
+/// This eliminates a copy. This is used to coalesce copies which were not
+/// coalesced away before allocation either due to dest and src being in
+/// different register classes or because the coalescer was overly
+/// conservative.
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
+ if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue())
+ return Reg;
+
+ VNInfo *vni = cur.begin()->valno;
+ if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ return Reg;
+ MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg;
+ if (!CopyMI ||
+ !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return Reg;
+ PhysReg = SrcReg;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!vrm_->isAssignedReg(SrcReg))
+ return Reg;
+ PhysReg = vrm_->getPhys(SrcReg);
+ }
+ if (Reg == PhysReg)
+ return Reg;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
+ if (!RC->contains(PhysReg))
+ return Reg;
+
+ // Try to coalesce.
+ if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) {
+ DOUT << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg)
+ << '\n';
+ vrm_->clearVirt(cur.reg);
+ vrm_->assignVirt2Phys(cur.reg, PhysReg);
+
+ // Remove unnecessary kills since a copy does not clobber the register.
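+ // E.g. if cur was defined by a copy from %EAX and a use of cur.reg
+ // carries a kill flag at a point where %EAX is still live, the flag is
+ // cleared: after coalescing, killing cur.reg there would wrongly end
+ // %EAX's live range. (%EAX is illustrative.)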
+ if (li_->hasInterval(SrcReg)) { + LiveInterval &SrcLI = li_->getInterval(SrcReg); + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(cur.reg), + E = mri_->reg_end(); I != E; ++I) { + MachineOperand &O = I.getOperand(); + if (!O.isUse() || !O.isKill()) + continue; + MachineInstr *MI = &*I; + if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI)))) + O.setIsKill(false); + } + } + + ++NumCoalesce; + return SrcReg; + } + + return Reg; +} + +bool RALinScan::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + mri_ = &fn.getRegInfo(); + tm_ = &fn.getTarget(); + tri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + allocatableRegs_ = tri_->getAllocatableSet(fn); + li_ = &getAnalysis<LiveIntervals>(); + ls_ = &getAnalysis<LiveStacks>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + + // We don't run the coalescer here because we have no reason to + // interact with it. If the coalescer requires interaction, it + // won't do anything. If it doesn't require interaction, we assume + // it was run as a separate pass. + + // If this is the first function compiled, compute the related reg classes. + if (RelatedRegClasses.empty()) + ComputeRelatedRegClasses(); + + // Also resize register usage trackers. + initRegUses(); + + vrm_ = &getAnalysis<VirtRegMap>(); + if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); + + if (NewSpillFramework) { + spiller_.reset(createSpiller(mf_, li_, ls_, vrm_)); + } + + initIntervalSets(); + + linearScan(); + + if (NewSpillFramework) { + bool allocValid = validateRegAlloc(mf_, li_, vrm_); + } + + // Rewrite spill code and update the PhysRegsUsed set. + rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); + + assert(unhandled_.empty() && "Unhandled live intervals remain!"); + + finalizeRegUses(); + + fixed_.clear(); + active_.clear(); + inactive_.clear(); + handled_.clear(); + NextReloadMap.clear(); + DowngradedRegs.clear(); + DowngradeMap.clear(); + spiller_.reset(0); + + return true; +} + +/// initIntervalSets - initialize the interval sets. +/// +void RALinScan::initIntervalSets() +{ + assert(unhandled_.empty() && fixed_.empty() && + active_.empty() && inactive_.empty() && + "interval sets should be empty on initialization"); + + handled_.reserve(li_->getNumIntervals()); + + for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { + if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { + mri_->setPhysRegUsed(i->second->reg); + fixed_.push_back(std::make_pair(i->second, i->second->begin())); + } else + unhandled_.push(i->second); + } +} + +void RALinScan::linearScan() +{ + // linear scan algorithm + DOUT << "********** LINEAR SCAN **********\n"; + DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n'; + + DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end())); + + while (!unhandled_.empty()) { + // pick the interval with the earliest start point + LiveInterval* cur = unhandled_.top(); + unhandled_.pop(); + ++NumIters; + DOUT << "\n*** CURRENT ***: " << *cur << '\n'; + + if (!cur->empty()) { + processActiveIntervals(cur->beginNumber()); + processInactiveIntervals(cur->beginNumber()); + + assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && + "Can only allocate virtual registers!"); + } + + // Allocating a virtual register. try to find a free + // physical register or spill an interval (possibly this one) in order to + // assign it one. 
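+ // Invariant at this point: active_ holds intervals live at cur's start,
+ // inactive_ holds intervals with a lifetime hole at cur's start, and
+ // regUse_ counts exactly the registers occupied by active_ intervals.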
+ assignRegOrStackSlotAtInterval(cur);
+
+ DEBUG(printIntervals("active", active_.begin(), active_.end()));
+ DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end()));
+ }
+
+ // Expire any remaining active intervals
+ while (!active_.empty()) {
+ IntervalPtr &IP = active_.back();
+ unsigned reg = IP.first->reg;
+ DOUT << "\tinterval " << *IP.first << " expired\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ active_.pop_back();
+ }
+
+ // Expire any remaining inactive intervals
+ DEBUG(for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+ DOUT << "\tinterval " << *i->first << " expired\n");
+ inactive_.clear();
+
+ // Add live-ins to every BB except for entry. Also perform trivial coalescing.
+ MachineFunction::iterator EntryMBB = mf_->begin();
+ SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ LiveInterval &cur = *i->second;
+ unsigned Reg = 0;
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
+ if (isPhys)
+ Reg = cur.reg;
+ else if (vrm_->isAssignedReg(cur.reg))
+ Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
+ if (!Reg)
+ continue;
+ // Ignore split live intervals.
+ if (!isPhys && vrm_->getPreSplitReg(cur.reg))
+ continue;
+ for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
+ I != E; ++I) {
+ const LiveRange &LR = *I;
+ if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
+ for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
+ if (LiveInMBBs[i] != EntryMBB)
+ LiveInMBBs[i]->addLiveIn(Reg);
+ LiveInMBBs.clear();
+ }
+ }
+ }
+
+ DOUT << *vrm_;
+
+ // Look for physical registers that end up not being allocated even though
+ // the register allocator had to spill other registers in its register class.
+ if (ls_->getNumIntervals() == 0)
+ return;
+ if (!vrm_->FindUnusedRegisters(tri_, li_))
+ return;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(unsigned CurPoint)
+{
+ DOUT << "\tprocessing active intervals:\n";
+
+ for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+ LiveInterval *Interval = active_[i].first;
+ LiveInterval::iterator IntervalPos = active_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // Remove expired intervals.
+ DOUT << "\t\tinterval " << *Interval << " expired\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+
+ } else if (IntervalPos->start > CurPoint) {
+ // Move inactive intervals to the inactive list.
+ DOUT << "\t\tinterval " << *Interval << " inactive\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ delRegUse(reg);
+ // add to inactive.
+ inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ active_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(unsigned CurPoint)
+{
+ DOUT << "\tprocessing inactive intervals:\n";
+
+ for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+ LiveInterval *Interval = inactive_[i].first;
+ LiveInterval::iterator IntervalPos = inactive_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // remove expired intervals.
+ DOUT << "\t\tinterval " << *Interval << " expired\n";
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else if (IntervalPos->start <= CurPoint) {
+ // Move re-activated intervals into the active list.
+ DOUT << "\t\tinterval " << *Interval << " active\n";
+ assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ addRegUse(reg);
+ // add to active
+ active_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ inactive_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its aliases by the given weight.
+void RALinScan::updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const TargetRegisterClass *RC) {
+ SmallSet<unsigned, 4> Processed;
+ SmallSet<unsigned, 4> SuperAdded;
+ SmallVector<unsigned, 4> Supers;
+ Weights[reg] += weight;
+ Processed.insert(reg);
+ for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
+ Weights[*as] += weight;
+ Processed.insert(*as);
+ if (tri_->isSubRegister(*as, reg) &&
+ SuperAdded.insert(*as) &&
+ RC->contains(*as)) {
+ Supers.push_back(*as);
+ }
+ }
+
+ // If the alias is a super-register and the super-register is in the
+ // register class we are trying to allocate, then add the weight to all
+ // sub-registers of the super-register even if they are not aliases.
+ // e.g. allocating for GR32, bh is not used, updating bl spill weight.
+ // bl should get the same spill weight; otherwise it will be chosen
+ // as a spill candidate since spilling bh doesn't make ebx available.
+ for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+ for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+ if (!Processed.count(*sr))
+ Weights[*sr] += weight;
+ }
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+ for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+ I != E; ++I)
+ if (I->first == LI) return I;
+ return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ RALinScan::IntervalPtr &IP = V[i];
+ LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+ IP.second, Point);
+ if (I != IP.first->begin()) --I;
+ IP.second = I;
+ }
+}
+
+/// addStackInterval - Create a LiveInterval for the stack slot if the
+/// specified live interval has been spilled.
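+/// E.g. once a vreg is spilled to stack slot FI#3, the slot itself gets a
+/// live interval in LiveStacks so later passes (such as stack slot
+/// coloring) can reason about slot lifetimes. (FI#3 is illustrative.)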
+static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
+ LiveIntervals *li_,
+ MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
+ int SS = vrm_.getStackSlot(cur->reg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
+
+ VNInfo *VNI;
+ if (SI.hasAtLeastOneValue())
+ VNI = SI.getValNumInfo(0);
+ else
+ VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator());
+
+ LiveInterval &RI = li_->getInterval(cur->reg);
+ // FIXME: This may be overly conservative.
+ SI.MergeRangesInAsValue(RI, VNI);
+}
+
+/// getConflictWeight - Return the number of conflicts between cur's
+/// live interval and the defs and uses of Reg, weighted by loop depths.
+static
+float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
+ MachineRegisterInfo *mri_,
+ const MachineLoopInfo *loopInfo) {
+ float Conflicts = 0;
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+ E = mri_->reg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (cur->liveAt(li_->getInstructionIndex(MI))) {
+ unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
+ Conflicts += powf(10.0f, (float)loopDepth);
+ }
+ }
+ return Conflicts;
+}
+
+/// findIntervalsToSpill - Determine the intervals to spill for the
+/// specified interval. It's passed the physical registers whose spill
+/// weight is the lowest among all the registers whose live intervals
+/// conflict with the interval.
+void RALinScan::findIntervalsToSpill(LiveInterval *cur,
+ std::vector<std::pair<unsigned,float> > &Candidates,
+ unsigned NumCands,
+ SmallVector<LiveInterval*, 8> &SpillIntervals) {
+ // We have figured out the *best* register to spill. But there are other
+ // registers that are pretty good as well (spill weight within a few
+ // percent). Spill the one that has the fewest defs and uses that conflict
+ // with cur.
+ float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
+ SmallVector<LiveInterval*, 8> SLIs[3];
+
+ DOUT << "\tConsidering " << NumCands << " candidates: ";
+ DEBUG(for (unsigned i = 0; i != NumCands; ++i)
+ DOUT << tri_->getName(Candidates[i].first) << " ";
+ DOUT << "\n";);
+
+ // Calculate the number of conflicts of each candidate.
+ for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+ unsigned Reg = i->first->reg;
+ unsigned PhysReg = vrm_->getPhys(Reg);
+ if (!cur->overlapsFrom(*i->first, i->second-1))
+ continue;
+ for (unsigned j = 0; j < NumCands; ++j) {
+ unsigned Candidate = Candidates[j].first;
+ if (tri_->regsOverlap(PhysReg, Candidate)) {
+ if (NumCands > 1)
+ Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+ SLIs[j].push_back(i->first);
+ }
+ }
+ }
+
+ // Which is the best candidate?
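+ // The one with the fewest weighted conflicts: e.g. for Conflicts =
+ // {12.0, 3.5, 7.0} the middle candidate wins. (Values illustrative.)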
+ unsigned BestCandidate = 0; + float MinConflicts = Conflicts[0]; + for (unsigned i = 1; i != NumCands; ++i) { + if (Conflicts[i] < MinConflicts) { + BestCandidate = i; + MinConflicts = Conflicts[i]; + } + } + + std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), + std::back_inserter(SpillIntervals)); +} + +namespace { + struct WeightCompare { + typedef std::pair<unsigned, float> RegWeightPair; + bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { + return LHS.second < RHS.second; + } + }; +} + +static bool weightsAreClose(float w1, float w2) { + if (!NewHeuristic) + return false; + + float diff = w1 - w2; + if (diff <= 0.02f) // Within 0.02f + return true; + return (diff / w2) <= 0.05f; // Within 5%. +} + +LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { + DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg); + if (I == NextReloadMap.end()) + return 0; + return &li_->getInterval(I->second); +} + +void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { + bool isNew = DowngradedRegs.insert(Reg); + isNew = isNew; // Silence compiler warning. + assert(isNew && "Multiple reloads holding the same register?"); + DowngradeMap.insert(std::make_pair(li->reg, Reg)); + for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) { + isNew = DowngradedRegs.insert(*AS); + isNew = isNew; // Silence compiler warning. + assert(isNew && "Multiple reloads holding the same register?"); + DowngradeMap.insert(std::make_pair(li->reg, *AS)); + } + ++NumDowngrade; +} + +void RALinScan::UpgradeRegister(unsigned Reg) { + if (Reg) { + DowngradedRegs.erase(Reg); + for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) + DowngradedRegs.erase(*AS); + } +} + +namespace { + struct LISorter { + bool operator()(LiveInterval* A, LiveInterval* B) { + return A->beginNumber() < B->beginNumber(); + } + }; +} + +/// assignRegOrStackSlotAtInterval - assign a register if one is available, or +/// spill. +void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) +{ + DOUT << "\tallocating current interval: "; + + // This is an implicitly defined live interval, just assign any register. + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + if (cur->empty()) { + unsigned physReg = cur->preference; + if (!physReg) + physReg = *RC->allocation_order_begin(*mf_); + DOUT << tri_->getName(physReg) << '\n'; + // Note the register is not really in use. + vrm_->assignVirt2Phys(cur->reg, physReg); + return; + } + + backUpRegUses(); + + std::vector<std::pair<unsigned, float> > SpillWeightsToAdd; + unsigned StartPosition = cur->beginNumber(); + const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); + + // If start of this live interval is defined by a move instruction and its + // source is assigned a physical register that is compatible with the target + // register class, then we should try to assign it the same register. + // This can happen when the move is from a larger register class to a smaller + // one, e.g. X86::mov32to32_. These move instructions are not coalescable. 
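+ // Sketch: for "%reg1024 = X86::mov32to32_ %EDX" (registers illustrative),
+ // if %EDX is allocatable in cur's register class, it is recorded as the
+ // preference below, so the move later becomes a trivially removable
+ // identity copy.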
+ if (!cur->preference && cur->hasAtLeastOneValue()) { + VNInfo *vni = cur->begin()->valno; + if (vni->def && vni->def != ~1U && vni->def != ~0U) { + MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (CopyMI && + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) { + unsigned Reg = 0; + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + Reg = SrcReg; + else if (vrm_->isAssignedReg(SrcReg)) + Reg = vrm_->getPhys(SrcReg); + if (Reg) { + if (SrcSubReg) + Reg = tri_->getSubReg(Reg, SrcSubReg); + if (DstSubReg) + Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + cur->preference = Reg; + } + } + } + } + + // For every interval in inactive we overlap with, mark the + // register as not free and update spill weights. + for (IntervalPtrs::const_iterator i = inactive_.begin(), + e = inactive_.end(); i != e; ++i) { + unsigned Reg = i->first->reg; + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Can only allocate virtual registers!"); + const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); + // If this is not in a related reg class to the register we're allocating, + // don't check it. + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && + cur->overlapsFrom(*i->first, i->second-1)) { + Reg = vrm_->getPhys(Reg); + addRegUse(Reg); + SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); + } + } + + // Speculatively check to see if we can get a register right now. If not, + // we know we won't be able to by adding more constraints. If so, we can + // check to see if it is valid. Doing an exhaustive search of the fixed_ list + // is very bad (it contains all callee clobbered registers for any functions + // with a call), so we want to avoid doing that if possible. + unsigned physReg = getFreePhysReg(cur); + unsigned BestPhysReg = physReg; + if (physReg) { + // We got a register. However, if it's in the fixed_ list, we might + // conflict with it. Check to see if we conflict with it or any of its + // aliases. + SmallSet<unsigned, 8> RegAliases; + for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) + RegAliases.insert(*AS); + + bool ConflictsWithFixed = false; + for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { + IntervalPtr &IP = fixed_[i]; + if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { + // Okay, this reg is on the fixed list. Check to see if we actually + // conflict. + LiveInterval *I = IP.first; + if (I->endNumber() > StartPosition) { + LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); + IP.second = II; + if (II != I->begin() && II->start > StartPosition) + --II; + if (cur->overlapsFrom(*I, II)) { + ConflictsWithFixed = true; + break; + } + } + } + } + + // Okay, the register picked by our speculative getFreePhysReg call turned + // out to be in use. Actually add all of the conflicting fixed registers to + // regUse_ so we can do an accurate query. + if (ConflictsWithFixed) { + // For every interval in fixed we overlap with, mark the register as not + // free and update spill weights. 
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { + IntervalPtr &IP = fixed_[i]; + LiveInterval *I = IP.first; + + const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && + I->endNumber() > StartPosition) { + LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); + IP.second = II; + if (II != I->begin() && II->start > StartPosition) + --II; + if (cur->overlapsFrom(*I, II)) { + unsigned reg = I->reg; + addRegUse(reg); + SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); + } + } + } + + // Using the newly updated regUse_ object, which includes conflicts in the + // future, see if there are any registers available. + physReg = getFreePhysReg(cur); + } + } + + // Restore the physical register tracker, removing information about the + // future. + restoreRegUses(); + + // If we find a free register, we are done: assign this virtual to + // the free physical register and add this interval to the active + // list. + if (physReg) { + DOUT << tri_->getName(physReg) << '\n'; + vrm_->assignVirt2Phys(cur->reg, physReg); + addRegUse(physReg); + active_.push_back(std::make_pair(cur, cur->begin())); + handled_.push_back(cur); + + // "Upgrade" the physical register since it has been allocated. + UpgradeRegister(physReg); + if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { + // "Downgrade" physReg to try to keep physReg from being allocated until + // the next reload from the same SS is allocated. + NextReloadLI->preference = physReg; + DowngradeRegister(cur, physReg); + } + return; + } + DOUT << "no free registers\n"; + + // Compile the spill weights into an array that is better for scanning. + std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f); + for (std::vector<std::pair<unsigned, float> >::iterator + I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) + updateSpillWeights(SpillWeights, I->first, I->second, RC); + + // for each interval in active, update spill weights. + for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); + i != e; ++i) { + unsigned reg = i->first->reg; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + updateSpillWeights(SpillWeights, reg, i->first->weight, RC); + } + + DOUT << "\tassigning stack slot at interval "<< *cur << ":\n"; + + // Find a register to spill. + float minWeight = HUGE_VALF; + unsigned minReg = 0; /*cur->preference*/; // Try the pref register first. + + bool Found = false; + std::vector<std::pair<unsigned,float> > RegsWeights; + if (!minReg || SpillWeights[minReg] == HUGE_VALF) + for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), + e = RC->allocation_order_end(*mf_); i != e; ++i) { + unsigned reg = *i; + float regWeight = SpillWeights[reg]; + if (minWeight > regWeight) + Found = true; + RegsWeights.push_back(std::make_pair(reg, regWeight)); + } + + // If we didn't find a register that is spillable, try aliases? + if (!Found) { + for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), + e = RC->allocation_order_end(*mf_); i != e; ++i) { + unsigned reg = *i; + // No need to worry about if the alias register size < regsize of RC. + // We are going to spill all registers that alias it anyway. + for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) + RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as])); + } + } + + // Sort all potential spill candidates by weight. 
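+ // After the sort, RegsWeights[0] is the cheapest choice; e.g.
+ // {(EBX, 4.0), (ESI, 9.5), ...} spills EBX first. (Weights illustrative.)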
+ std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare());
+ minReg = RegsWeights[0].first;
+ minWeight = RegsWeights[0].second;
+ if (minWeight == HUGE_VALF) {
+ // All registers must have inf weight. Just grab one!
+ minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_);
+ if (cur->weight == HUGE_VALF ||
+ li_->getApproximateInstructionCount(*cur) == 0) {
+ // Spill a physical register around defs and uses.
+ if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+ // spillPhysRegAroundRegDefsUses may have invalidated iterators stored
+ // in fixed_. Reset them.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+ if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
+ IP.second = I->advanceTo(I->begin(), StartPosition);
+ }
+
+ DowngradedRegs.clear();
+ assignRegOrStackSlotAtInterval(cur);
+ } else {
+ cerr << "Ran out of registers during register allocation!\n";
+ exit(1);
+ }
+ return;
+ }
+ }
+
+ // Find up to 3 registers to consider as spill candidates.
+ unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
+ while (LastCandidate > 1) {
+ if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
+ break;
+ --LastCandidate;
+ }
+
+ DOUT << "\t\tregister(s) with min weight(s): ";
+ DEBUG(for (unsigned i = 0; i != LastCandidate; ++i)
+ DOUT << tri_->getName(RegsWeights[i].first)
+ << " (" << RegsWeights[i].second << ")\n");
+
+ // If the current interval has the minimum weight, we need to spill it,
+ // add any added intervals back to unhandled, and restart
+ // linear scan.
+ if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+ DOUT << "\t\t\tspilling(c): " << *cur << '\n';
+ SmallVector<LiveInterval*, 8> spillIs;
+ std::vector<LiveInterval*> added;
+
+ if (!NewSpillFramework) {
+ added = li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_);
+ } else {
+ added = spiller_->spill(cur);
+ }
+
+ std::sort(added.begin(), added.end(), LISorter());
+ addStackInterval(cur, ls_, li_, mri_, *vrm_);
+ if (added.empty())
+ return; // Early exit if all spills were folded.
+
+ // Merge added with unhandled. Note that we have already sorted
+ // intervals returned by addIntervalsForSpills by their starting
+ // point.
+ // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ unsigned ReloadIdx = ReloadLi->beginNumber();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
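+ // E.g. two back-to-back reloads of FI#2 in one block produce a
+ // NextReloadMap edge from the first reload's vreg to the second's,
+ // which later biases both toward the same physical register.
+ // (FI#2 is illustrative.)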
+ assert(LastReload->beginNumber() < ReloadIdx);
+ NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+ }
+ LastReloadMBB = ReloadMBB;
+ LastReload = ReloadLi;
+ LastReloadSS = ReloadSS;
+ }
+ unhandled_.push(ReloadLi);
+ }
+ return;
+ }
+
+ ++NumBacktracks;
+
+ // Push the current interval back to unhandled since we are going
+ // to re-run at least this iteration. Since we didn't modify it, it
+ // should go back right at the front of the list.
+ unhandled_.push(cur);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
+ "did not choose a register to spill?");
+
+ // We spill all intervals aliasing the register with the
+ // minimum weight, roll back to the interval with the earliest
+ // start point, and let the linear scan algorithm run again.
+ SmallVector<LiveInterval*, 8> spillIs;
+
+ // Determine which intervals have to be spilled.
+ findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
+
+ // Set of spilled vregs (used later to roll back properly).
+ SmallSet<unsigned, 8> spilled;
+
+ // The earliest start of a spilled interval indicates up to where
+ // in handled_ we need to roll back.
+
+ unsigned earliestStart = cur->beginNumber();
+ LiveInterval *earliestStartInterval = cur;
+
+ // Spill live intervals of virtual regs mapped to the physical register we
+ // want to clear (and its aliases). We only spill those that overlap with the
+ // current interval as the rest do not affect its allocation. We also keep
+ // track of the earliest start of all spilled live intervals since this will
+ // mark our rollback point.
+ std::vector<LiveInterval*> added;
+ while (!spillIs.empty()) {
+ bool epicFail = false;
+ LiveInterval *sli = spillIs.back();
+ spillIs.pop_back();
+ DOUT << "\t\t\tspilling(a): " << *sli << '\n';
+ earliestStart = std::min(earliestStart, sli->beginNumber());
+ earliestStartInterval =
+ (earliestStartInterval->beginNumber() < sli->beginNumber()) ?
+ earliestStartInterval : sli;
+
+ if (earliestStartInterval->beginNumber()!=earliestStart) {
+ epicFail |= true;
+ std::cerr << "What the 1 - "
+ << "earliestStart = " << earliestStart
+ << "earliestStartInterval = " << earliestStartInterval->beginNumber()
+ << "\n";
+ }
+
+ std::vector<LiveInterval*> newIs;
+ if (!NewSpillFramework) {
+ newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_);
+ } else {
+ newIs = spiller_->spill(sli);
+ }
+ addStackInterval(sli, ls_, li_, mri_, *vrm_);
+ std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+ spilled.insert(sli->reg);
+
+ if (earliestStartInterval->beginNumber()!=earliestStart) {
+ epicFail |= true;
+ std::cerr << "What the 2 - "
+ << "earliestStart = " << earliestStart
+ << "earliestStartInterval = " << earliestStartInterval->beginNumber()
+ << "\n";
+ }
+
+ if (epicFail) {
+ //abort();
+ }
+ }
+
+ earliestStart = earliestStartInterval->beginNumber();
+
+ DOUT << "\t\trolling back to: " << earliestStart << '\n';
+
+ // Scan handled_ in reverse order up to the earliest start of a
+ // spilled live interval and undo each one, restoring the state of
+ // unhandled_.
+ while (!handled_.empty()) {
+ LiveInterval* i = handled_.back();
+ // If this interval starts before the rollback point we are done.
+ if (i->beginNumber() < earliestStart)
+ break;
+ DOUT << "\t\t\tundo changes for: " << *i << '\n';
+ handled_.pop_back();
+
+ // When undoing a live interval allocation we must know if it is active or
+ // inactive to properly update regUse_ and the VirtRegMap.
+ IntervalPtrs::iterator it;
+ if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+ active_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ delRegUse(vrm_->getPhys(i->reg));
+ vrm_->clearVirt(i->reg);
+ } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+ inactive_.erase(it);
+ assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ vrm_->clearVirt(i->reg);
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+ "Can only allocate virtual registers!");
+ vrm_->clearVirt(i->reg);
+ unhandled_.push(i);
+ }
+
+ DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+ if (ii == DowngradeMap.end())
+ // If the interval has a preference, it must be defined by a copy. Clear
+ // the preference now since the source interval allocation may have been
+ // undone as well.
+ i->preference = 0;
+ else {
+ UpgradeRegister(ii->second);
+ }
+ }
+
+ // Rewind the iterators in the active, inactive, and fixed lists back to the
+ // point we reverted to.
+ RevertVectorIteratorsTo(active_, earliestStart);
+ RevertVectorIteratorsTo(inactive_, earliestStart);
+ RevertVectorIteratorsTo(fixed_, earliestStart);
+
+ // Scan the rest and undo each interval that expired after the rollback
+ // point and insert it in active (the next iteration of the algorithm will
+ // put it in inactive if required).
+ for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+ LiveInterval *HI = handled_[i];
+ if (!HI->expiredAt(earliestStart) &&
+ HI->expiredAt(cur->beginNumber())) {
+ DOUT << "\t\t\tundo changes for: " << *HI << '\n';
+ active_.push_back(std::make_pair(HI, HI->begin()));
+ assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+ addRegUse(vrm_->getPhys(HI->reg));
+ }
+ }
+
+ // Merge added with unhandled.
+ // This also updates the NextReloadMap. That is, it adds a mapping from a
+ // register defined by a reload from SS to the next reload from SS in the
+ // same basic block.
+ MachineBasicBlock *LastReloadMBB = 0;
+ LiveInterval *LastReload = 0;
+ int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+ std::sort(added.begin(), added.end(), LISorter());
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ LiveInterval *ReloadLi = added[i];
+ if (ReloadLi->weight == HUGE_VALF &&
+ li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+ unsigned ReloadIdx = ReloadLi->beginNumber();
+ MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+ int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+ if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+ // Last reload of same SS is in the same MBB. We want to try to
+ // allocate both reloads the same register and make sure the reg
+ // isn't clobbered in between if at all possible.
+ assert(LastReload->beginNumber() < ReloadIdx); + NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); + } + LastReloadMBB = ReloadMBB; + LastReload = ReloadLi; + LastReloadSS = ReloadSS; + } + unhandled_.push(ReloadLi); + } +} + +unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC, + unsigned MaxInactiveCount, + SmallVector<unsigned, 256> &inactiveCounts, + bool SkipDGRegs) { + unsigned FreeReg = 0; + unsigned FreeRegInactiveCount = 0; + + TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_); + TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_); + assert(I != E && "No allocatable register in this register class!"); + + // Scan for the first available register. + for (; I != E; ++I) { + unsigned Reg = *I; + // Ignore "downgraded" registers. + if (SkipDGRegs && DowngradedRegs.count(Reg)) + continue; + if (isRegAvail(Reg)) { + FreeReg = Reg; + if (FreeReg < inactiveCounts.size()) + FreeRegInactiveCount = inactiveCounts[FreeReg]; + else + FreeRegInactiveCount = 0; + break; + } + } + + // If there are no free regs, or if this reg has the max inactive count, + // return this register. + if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) + return FreeReg; + + // Continue scanning the registers, looking for the one with the highest + // inactive count. Alkis found that this reduced register pressure very + // slightly on X86 (in rev 1.94 of this file), though this should probably be + // reevaluated now. + for (; I != E; ++I) { + unsigned Reg = *I; + // Ignore "downgraded" registers. + if (SkipDGRegs && DowngradedRegs.count(Reg)) + continue; + if (isRegAvail(Reg) && Reg < inactiveCounts.size() && + FreeRegInactiveCount < inactiveCounts[Reg]) { + FreeReg = Reg; + FreeRegInactiveCount = inactiveCounts[Reg]; + if (FreeRegInactiveCount == MaxInactiveCount) + break; // We found the one with the max inactive count. + } + } + + return FreeReg; +} + +/// getFreePhysReg - return a free physical register for this virtual register +/// interval if we have one, otherwise return 0. +unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { + SmallVector<unsigned, 256> inactiveCounts; + unsigned MaxInactiveCount = 0; + + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); + + for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); + i != e; ++i) { + unsigned reg = i->first->reg; + assert(TargetRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + + // If this is not in a related reg class to the register we're allocating, + // don't check it. + const TargetRegisterClass *RegRC = mri_->getRegClass(reg); + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { + reg = vrm_->getPhys(reg); + if (inactiveCounts.size() <= reg) + inactiveCounts.resize(reg+1); + ++inactiveCounts[reg]; + MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); + } + } + + // If copy coalescer has assigned a "preferred" register, check if it's + // available first. 
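+  // The preference is honored only if that register is currently free and is
+  // a member of this interval's register class, as checked below.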
+ if (cur->preference) { + DOUT << "(preferred: " << tri_->getName(cur->preference) << ") "; + if (isRegAvail(cur->preference) && + RC->contains(cur->preference)) + return cur->preference; + } + + if (!DowngradedRegs.empty()) { + unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; + } + return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false); +} + +FunctionPass* llvm::createLinearScanRegisterAllocator() { + return new RALinScan(); +} diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp new file mode 100644 index 0000000..e1cc20c --- /dev/null +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -0,0 +1,1068 @@ +//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); + +static RegisterRegAlloc + localRegAlloc("local", "local register allocator", + createLocalRegisterAllocator); + +namespace { + class VISIBILITY_HIDDEN RALocal : public MachineFunctionPass { + public: + static char ID; + RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; + + // Virt2PhysRegMap - This map contains entries for each virtual register + // that is currently available in a physical register. + IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap; + + unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { + return Virt2PhysRegMap[VirtReg]; + } + + // PhysRegsUsed - This array is effectively a map, containing entries for + // each physical register that currently has a value (ie, it is in + // Virt2PhysRegMap). The value mapped to is the virtual register + // corresponding to the physical register (the inverse of the + // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned + // because it is used by a future instruction, and to -2 if it is not + // allocatable. If the entry for a physical register is -1, then the + // physical register is "not in the map". 
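+    // In summary: -2 means not allocatable, -1 means free ("not in the
+    // map"), 0 means pinned for a future use, and any other value is the
+    // virtual register currently held.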
+ // + std::vector<int> PhysRegsUsed; + + // PhysRegsUseOrder - This contains a list of the physical registers that + // currently have a virtual register value in them. This list provides an + // ordering of registers, imposing a reallocation order. This list is only + // used if all registers are allocated and we have to spill one, in which + // case we spill the least recently used register. Entries at the front of + // the list are the least recently used registers, entries at the back are + // the most recently used. + // + std::vector<unsigned> PhysRegsUseOrder; + + // Virt2LastUseMap - This maps each virtual register to its last use + // (MachineInstr*, operand index pair). + IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor> + Virt2LastUseMap; + + std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + return Virt2LastUseMap[Reg]; + } + + // VirtRegModified - This bitset contains information about which virtual + // registers need to be spilled back to memory when their registers are + // scavenged. If a virtual register has simply been rematerialized, there + // is no reason to spill it to memory when we need the register back. + // + BitVector VirtRegModified; + + // UsedInMultipleBlocks - Tracks whether a particular register is used in + // more than one block. + BitVector UsedInMultipleBlocks; + + void markVirtRegModified(unsigned Reg, bool Val = true) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + Reg -= TargetRegisterInfo::FirstVirtualRegister; + if (Val) + VirtRegModified.set(Reg); + else + VirtRegModified.reset(Reg); + } + + bool isVirtRegModified(unsigned Reg) const { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size() + && "Illegal virtual register!"); + return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; + } + + void AddToPhysRegsUseOrder(unsigned Reg) { + std::vector<unsigned>::iterator It = + std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg); + if (It != PhysRegsUseOrder.end()) + PhysRegsUseOrder.erase(It); + PhysRegsUseOrder.push_back(Reg); + } + + void MarkPhysRegRecentlyUsed(unsigned Reg) { + if (PhysRegsUseOrder.empty() || + PhysRegsUseOrder.back() == Reg) return; // Already most recently used + + for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) + if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) { + unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle + PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); + // Add it to the end of the list + PhysRegsUseOrder.push_back(RegMatch); + if (RegMatch == Reg) + return; // Found an exact match, exit early + } + } + + public: + virtual const char *getPassName() const { + return "Local Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// runOnMachineFunction - Register allocate the whole function + bool runOnMachineFunction(MachineFunction &Fn); + + /// AllocateBasicBlock - Register allocate the specified basic block. + void AllocateBasicBlock(MachineBasicBlock &MBB); + + + /// areRegsEqual - This method returns true if the specified registers are + /// related to each other. 
To do this, it checks to see if they are equal + /// or if the first register is in the alias set of the second register. + /// + bool areRegsEqual(unsigned R1, unsigned R2) const { + if (R1 == R2) return true; + for (const unsigned *AliasSet = TRI->getAliasSet(R2); + *AliasSet; ++AliasSet) { + if (*AliasSet == R1) return true; + } + return false; + } + + /// getStackSpaceFor - This returns the frame index of the specified virtual + /// register on the stack, allocating space if necessary. + int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + + /// removePhysReg - This method marks the specified physical register as no + /// longer being in use. + /// + void removePhysReg(unsigned PhysReg); + + /// spillVirtReg - This method spills the value specified by PhysReg into + /// the virtual register slot specified by VirtReg. It then updates the RA + /// data structures to indicate the fact that PhysReg is now available. + /// + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned VirtReg, unsigned PhysReg); + + /// spillPhysReg - This method spills the specified physical register into + /// the virtual register slot associated with it. If OnlyVirtRegs is set to + /// true, then the request is ignored if the physical register does not + /// contain a virtual register. + /// + void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs = false); + + /// assignVirtToPhysReg - This method updates local state so that we know + /// that PhysReg is the proper container for VirtReg now. The physical + /// register must not be used for anything else when this is called. + /// + void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); + + /// isPhysRegAvailable - Return true if the specified physical register is + /// free and available for use. This also includes checking to see if + /// aliased registers are all free... + /// + bool isPhysRegAvailable(unsigned PhysReg) const; + + /// getFreeReg - Look to see if there is a free register available in the + /// specified register class. If not, return 0. + /// + unsigned getFreeReg(const TargetRegisterClass *RC); + + /// getReg - Find a physical register to hold the specified virtual + /// register. If all compatible physical registers are used, this method + /// spills the last used virtual register to the stack, and uses that + /// register. If NoFree is true, that means the caller knows there isn't + /// a free register, do not call getFreeReg(). + unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg, bool NoFree = false); + + /// reloadVirtReg - This method transforms the specified virtual + /// register use to refer to a physical register. This method may do this + /// in one of several ways: if the register is available in a physical + /// register already, it uses that physical register. If the value is not + /// in a physical register, and if there are physical registers available, + /// it loads it into a register. If register pressure is high, and it is + /// possible, it tries to fold the load of the virtual register into the + /// instruction itself. It avoids doing this if register pressure is low to + /// improve the chance that subsequent instructions can use the reloaded + /// value. This method returns the modified instruction. 
+    ///
+    MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                unsigned OpNum, SmallSet<unsigned, 4> &RRegs);
+
+    /// ComputeLocalLiveness - Computes liveness of registers within a basic
+    /// block, setting the killed/dead flags as appropriate.
+    void ComputeLocalLiveness(MachineBasicBlock& MBB);
+
+    void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+                       unsigned PhysReg);
+  };
+  char RALocal::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the stack slot VirtReg already belongs to, if any...
+  int SS = StackSlotForVirtReg[VirtReg];
+  if (SS != -1)
+    return SS;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment());
+
+  // Assign the slot...
+  StackSlotForVirtReg[VirtReg] = FrameIdx;
+  return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RALocal::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;      // PhysReg no longer used
+
+  std::vector<unsigned>::iterator It =
+    std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
+  if (It != PhysRegsUseOrder.end())
+    PhysRegsUseOrder.erase(It);
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RALocal::spillVirtReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I,
+                           unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " Either a kill flag is missing for the register, or a use exists"
+         " beyond the intended one.");
+  DOUT << "  Spilling register " << TRI->getName(PhysReg)
+       << " containing %reg" << VirtReg;
+
+  if (!isVirtRegModified(VirtReg)) {
+    DOUT << " which has not been modified, so no store necessary!";
+    std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+    if (LastUse.first)
+      LastUse.first->getOperand(LastUse.second).setIsKill();
+  } else {
+    // Otherwise, there is a virtual register corresponding to this physical
+    // register. We only need to spill it into its stack slot if it has been
+    // modified.
+    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DOUT << " to stack slot #" << FrameIndex;
+    // If the instruction reads the register that's spilled (e.g. this can
+    // happen if it is a move to a physical register), then the spill
+    // instruction is not a kill.
+    bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
+    TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+    ++NumStores;   // Update statistics
+  }
+
+  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+
+  DOUT << "\n";
+  removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
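+/// If the register itself holds no virtual register, any live aliased
+/// registers are spilled instead, so that the full register becomes free.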
+///
+void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                           unsigned PhysReg, bool OnlyVirtRegs) {
+  if (PhysRegsUsed[PhysReg] != -1) {   // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non-allocatable reg used!");
+    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+  } else {
+    // If the selected register aliases any other registers, we must make
+    // sure that none of its aliases are still alive.
+    for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+         *AliasSet; ++AliasSet)
+      if (PhysRegsUsed[*AliasSet] != -1 &&   // Spill aliased register.
+          PhysRegsUsed[*AliasSet] != -2)     // If allocatable.
+        if (PhysRegsUsed[*AliasSet])
+          spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+  }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+  assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+  // Update information to note the fact that this register was just used, and
+  // it holds VirtReg.
+  PhysRegsUsed[PhysReg] = VirtReg;
+  getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+  AddToPhysRegsUseOrder(PhysReg);   // New use of PhysReg
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
+  if (PhysRegsUsed[PhysReg] != -1) return false;
+
+  // If the selected register aliases any other allocated registers, it is
+  // not free!
+  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet)
+    if (PhysRegsUsed[*AliasSet] >= 0)   // Aliased register in use?
+      return false;                     // Can't use this reg then.
+  return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
+  // Get iterators defining the range of registers that are valid to allocate
+  // in this class, which also specifies the preferred allocation order.
+  TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+  TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+  for (; RI != RE; ++RI)
+    if (isPhysRegAvailable(*RI)) {   // Is reg unused?
+      assert(*RI != 0 && "Cannot use register!");
+      return *RI; // Found an unused register!
+    }
+  return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register. If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+                         unsigned VirtReg, bool NoFree) {
+  const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+
+  // First check to see if we have a free register of the requested type...
+  unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
+
+  // If we didn't find an unused register, scavenge one now!
+  if (PhysReg == 0) {
+    assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+
+    // Loop over all of the preallocated registers from the least recently used
+    // to the most recently used. When we find one that is capable of holding
+    // our register, use it.
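+    // PhysRegsUseOrder is maintained in LRU order: entries at the front are
+    // the least recently used, so scanning forward visits the best spill
+    // candidates first.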
+    for (unsigned i = 0; PhysReg == 0; ++i) {
+      assert(i != PhysRegsUseOrder.size() &&
+             "Couldn't find a register of the appropriate class!");
+
+      unsigned R = PhysRegsUseOrder[i];
+
+      // We can only use this register if it holds a virtual register (i.e., it
+      // can be spilled). Do not use it if it is an explicitly allocated
+      // physical register!
+      assert(PhysRegsUsed[R] != -1 &&
+             "PhysReg in PhysRegsUseOrder, but is not allocated?");
+      if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+        // If the current register is compatible, use it.
+        if (RC->contains(R)) {
+          PhysReg = R;
+          break;
+        } else {
+          // If one of the registers aliased to the current register is
+          // compatible, use it.
+          for (const unsigned *AliasIt = TRI->getAliasSet(R);
+               *AliasIt; ++AliasIt) {
+            if (RC->contains(*AliasIt) &&
+                // If this is pinned down for some reason, don't use it.  For
+                // example, if CL is pinned, and we run across CH, don't use
+                // CH as justification for scavenging ECX (which would fail).
+                PhysRegsUsed[*AliasIt] != 0 &&
+
+                // Make sure the register is allocatable.  Don't allocate SIL on
+                // x86-32.
+                PhysRegsUsed[*AliasIt] != -2) {
+              PhysReg = *AliasIt;    // Take an aliased register
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    assert(PhysReg && "Physical register not assigned!?!?");
+
+    // At this point PhysRegsUseOrder[i] is the least recently used register of
+    // compatible register class. Spill it to memory and reap its remains.
+    spillPhysReg(MBB, I, PhysReg);
+  }
+
+  // Now that we know which register we need to assign this to, do it now!
+  assignVirtToPhysReg(VirtReg, PhysReg);
+  return PhysReg;
+}
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register. This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register. If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register. If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself. It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value. This method returns the
+/// modified instruction.
+///
+MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                     unsigned OpNum,
+                                     SmallSet<unsigned, 4> &ReloadedRegs) {
+  unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+  // If the virtual register is already available, just update the instruction
+  // and return.
+  if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+    MarkPhysRegRecentlyUsed(PR);       // Already have this value available!
+    MI->getOperand(OpNum).setReg(PR);  // Assign the input register
+    getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+    return MI;
+  }
+
+  // Otherwise, we need to fold it into the current instruction, or reload it.
+  // If we have registers available to hold the value, use them.
+  const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+  unsigned PhysReg = getFreeReg(RC);
+  int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+  if (PhysReg) {   // Register is available, allocate it!
+    assignVirtToPhysReg(VirtReg, PhysReg);
+  } else {         // No registers available.
+    // Force some poor hapless value out of the register file to
+    // make room for the new register, and reload it.
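+    // Passing NoFree = true tells getReg that the caller already knows there
+    // is no free register, so the getFreeReg scan is skipped.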
+    PhysReg = getReg(MBB, MI, VirtReg, true);
+  }
+
+  markVirtRegModified(VirtReg, false);   // Note that this reg was just reloaded
+
+  DOUT << "  Reloading %reg" << VirtReg << " into "
+       << TRI->getName(PhysReg) << "\n";
+
+  // Add move instruction(s)
+  TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+  ++NumLoads;    // Update statistics
+
+  MF->getRegInfo().setPhysRegUsed(PhysReg);
+  MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
+  getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+
+  if (!ReloadedRegs.insert(PhysReg)) {
+    cerr << "Ran out of registers during register allocation!\n";
+    if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+      cerr << "Please check your inline asm statement for invalid "
+           << "constraints:\n";
+      MI->print(cerr.stream(), TM);
+    }
+    exit(1);
+  }
+  for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+       *SubRegs; ++SubRegs) {
+    if (!ReloadedRegs.insert(*SubRegs)) {
+      cerr << "Ran out of registers during register allocation!\n";
+      if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+        cerr << "Please check your inline asm statement for invalid "
+             << "constraints:\n";
+        MI->print(cerr.stream(), TM);
+      }
+      exit(1);
+    }
+  }
+
+  return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+        MO.isDef() && !MO.isDead())
+      return true;
+  }
+  return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
+        !MO.isDef() && MO.isKill())
+      return true;
+  }
+  return false;
+}
+
+// precedes - Helper function to determine whether MachineInstr A
+// precedes MachineInstr B within the same MBB.
+static bool precedes(MachineBasicBlock::iterator A,
+                     MachineBasicBlock::iterator B) {
+  if (A == B)
+    return false;
+
+  MachineBasicBlock::iterator I = A->getParent()->begin();
+  while (I != A->getParent()->end()) {
+    if (I == A)
+      return true;
+    else if (I == B)
+      return false;
+
+    ++I;
+  }
+
+  return false;
+}
+
+/// ComputeLocalLiveness - Computes liveness of registers within a basic
+/// block, setting the killed/dead flags as appropriate.
+void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
+  MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+  // Keep track of the most recently seen previous use or def of each reg,
+  // so that we can update them with dead/kill markers.
+  DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
+  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+       I != E; ++I) {
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = I->getOperand(i);
+      // Uses don't trigger any flags, but we need to save
+      // them for later. Also, we have to process these
+      // _before_ processing the defs, since an instr
+      // uses regs before it defs them.
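+      // For example, in "%reg100 = add %reg100, %reg101" the use of %reg100
+      // must be recorded before the def of %reg100 replaces the entry in
+      // LastUseDef.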
+      if (MO.isReg() && MO.getReg() && MO.isUse()) {
+        LastUseDef[MO.getReg()] = std::make_pair(I, i);
+
+        if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
+
+        const unsigned* Aliases = TRI->getAliasSet(MO.getReg());
+        if (Aliases) {
+          while (*Aliases) {
+            DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+              alias = LastUseDef.find(*Aliases);
+
+            if (alias != LastUseDef.end() && alias->second.first != I)
+              LastUseDef[*Aliases] = std::make_pair(I, i);
+
+            ++Aliases;
+          }
+        }
+      }
+    }
+
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = I->getOperand(i);
+      // Defs other than 2-addr redefs _do_ trigger flag changes:
+      //   - A def followed by a def is dead
+      //   - A use followed by a def is a kill
+      if (MO.isReg() && MO.getReg() && MO.isDef()) {
+        DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+          last = LastUseDef.find(MO.getReg());
+        if (last != LastUseDef.end()) {
+          // Check if this is a two address instruction. If so, then
+          // the def does not kill the use.
+          if (last->second.first == I &&
+              I->isRegTiedToUseOperand(i))
+            continue;
+
+          MachineOperand& lastUD =
+                      last->second.first->getOperand(last->second.second);
+          if (lastUD.isDef())
+            lastUD.setIsDead(true);
+          else
+            lastUD.setIsKill(true);
+        }
+
+        LastUseDef[MO.getReg()] = std::make_pair(I, i);
+      }
+    }
+  }
+
+  // Registers live out of the function contain its return values, so we
+  // need to make sure they are alive at return time.
+  if (!MBB.empty() && MBB.back().getDesc().isReturn()) {
+    MachineInstr* Ret = &MBB.back();
+    for (MachineRegisterInfo::liveout_iterator
+         I = MF->getRegInfo().liveout_begin(),
+         E = MF->getRegInfo().liveout_end(); I != E; ++I)
+      if (!Ret->readsRegister(*I)) {
+        Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+        LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
+      }
+  }
+
+  // Finally, loop over the final use/def of each reg
+  // in the block and determine if it is dead.
+  for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+       I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
+    MachineInstr* MI = I->second.first;
+    unsigned idx = I->second.second;
+    MachineOperand& MO = MI->getOperand(idx);
+
+    bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
+
+    // A crude approximation of "live-out" calculation
+    bool usedOutsideBlock = isPhysReg ? false :
+          UsedInMultipleBlocks.test(MO.getReg() -
+                                    TargetRegisterInfo::FirstVirtualRegister);
+    if (!isPhysReg && !usedOutsideBlock)
+      for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
+           UE = MRI.reg_end(); UI != UE; ++UI)
+        // Two cases:
+        //   - used in another block
+        //   - used in the same block before it is defined (loop)
+        if (UI->getParent() != &MBB ||
+            (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) {
+          UsedInMultipleBlocks.set(MO.getReg() -
+                                   TargetRegisterInfo::FirstVirtualRegister);
+          usedOutsideBlock = true;
+          break;
+        }
+
+    // Physical registers and those that are not live-out of the block
+    // are killed/dead at their last use/def within this block.
+    if (isPhysReg || !usedOutsideBlock) {
+      if (MO.isUse()) {
+        // Don't mark uses that are tied to defs as kills.
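+        // (As noted above for two-address instructions, the def does not
+        // kill the tied use, so no kill flag is set here.)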
+ if (!MI->isRegTiedToDefOperand(idx)) + MO.setIsKill(true); + } else + MO.setIsDead(true); + } + } +} + +void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII = MBB.begin(); + + DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); + + // Add live-in registers as active. + for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), + E = MBB.livein_end(); I != E; ++I) { + unsigned Reg = *I; + MF->getRegInfo().setPhysRegUsed(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(Reg); + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + AddToPhysRegsUseOrder(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); + } + } + } + + ComputeLocalLiveness(MBB); + + // Otherwise, sequentially allocate each instruction in the MBB. + while (MII != MBB.end()) { + MachineInstr *MI = MII++; + const TargetInstrDesc &TID = MI->getDesc(); + DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI; + DOUT << " Regs have values: "; + for (unsigned i = 0; i != TRI->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + DOUT << "[" << TRI->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + DOUT << "\n"); + + // Loop over the implicit uses, making sure that they are at the head of the + // use order list, so they don't get reallocated. + if (TID.ImplicitUses) { + for (const unsigned *ImplicitUses = TID.ImplicitUses; + *ImplicitUses; ++ImplicitUses) + MarkPhysRegRecentlyUsed(*ImplicitUses); + } + + SmallVector<unsigned, 8> Kills; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isKill()) { + if (!MO.isImplicit()) + Kills.push_back(MO.getReg()); + else if (!isReadModWriteImplicitKill(MI, MO.getReg())) + // These are extra physical register kills when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + Kills.push_back(MO.getReg()); + } + } + + // If any physical regs are earlyclobber, spill any value they might + // have in them, then mark them unallocatable. + // If any virtual regs are earlyclobber, allocate them now (before + // freeing inputs that are killed). + if (MI->getOpcode()==TargetInstrInfo::INLINEASM) { + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() && + MO.getReg()) { + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = getReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = + std::make_pair((MachineInstr*)0, 0); + DOUT << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"; + MO.setReg(DestPhysReg); // Assign the earlyclobber register + } else { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. 
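+            // For example, on x86 a def of AX implicitly reads and writes
+            // EAX, since the upper bits of EAX are preserved across the def.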
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(Reg); + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(*SubRegs); + } + } + } + } + } + } + + // Get the used operands into registers. This has the potential to spill + // incoming values if we are out of registers. Note that we completely + // ignore physical register uses here. We assume that if an explicit + // physical register is referenced by the instruction, that it is guaranteed + // to be live-in, or the input is badly hosed. + // + SmallSet<unsigned, 4> ReloadedRegs; + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // here we are looking for only used operands (never def&use) + if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + MI = reloadVirtReg(MBB, MI, i, ReloadedRegs); + } + + // If this instruction is the last user of this register, kill the + // value, freeing the register being used, so it doesn't need to be + // spilled to memory. + // + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned VirtReg = Kills[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // If the virtual register was never materialized into a register, it + // might not be in the map, but it won't hurt to zero it out anyway. + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else { + assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && + "Silently clearing a virtual register?"); + } + + if (PhysReg) { + DOUT << " Last use of " << TRI->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(PhysReg); + for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + DOUT << " Last use of " + << TRI->getName(*SubRegs) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(*SubRegs); + } + } + } + } + + // Loop over all of the operands of the instruction, spilling registers that + // are defined, and marking explicit destinations in the PhysRegsUsed map. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() && + !MO.isEarlyClobber() && + TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. 
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(Reg); + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(*SubRegs); + } + } + } + } + + // Loop over the implicit defs, spilling them as well. + if (TID.ImplicitDefs) { + for (const unsigned *ImplicitDefs = TID.ImplicitDefs; + *ImplicitDefs; ++ImplicitDefs) { + unsigned Reg = *ImplicitDefs; + if (PhysRegsUsed[Reg] != -2) { + spillPhysReg(MBB, MI, Reg, true); + AddToPhysRegsUseOrder(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + } + MF->getRegInfo().setPhysRegUsed(Reg); + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + AddToPhysRegsUseOrder(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); + } + } + } + } + + SmallVector<unsigned, 8> DeadDefs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDead()) + DeadDefs.push_back(MO.getReg()); + } + + // Okay, we have allocated all of the source operands and spilled any values + // that would be destroyed by defs of this instruction. Loop over the + // explicit defs and assign them to a register, spilling incoming values if + // we need to scavenge a register. + // + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() && + !MO.isEarlyClobber() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = getReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); + DOUT << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"; + MO.setReg(DestPhysReg); // Assign the output register + } + } + + // If this instruction defines any registers that are immediately dead, + // kill them now. + // + for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { + unsigned VirtReg = DeadDefs[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + assert(PhysReg != 0); + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } + + if (PhysReg) { + DOUT << " Register " << TRI->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it from live set\n"; + removePhysReg(PhysReg); + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DOUT << " Register " << TRI->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"; + removePhysReg(*AliasSet); + } + } + } + } + + // Finally, if this is a noop copy instruction, zap it. 
(Except that if + // the copy is dead, it must be kept to avoid messing up liveness info for + // the register scavenger. See pr4100.) + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + SrcReg == DstReg && DeadDefs.empty()) + MBB.erase(MI); + } + + MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + + // Spill all physical registers holding virtual registers now. + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (unsigned VirtReg = PhysRegsUsed[i]) + spillVirtReg(MBB, MI, VirtReg, i); + else + removePhysReg(i); + } + +#if 0 + // This checking code is very expensive. + bool AllOk = true; + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) + if (unsigned PR = Virt2PhysRegMap[i]) { + cerr << "Register still mapped: " << i << " -> " << PR << "\n"; + AllOk = false; + } + assert(AllOk && "Virtual registers still in phys regs?"); +#endif + + // Clear any physical register which appear live at the end of the basic + // block, but which do not hold any virtual registers. e.g., the stack + // pointer. + PhysRegsUseOrder.clear(); +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RALocal::runOnMachineFunction(MachineFunction &Fn) { + DOUT << "Machine Function " << "\n"; + MF = &Fn; + TM = &Fn.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + + PhysRegsUsed.assign(TRI->getNumRegs(), -1); + + // At various places we want to efficiently check to see whether a register + // is allocatable. To handle this, we mark all unallocatable registers as + // being pinned down, permanently. + { + BitVector Allocable = TRI->getAllocatableSet(Fn); + for (unsigned i = 0, e = Allocable.size(); i != e; ++i) + if (!Allocable[i]) + PhysRegsUsed[i] = -2; // Mark the reg unallocable. + } + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); + StackSlotForVirtReg.grow(LastVirtReg); + Virt2PhysRegMap.grow(LastVirtReg); + Virt2LastUseMap.grow(LastVirtReg); + VirtRegModified.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister); + UsedInMultipleBlocks.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) + AllocateBasicBlock(*MBB); + + StackSlotForVirtReg.clear(); + PhysRegsUsed.clear(); + VirtRegModified.clear(); + UsedInMultipleBlocks.clear(); + Virt2PhysRegMap.clear(); + Virt2LastUseMap.clear(); + return true; +} + +FunctionPass *llvm::createLocalRegisterAllocator() { + return new RALocal(); +} diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp new file mode 100644 index 0000000..61450a7 --- /dev/null +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -0,0 +1,871 @@ +//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based +// register allocator for LLVM. 
This allocator works by constructing a PBQP +// problem representing the register allocation problem under consideration, +// solving this using a PBQP solver, and mapping the solution back to a +// register assignment. If any variables are selected for spilling then spill +// code is inserted and the process repeated. +// +// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned +// for register allocation. For more information on PBQP for register +// allocation, see the following papers: +// +// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with +// PBQP. In Proceedings of the 7th Joint Modular Languages Conference +// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361. +// +// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular +// architectures. In Proceedings of the Joint Conference on Languages, +// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York, +// NY, USA, 139-148. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" + +#include "PBQP.h" +#include "VirtRegMap.h" +#include "VirtRegRewriter.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <limits> +#include <map> +#include <memory> +#include <set> +#include <vector> + +using namespace llvm; + +static RegisterRegAlloc +registerPBQPRepAlloc("pbqp", "PBQP register allocator", + createPBQPRegisterAllocator); + +namespace { + + //! + //! PBQP based allocators solve the register allocation problem by mapping + //! register allocation problems to Partitioned Boolean Quadratic + //! Programming problems. + class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass { + public: + + static char ID; + + //! Construct a PBQP register allocator. + PBQPRegAlloc() : MachineFunctionPass((intptr_t)&ID) {} + + //! Return the pass name. + virtual const char* getPassName() const throw() { + return "PBQP Register Allocator"; + } + + //! PBQP analysis usage. + virtual void getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<LiveIntervals>(); + au.addRequiredTransitive<RegisterCoalescer>(); + au.addRequired<LiveStacks>(); + au.addPreserved<LiveStacks>(); + au.addRequired<MachineLoopInfo>(); + au.addPreserved<MachineLoopInfo>(); + au.addRequired<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(au); + } + + //! 
Perform register allocation.
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+    typedef std::vector<const LiveInterval*> Node2LIMap;
+    typedef std::vector<unsigned> AllowedSet;
+    typedef std::vector<AllowedSet> AllowedSetMap;
+    typedef std::set<unsigned> RegSet;
+    typedef std::pair<unsigned, unsigned> RegPair;
+    typedef std::map<RegPair, PBQPNum> CoalesceMap;
+
+    typedef std::set<LiveInterval*> LiveIntervalSet;
+
+    MachineFunction *mf;
+    const TargetMachine *tm;
+    const TargetRegisterInfo *tri;
+    const TargetInstrInfo *tii;
+    const MachineLoopInfo *loopInfo;
+    MachineRegisterInfo *mri;
+
+    LiveIntervals *lis;
+    LiveStacks *lss;
+    VirtRegMap *vrm;
+
+    LI2NodeMap li2Node;
+    Node2LIMap node2LI;
+    AllowedSetMap allowedSets;
+    LiveIntervalSet vregIntervalsToAlloc,
+                    emptyVRegIntervals;
+
+
+    //! Builds a PBQP cost vector.
+    template <typename RegContainer>
+    PBQPVector* buildCostVector(unsigned vReg,
+                                const RegContainer &allowed,
+                                const CoalesceMap &coalesces,
+                                PBQPNum spillCost) const;
+
+    //! \brief Builds a PBQP interference matrix.
+    //!
+    //! @return Either a pointer to a non-zero PBQP matrix representing the
+    //! allocation option costs, or a null pointer for a zero matrix.
+    //!
+    //! Expects allowed sets for two interfering LiveIntervals. These allowed
+    //! sets should contain only allocable registers from the LiveInterval's
+    //! register class, with any interfering pre-colored registers removed.
+    template <typename RegContainer>
+    PBQPMatrix* buildInterferenceMatrix(const RegContainer &allowed1,
+                                        const RegContainer &allowed2) const;
+
+    //! \brief Builds a PBQP coalescing matrix.
+    //!
+    //! Expects allowed sets for two potentially coalescable LiveIntervals,
+    //! and an estimated benefit due to coalescing. The allowed sets should
+    //! contain only allocable registers from the LiveInterval's register
+    //! classes, with any interfering pre-colored registers removed.
+    template <typename RegContainer>
+    PBQPMatrix* buildCoalescingMatrix(const RegContainer &allowed1,
+                                      const RegContainer &allowed2,
+                                      PBQPNum cBenefit) const;
+
+    //! \brief Finds coalescing opportunities and returns them as a map.
+    //!
+    //! Any entries in the map are guaranteed coalescable, even if their
+    //! corresponding live intervals overlap.
+    CoalesceMap findCoalesces();
+
+    //! \brief Finds the initial set of vreg intervals to allocate.
+    void findVRegIntervalsToAlloc();
+
+    //! \brief Constructs a PBQP problem representation of the register
+    //! allocation problem for this function.
+    //!
+    //! @return a PBQP solver object for the register allocation problem.
+    pbqp* constructPBQPProblem();
+
+    //! \brief Adds a stack interval if the given live interval has been
+    //! spilled. Used to support stack slot coloring.
+    void addStackInterval(const LiveInterval *spilled,
+                          MachineRegisterInfo* mri);
+
+    //! \brief Given a solved PBQP problem, maps the solution back to a
+    //! register assignment.
+    bool mapPBQPToRegAlloc(pbqp *problem);
+
+    //! \brief Postprocessing before final spilling. Sets basic block "live in"
+    //! variables.
+    void finalizeAlloc() const;
+
+  };
+
+  char PBQPRegAlloc::ID = 0;
+}
+
+
+template <typename RegContainer>
+PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg,
+                                          const RegContainer &allowed,
+                                          const CoalesceMap &coalesces,
+                                          PBQPNum spillCost) const {
+
+  typedef typename RegContainer::const_iterator AllowedItr;
+
+  // Allocate vector.
Additional element (0th) used for spill option + PBQPVector *v = new PBQPVector(allowed.size() + 1); + + (*v)[0] = spillCost; + + // Iterate over the allowed registers inserting coalesce benefits if there + // are any. + unsigned ai = 0; + for (AllowedItr itr = allowed.begin(), end = allowed.end(); + itr != end; ++itr, ++ai) { + + unsigned pReg = *itr; + + CoalesceMap::const_iterator cmItr = + coalesces.find(RegPair(vReg, pReg)); + + // No coalesce - on to the next preg. + if (cmItr == coalesces.end()) + continue; + + // We have a coalesce - insert the benefit. + (*v)[ai + 1] = -cmItr->second; + } + + return v; +} + +template <typename RegContainer> +PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( + const RegContainer &allowed1, const RegContainer &allowed2) const { + + typedef typename RegContainer::const_iterator RegContainerIterator; + + // Construct a PBQP matrix representing the cost of allocation options. The + // rows and columns correspond to the allocation options for the two live + // intervals. Elements will be infinite where corresponding registers alias, + // since we cannot allocate aliasing registers to interfering live intervals. + // All other elements (non-aliasing combinations) will have zero cost. Note + // that the spill option (element 0,0) has zero cost, since we can allocate + // both intervals to memory safely (the cost for each individual allocation + // to memory is accounted for by the cost vectors for each live interval). + PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1); + + // Assume this is a zero matrix until proven otherwise. Zero matrices occur + // between interfering live ranges with non-overlapping register sets (e.g. + // non-overlapping reg classes, or disjoint sets of allowed regs within the + // same class). The term "overlapping" is used advisedly: sets which do not + // intersect, but contain registers which alias, will have non-zero matrices. + // We optimize zero matrices away to improve solver speed. + bool isZeroMatrix = true; + + + // Row index. Starts at 1, since the 0th row is for the spill option, which + // is always zero. + unsigned ri = 1; + + // Iterate over allowed sets, insert infinities where required. + for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end(); + a1Itr != a1End; ++a1Itr) { + + // Column index, starts at 1 as for row index. + unsigned ci = 1; + unsigned reg1 = *a1Itr; + + for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end(); + a2Itr != a2End; ++a2Itr) { + + unsigned reg2 = *a2Itr; + + // If the row/column regs are identical or alias insert an infinity. + if ((reg1 == reg2) || tri->areAliases(reg1, reg2)) { + (*m)[ri][ci] = std::numeric_limits<PBQPNum>::infinity(); + isZeroMatrix = false; + } + + ++ci; + } + + ++ri; + } + + // If this turns out to be a zero matrix... + if (isZeroMatrix) { + // free it and return null. + delete m; + return 0; + } + + // ...otherwise return the cost matrix. + return m; +} + +template <typename RegContainer> +PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix( + const RegContainer &allowed1, const RegContainer &allowed2, + PBQPNum cBenefit) const { + + typedef typename RegContainer::const_iterator RegContainerIterator; + + // Construct a PBQP Matrix representing the benefits of coalescing. As with + // interference matrices the rows and columns represent allowed registers + // for the LiveIntervals which are (potentially) to be coalesced. 
The amount + // -cBenefit will be placed in any element representing the same register + // for both intervals. + PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1); + + // Reset costs to zero. + m->reset(0); + + // Assume the matrix is zero till proven otherwise. Zero matrices will be + // optimized away as in the interference case. + bool isZeroMatrix = true; + + // Row index. Starts at 1, since the 0th row is for the spill option, which + // is always zero. + unsigned ri = 1; + + // Iterate over the allowed sets, insert coalescing benefits where + // appropriate. + for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end(); + a1Itr != a1End; ++a1Itr) { + + // Column index, starts at 1 as for row index. + unsigned ci = 1; + unsigned reg1 = *a1Itr; + + for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end(); + a2Itr != a2End; ++a2Itr) { + + // If the row and column represent the same register insert a beneficial + // cost to preference this allocation - it would allow us to eliminate a + // move instruction. + if (reg1 == *a2Itr) { + (*m)[ri][ci] = -cBenefit; + isZeroMatrix = false; + } + + ++ci; + } + + ++ri; + } + + // If this turns out to be a zero matrix... + if (isZeroMatrix) { + // ...free it and return null. + delete m; + return 0; + } + + return m; +} + +PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { + + typedef MachineFunction::const_iterator MFIterator; + typedef MachineBasicBlock::const_iterator MBBIterator; + typedef LiveInterval::const_vni_iterator VNIIterator; + + CoalesceMap coalescesFound; + + // To find coalesces we need to iterate over the function looking for + // copy instructions. + for (MFIterator bbItr = mf->begin(), bbEnd = mf->end(); + bbItr != bbEnd; ++bbItr) { + + const MachineBasicBlock *mbb = &*bbItr; + + for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end(); + iItr != iEnd; ++iItr) { + + const MachineInstr *instr = &*iItr; + unsigned srcReg, dstReg, srcSubReg, dstSubReg; + + // If this isn't a copy then continue to the next instruction. + if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg)) + continue; + + // If the registers are already the same our job is nice and easy. + if (dstReg == srcReg) + continue; + + bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg), + dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg); + + // If both registers are physical then we can't coalesce. + if (srcRegIsPhysical && dstRegIsPhysical) + continue; + + // If it's a copy that includes a virtual register but the source and + // destination classes differ then we can't coalesce, so continue with + // the next instruction. + const TargetRegisterClass *srcRegClass = srcRegIsPhysical ? + tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg); + + const TargetRegisterClass *dstRegClass = dstRegIsPhysical ? + tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg); + + if (srcRegClass != dstRegClass) + continue; + + // We also need any physical regs to be allocable, coalescing with + // a non-allocable register is invalid. 
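+      // (For example, a register that is missing from its class's allocation
+      // order, such as the stack pointer, must never be coalesced with a
+      // virtual register.)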
+ if (srcRegIsPhysical) { + if (std::find(srcRegClass->allocation_order_begin(*mf), + srcRegClass->allocation_order_end(*mf), srcReg) == + srcRegClass->allocation_order_end(*mf)) + continue; + } + + if (dstRegIsPhysical) { + if (std::find(dstRegClass->allocation_order_begin(*mf), + dstRegClass->allocation_order_end(*mf), dstReg) == + dstRegClass->allocation_order_end(*mf)) + continue; + } + + // If we've made it here we have a copy with compatible register classes. + // We can probably coalesce, but we need to consider overlap. + const LiveInterval *srcLI = &lis->getInterval(srcReg), + *dstLI = &lis->getInterval(dstReg); + + if (srcLI->overlaps(*dstLI)) { + // Even in the case of an overlap we might still be able to coalesce, + // but we need to make sure that no definition of either range occurs + // while the other range is live. + + // Otherwise start by assuming we're ok. + bool badDef = false; + + // Test all defs of the source range. + for (VNIIterator + vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end(); + vniItr != vniEnd; ++vniItr) { + + // If we find a def that kills the coalescing opportunity then + // record it and break from the loop. + if (dstLI->liveAt((*vniItr)->def)) { + badDef = true; + break; + } + } + + // If we have a bad def give up, continue to the next instruction. + if (badDef) + continue; + + // Otherwise test definitions of the destination range. + for (VNIIterator + vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end(); + vniItr != vniEnd; ++vniItr) { + + // We want to make sure we skip the copy instruction itself. + if ((*vniItr)->copy == instr) + continue; + + if (srcLI->liveAt((*vniItr)->def)) { + badDef = true; + break; + } + } + + // As before a bad def we give up and continue to the next instr. + if (badDef) + continue; + } + + // If we make it to here then either the ranges didn't overlap, or they + // did, but none of their definitions would prevent us from coalescing. + // We're good to go with the coalesce. + + float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0; + + coalescesFound[RegPair(srcReg, dstReg)] = cBenefit; + coalescesFound[RegPair(dstReg, srcReg)] = cBenefit; + } + + } + + return coalescesFound; +} + +void PBQPRegAlloc::findVRegIntervalsToAlloc() { + + // Iterate over all live ranges. + for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); + itr != end; ++itr) { + + // Ignore physical ones. + if (TargetRegisterInfo::isPhysicalRegister(itr->first)) + continue; + + LiveInterval *li = itr->second; + + // If this live interval is non-empty we will use pbqp to allocate it. + // Empty intervals we allocate in a simple post-processing stage in + // finalizeAlloc. + if (!li->empty()) { + vregIntervalsToAlloc.insert(li); + } + else { + emptyVRegIntervals.insert(li); + } + } +} + +pbqp* PBQPRegAlloc::constructPBQPProblem() { + + typedef std::vector<const LiveInterval*> LIVector; + typedef std::vector<unsigned> RegVector; + + // This will store the physical intervals for easy reference. 
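+  // (A summary of the encoding built below: each vreg interval becomes one
+  // PBQP node whose cost vector has |allowed| + 1 elements, element 0 being
+  // the spill option. For example, with register names purely illustrative,
+  // a vreg with allowed set { R0, R1, R2 }, spill cost w, and a coalesce
+  // benefit b for R1 gets the vector [ w, 0, -b, 0 ]. Edges carry either
+  // interference matrices, with infinities on aliasing pairs, or coalescing
+  // matrices, with -benefit on equal-register pairs.)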
+ LIVector physIntervals; + + // Start by clearing the old node <-> live interval mappings & allowed sets + li2Node.clear(); + node2LI.clear(); + allowedSets.clear(); + + // Populate physIntervals, update preg use: + for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); + itr != end; ++itr) { + + if (TargetRegisterInfo::isPhysicalRegister(itr->first)) { + physIntervals.push_back(itr->second); + mri->setPhysRegUsed(itr->second->reg); + } + } + + // Iterate over vreg intervals, construct live interval <-> node number + // mappings. + for (LiveIntervalSet::const_iterator + itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end(); + itr != end; ++itr) { + const LiveInterval *li = *itr; + + li2Node[li] = node2LI.size(); + node2LI.push_back(li); + } + + // Get the set of potential coalesces. + CoalesceMap coalesces(findCoalesces()); + + // Construct a PBQP solver for this problem + pbqp *solver = alloc_pbqp(vregIntervalsToAlloc.size()); + + // Resize allowedSets container appropriately. + allowedSets.resize(vregIntervalsToAlloc.size()); + + // Iterate over virtual register intervals to compute allowed sets... + for (unsigned node = 0; node < node2LI.size(); ++node) { + + // Grab pointers to the interval and its register class. + const LiveInterval *li = node2LI[node]; + const TargetRegisterClass *liRC = mri->getRegClass(li->reg); + + // Start by assuming all allocable registers in the class are allowed... + RegVector liAllowed(liRC->allocation_order_begin(*mf), + liRC->allocation_order_end(*mf)); + + // Eliminate the physical registers which overlap with this range, along + // with all their aliases. + for (LIVector::iterator pItr = physIntervals.begin(), + pEnd = physIntervals.end(); pItr != pEnd; ++pItr) { + + if (!li->overlaps(**pItr)) + continue; + + unsigned pReg = (*pItr)->reg; + + // If we get here then the live intervals overlap, but we're still ok + // if they're coalescable. + if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end()) + continue; + + // If we get here then we have a genuine exclusion. + + // Remove the overlapping reg... + RegVector::iterator eraseItr = + std::find(liAllowed.begin(), liAllowed.end(), pReg); + + if (eraseItr != liAllowed.end()) + liAllowed.erase(eraseItr); + + const unsigned *aliasItr = tri->getAliasSet(pReg); + + if (aliasItr != 0) { + // ...and its aliases. + for (; *aliasItr != 0; ++aliasItr) { + RegVector::iterator eraseItr = + std::find(liAllowed.begin(), liAllowed.end(), *aliasItr); + + if (eraseItr != liAllowed.end()) { + liAllowed.erase(eraseItr); + } + } + } + } + + // Copy the allowed set into a member vector for use when constructing cost + // vectors & matrices, and mapping PBQP solutions back to assignments. + allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end()); + + // Set the spill cost to the interval weight, or epsilon if the + // interval weight is zero + PBQPNum spillCost = (li->weight != 0.0) ? + li->weight : std::numeric_limits<PBQPNum>::min(); + + // Build a cost vector for this interval. + add_pbqp_nodecosts(solver, node, + buildCostVector(li->reg, allowedSets[node], coalesces, + spillCost)); + + } + + + // Now add the cost matrices... + for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) { + const LiveInterval *li = node2LI[node1]; + + // Test for live range overlaps and insert interference matrices. 
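+    // For instance, with register names purely illustrative, given
+    // allowed1 = { R0, R1 }, allowed2 = { R1, R2 }, and no aliasing beyond
+    // identity, buildInterferenceMatrix produces the 3x3 matrix
+    //
+    //            spill  R1    R2
+    //    spill [   0     0     0 ]
+    //    R0    [   0     0     0 ]
+    //    R1    [   0    inf    0 ]
+    //
+    // since only the (R1, R1) combination is forbidden.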
+ for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) { + const LiveInterval *li2 = node2LI[node2]; + + CoalesceMap::const_iterator cmItr = + coalesces.find(RegPair(li->reg, li2->reg)); + + PBQPMatrix *m = 0; + + if (cmItr != coalesces.end()) { + m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2], + cmItr->second); + } + else if (li->overlaps(*li2)) { + m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]); + } + + if (m != 0) { + add_pbqp_edgecosts(solver, node1, node2, m); + delete m; + } + } + } + + // We're done, PBQP problem constructed - return it. + return solver; +} + +void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, + MachineRegisterInfo* mri) { + int stackSlot = vrm->getStackSlot(spilled->reg); + + if (stackSlot == VirtRegMap::NO_STACK_SLOT) + return; + + const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); + LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); + + VNInfo *vni; + if (stackInterval.getNumValNums() != 0) + vni = stackInterval.getValNumInfo(0); + else + vni = stackInterval.getNextValue(-0U, 0, lss->getVNInfoAllocator()); + + LiveInterval &rhsInterval = lis->getInterval(spilled->reg); + stackInterval.MergeRangesInAsValue(rhsInterval, vni); +} + +bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { + + // Set to true if we have any spills + bool anotherRoundNeeded = false; + + // Clear the existing allocation. + vrm->clearAllVirt(); + + // Iterate over the nodes mapping the PBQP solution to a register assignment. + for (unsigned node = 0; node < node2LI.size(); ++node) { + unsigned virtReg = node2LI[node]->reg, + allocSelection = get_pbqp_solution(problem, node); + + // If the PBQP solution is non-zero it's a physical register... + if (allocSelection != 0) { + // Get the physical reg, subtracting 1 to account for the spill option. + unsigned physReg = allowedSets[node][allocSelection - 1]; + + DOUT << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n"; + + assert(physReg != 0); + + // Add to the virt reg map and update the used phys regs. + vrm->assignVirt2Phys(virtReg, physReg); + } + // ...Otherwise it's a spill. + else { + + // Make sure we ignore this virtual reg on the next round + // of allocation + vregIntervalsToAlloc.erase(&lis->getInterval(virtReg)); + + // Insert spill ranges for this live range + const LiveInterval *spillInterval = node2LI[node]; + double oldSpillWeight = spillInterval->weight; + SmallVector<LiveInterval*, 8> spillIs; + std::vector<LiveInterval*> newSpills = + lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); + addStackInterval(spillInterval, mri); + + DOUT << "VREG " << virtReg << " -> SPILLED (Cost: " + << oldSpillWeight << ", New vregs: "; + + // Copy any newly inserted live intervals into the list of regs to + // allocate. + for (std::vector<LiveInterval*>::const_iterator + itr = newSpills.begin(), end = newSpills.end(); + itr != end; ++itr) { + + assert(!(*itr)->empty() && "Empty spill range."); + + DOUT << (*itr)->reg << " "; + + vregIntervalsToAlloc.insert(*itr); + } + + DOUT << ")\n"; + + // We need another round if spill intervals were added. + anotherRoundNeeded |= !newSpills.empty(); + } + } + + return !anotherRoundNeeded; +} + +void PBQPRegAlloc::finalizeAlloc() const { + typedef LiveIntervals::iterator LIIterator; + typedef LiveInterval::Ranges::const_iterator LRIterator; + + // First allocate registers for the empty intervals. 
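+  // (For reference, the decoding convention in mapPBQPToRegAlloc above:
+  // solution value 0 selects the spill option, and any k > 0 selects
+  // allowedSets[node][k - 1]. With a hypothetical allowed set { R4, R5, R7 },
+  // a solution of 3 assigns R7; a solution of 0 spills and forces another
+  // round.)
+  //
+  // Empty intervals never interfere with anything, so any register of the
+  // right class will do: take the recorded preference if there is one,
+  // otherwise the first register in the class's allocation order.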
+ for (LiveIntervalSet::const_iterator + itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end(); + itr != end; ++itr) { + LiveInterval *li = *itr; + + unsigned physReg = li->preference; + + if (physReg == 0) { + const TargetRegisterClass *liRC = mri->getRegClass(li->reg); + physReg = *liRC->allocation_order_begin(*mf); + } + + vrm->assignVirt2Phys(li->reg, physReg); + } + + // Finally iterate over the basic blocks to compute and set the live-in sets. + SmallVector<MachineBasicBlock*, 8> liveInMBBs; + MachineBasicBlock *entryMBB = &*mf->begin(); + + for (LIIterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + + const LiveInterval *li = liItr->second; + unsigned reg = 0; + + // Get the physical register for this interval + if (TargetRegisterInfo::isPhysicalRegister(li->reg)) { + reg = li->reg; + } + else if (vrm->isAssignedReg(li->reg)) { + reg = vrm->getPhys(li->reg); + } + else { + // Ranges which are assigned a stack slot only are ignored. + continue; + } + + // Ignore unallocated vregs: + if (reg == 0) { + continue; + } + + // Iterate over the ranges of the current interval... + for (LRIterator lrItr = li->begin(), lrEnd = li->end(); + lrItr != lrEnd; ++lrItr) { + + // Find the set of basic blocks which this range is live into... + if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) { + // And add the physreg for this interval to their live-in sets. + for (unsigned i = 0; i < liveInMBBs.size(); ++i) { + if (liveInMBBs[i] != entryMBB) { + if (!liveInMBBs[i]->isLiveIn(reg)) { + liveInMBBs[i]->addLiveIn(reg); + } + } + } + liveInMBBs.clear(); + } + } + } + +} + +bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { + + mf = &MF; + tm = &mf->getTarget(); + tri = tm->getRegisterInfo(); + tii = tm->getInstrInfo(); + mri = &mf->getRegInfo(); + + lis = &getAnalysis<LiveIntervals>(); + lss = &getAnalysis<LiveStacks>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + + vrm = &getAnalysis<VirtRegMap>(); + + DOUT << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"; + + // Allocator main loop: + // + // * Map current regalloc problem to a PBQP problem + // * Solve the PBQP problem + // * Map the solution back to a register allocation + // * Spill if necessary + // + // This process is continued till no more spills are generated. + + // Find the vreg intervals in need of allocation. + findVRegIntervalsToAlloc(); + + // If there aren't any then we're done here. + if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty()) + return true; + + // If there are non-empty intervals allocate them using pbqp. + if (!vregIntervalsToAlloc.empty()) { + + bool pbqpAllocComplete = false; + unsigned round = 0; + + while (!pbqpAllocComplete) { + DOUT << " PBQP Regalloc round " << round << ":\n"; + + pbqp *problem = constructPBQPProblem(); + + solve_pbqp(problem); + + pbqpAllocComplete = mapPBQPToRegAlloc(problem); + + free_pbqp(problem); + + ++round; + } + } + + // Finalise allocation, allocate empty ranges. 
+ finalizeAlloc(); + + vregIntervalsToAlloc.clear(); + emptyVRegIntervals.clear(); + li2Node.clear(); + node2LI.clear(); + allowedSets.clear(); + + DOUT << "Post alloc VirtRegMap:\n" << *vrm << "\n"; + + // Run rewriter + std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter()); + + rewriter->runOnMachineFunction(*mf, *vrm, lis); + + return true; +} + +FunctionPass* llvm::createPBQPRegisterAllocator() { + return new PBQPRegAlloc(); +} + + +#undef DEBUG_TYPE diff --git a/lib/CodeGen/RegAllocSimple.cpp b/lib/CodeGen/RegAllocSimple.cpp new file mode 100644 index 0000000..447e54c --- /dev/null +++ b/lib/CodeGen/RegAllocSimple.cpp @@ -0,0 +1,257 @@ +//===-- RegAllocSimple.cpp - A simple generic register allocator ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple register allocator. *Very* simple: It immediate +// spills every value right after it is computed, and it reloads all used +// operands from the spill area to temporary registers before each instruction. +// It does not keep values in registers across instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <map> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); + +namespace { + static RegisterRegAlloc + simpleRegAlloc("simple", "simple register allocator", + createSimpleRegisterAllocator); + + class VISIBILITY_HIDDEN RegAllocSimple : public MachineFunctionPass { + public: + static char ID; + RegAllocSimple() : MachineFunctionPass(&ID) {} + private: + MachineFunction *MF; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // StackSlotForVirtReg - Maps SSA Regs => frame index on the stack where + // these values are spilled + std::map<unsigned, int> StackSlotForVirtReg; + + // RegsUsed - Keep track of what registers are currently in use. This is a + // bitset. + std::vector<bool> RegsUsed; + + // RegClassIdx - Maps RegClass => which index we can take a register + // from. Since this is a simple register allocator, when we need a register + // of a certain class, we just take the next available one. + std::map<const TargetRegisterClass*, unsigned> RegClassIdx; + + public: + virtual const char *getPassName() const { + return "Simple Register Allocator"; + } + + /// runOnMachineFunction - Register allocate the whole function + bool runOnMachineFunction(MachineFunction &Fn); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(PHIEliminationID); // Eliminate PHI nodes + MachineFunctionPass::getAnalysisUsage(AU); + } + private: + /// AllocateBasicBlock - Register allocate the specified basic block. 
+ void AllocateBasicBlock(MachineBasicBlock &MBB); + + /// getStackSpaceFor - This returns the offset of the specified virtual + /// register on the stack, allocating space if necessary. + int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + + /// Given a virtual register, return a compatible physical register that is + /// currently unused. + /// + /// Side effect: marks that register as being used until manually cleared + /// + unsigned getFreeReg(unsigned virtualReg); + + /// Moves value from memory into that register + unsigned reloadVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned VirtReg); + + /// Saves reg value on the stack (maps virtual register to stack value) + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg); + }; + char RegAllocSimple::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual +/// register to be held on the stack. +int RegAllocSimple::getStackSpaceFor(unsigned VirtReg, + const TargetRegisterClass *RC) { + // Find the location VirtReg would belong... + std::map<unsigned, int>::iterator I = StackSlotForVirtReg.find(VirtReg); + + if (I != StackSlotForVirtReg.end()) + return I->second; // Already has space allocated? + + // Allocate a new stack object for this spill location... + int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot... + StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx)); + + return FrameIdx; +} + +unsigned RegAllocSimple::getFreeReg(unsigned virtualReg) { + const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtualReg); + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); +#ifndef NDEBUG + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); +#endif + + while (1) { + unsigned regIdx = RegClassIdx[RC]++; + assert(RI+regIdx != RE && "Not enough registers!"); + unsigned PhysReg = *(RI+regIdx); + + if (!RegsUsed[PhysReg]) { + MF->getRegInfo().setPhysRegUsed(PhysReg); + return PhysReg; + } + } +} + +unsigned RegAllocSimple::reloadVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg) { + const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(VirtReg); + int FrameIdx = getStackSpaceFor(VirtReg, RC); + unsigned PhysReg = getFreeReg(VirtReg); + + // Add move instruction(s) + ++NumLoads; + TII->loadRegFromStackSlot(MBB, I, PhysReg, FrameIdx, RC); + return PhysReg; +} + +void RegAllocSimple::spillVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg) { + const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(VirtReg); + + int FrameIdx = getStackSpaceFor(VirtReg, RC); + + // Add move instruction(s) + ++NumStores; + TII->storeRegToStackSlot(MBB, I, PhysReg, true, FrameIdx, RC); +} + + +void RegAllocSimple::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) { + // Made to combat the incorrect allocation of r2 = add r1, r1 + std::map<unsigned, unsigned> Virt2PhysRegMap; + + RegsUsed.resize(TRI->getNumRegs()); + + // This is a preliminary pass that will invalidate any registers that are + // used by the instruction (including implicit uses). 
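+    // For example (x86 purely for illustration): a 32-bit MUL implicitly
+    // uses EAX and implicitly defines EAX and EDX, so those registers must
+    // be marked in RegsUsed here, or getFreeReg could hand one of them out
+    // for an explicit operand of the same instruction.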
+ const TargetInstrDesc &Desc = MI->getDesc(); + const unsigned *Regs; + if (Desc.ImplicitUses) { + for (Regs = Desc.ImplicitUses; *Regs; ++Regs) + RegsUsed[*Regs] = true; + } + + if (Desc.ImplicitDefs) { + for (Regs = Desc.ImplicitDefs; *Regs; ++Regs) { + RegsUsed[*Regs] = true; + MF->getRegInfo().setPhysRegUsed(*Regs); + } + } + + // Loop over uses, move from memory into registers. + for (int i = MI->getNumOperands() - 1; i >= 0; --i) { + MachineOperand &MO = MI->getOperand(i); + + if (MO.isReg() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned virtualReg = (unsigned) MO.getReg(); + DOUT << "op: " << MO << "\n"; + DOUT << "\t inst[" << i << "]: "; + DEBUG(MI->print(*cerr.stream(), TM)); + + // make sure the same virtual register maps to the same physical + // register in any given instruction + unsigned physReg = Virt2PhysRegMap[virtualReg]; + if (physReg == 0) { + if (MO.isDef()) { + unsigned TiedOp; + if (!MI->isRegTiedToUseOperand(i, &TiedOp)) { + physReg = getFreeReg(virtualReg); + } else { + // must be same register number as the source operand that is + // tied to. This maps a = b + c into b = b + c, and saves b into + // a's spot. + assert(MI->getOperand(TiedOp).isReg() && + MI->getOperand(TiedOp).getReg() && + MI->getOperand(TiedOp).isUse() && + "Two address instruction invalid!"); + + physReg = MI->getOperand(TiedOp).getReg(); + } + spillVirtReg(MBB, next(MI), virtualReg, physReg); + } else { + physReg = reloadVirtReg(MBB, MI, virtualReg); + Virt2PhysRegMap[virtualReg] = physReg; + } + } + MO.setReg(physReg); + DOUT << "virt: " << virtualReg << ", phys: " << MO.getReg() << "\n"; + } + } + RegClassIdx.clear(); + RegsUsed.clear(); + } +} + + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RegAllocSimple::runOnMachineFunction(MachineFunction &Fn) { + DOUT << "Machine Function\n"; + MF = &Fn; + TM = &MF->getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) + AllocateBasicBlock(*MBB); + + StackSlotForVirtReg.clear(); + return true; +} + +FunctionPass *llvm::createSimpleRegisterAllocator() { + return new RegAllocSimple(); +} diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp new file mode 100644 index 0000000..1131e3d --- /dev/null +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -0,0 +1,41 @@ +//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the generic RegisterCoalescer interface which +// is used as the common interface used by all clients and +// implementations of register coalescing. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Pass.h" + +using namespace llvm; + +// Register the RegisterCoalescer interface, providing a nice name to refer to. 
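+// An implementation joins the analysis group from its own file. A sketch of
+// the usual idiom (pass and string names illustrative):
+//
+//   static RegisterPass<MyCoalescer>
+//   X("my-coalescer", "My register coalescing pass");
+//   // Declare that we implement the RegisterCoalescer interface, and make
+//   // this the default implementation of the group.
+//   static RegisterAnalysisGroup<RegisterCoalescer, true> Y(X);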
+static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer"); +char RegisterCoalescer::ID = 0; + +// RegisterCoalescer destructor: DO NOT move this to the header file +// for RegisterCoalescer or else clients of the RegisterCoalescer +// class may not depend on the RegisterCoalescer.o file in the current +// .a file, causing alias analysis support to not be included in the +// tool correctly! +// +RegisterCoalescer::~RegisterCoalescer() {} + +// Because of the way .a files work, we must force the SimpleRC +// implementation to be pulled in if the RegisterCoalescer classes are +// pulled in. Otherwise we run the risk of RegisterCoalescer being +// used, but the default implementation not being linked into the tool +// that uses it. +DEFINING_FILE_FOR(RegisterCoalescer) diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp new file mode 100644 index 0000000..944468e --- /dev/null +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -0,0 +1,480 @@ +//===-- RegisterScavenging.cpp - Machine register scavenging --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine register scavenger. It can provide +// information, such as unused registers, at any point in a machine basic block. +// It also provides a mechanism to make registers available by evicting them to +// spill slots. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reg-scavenging" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +/// RedefinesSuperRegPart - Return true if the specified register is redefining +/// part of a super-register. +static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg, + const TargetRegisterInfo *TRI) { + bool SeenSuperUse = false; + bool SeenSuperDef = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (TRI->isSuperRegister(SubReg, MO.getReg())) { + if (MO.isUse()) + SeenSuperUse = true; + else if (MO.isImplicit()) + SeenSuperDef = true; + } + } + + return SeenSuperDef && SeenSuperUse; +} + +static bool RedefinesSuperRegPart(const MachineInstr *MI, + const MachineOperand &MO, + const TargetRegisterInfo *TRI) { + assert(MO.isReg() && MO.isDef() && "Not a register def!"); + return RedefinesSuperRegPart(MI, MO.getReg(), TRI); +} + +/// setUsed - Set the register and its sub-registers as being used. +void RegScavenger::setUsed(unsigned Reg, bool ImpDef) { + RegsAvailable.reset(Reg); + ImplicitDefed[Reg] = ImpDef; + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + RegsAvailable.reset(SubReg); + ImplicitDefed[SubReg] = ImpDef; + } +} + +/// setUnused - Set the register and its sub-registers as being unused. 
+void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) { + RegsAvailable.set(Reg); + ImplicitDefed.reset(Reg); + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + if (!RedefinesSuperRegPart(MI, Reg, TRI)) { + RegsAvailable.set(SubReg); + ImplicitDefed.reset(SubReg); + } +} + +void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { + MachineFunction &MF = *mbb->getParent(); + const TargetMachine &TM = MF.getTarget(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + MRI = &MF.getRegInfo(); + + assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && + "Target changed?"); + + if (!MBB) { + NumPhysRegs = TRI->getNumRegs(); + RegsAvailable.resize(NumPhysRegs); + ImplicitDefed.resize(NumPhysRegs); + + // Create reserved registers bitvector. + ReservedRegs = TRI->getReservedRegs(MF); + + // Create callee-saved registers bitvector. + CalleeSavedRegs.resize(NumPhysRegs); + const unsigned *CSRegs = TRI->getCalleeSavedRegs(); + if (CSRegs != NULL) + for (unsigned i = 0; CSRegs[i]; ++i) + CalleeSavedRegs.set(CSRegs[i]); + } + + MBB = mbb; + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + CurrDist = 0; + DistanceMap.clear(); + ImplicitDefed.reset(); + + // All registers started out unused. + RegsAvailable.set(); + + // Reserved registers are always used. + RegsAvailable ^= ReservedRegs; + + // Live-in registers are in use. + if (!MBB->livein_empty()) + for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + setUsed(*I); + + Tracking = false; +} + +void RegScavenger::restoreScavengedReg() { + TII->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg, + ScavengingFrameIndex, ScavengedRC); + MachineBasicBlock::iterator II = prior(MBBI); + TRI->eliminateFrameIndex(II, 0, this); + setUsed(ScavengedReg); + ScavengedReg = 0; + ScavengedRC = NULL; +} + +#ifndef NDEBUG +/// isLiveInButUnusedBefore - Return true if register is livein the MBB not +/// not used before it reaches the MI that defines register. +static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + const TargetRegisterInfo *TRI, + MachineRegisterInfo* MRI) { + // First check if register is livein. + bool isLiveIn = false; + for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + if (Reg == *I || TRI->isSuperRegister(Reg, *I)) { + isLiveIn = true; + break; + } + if (!isLiveIn) + return false; + + // Is there any use of it before the specified MI? + SmallPtrSet<MachineInstr*, 4> UsesInMBB; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI->getParent() == MBB) + UsesInMBB.insert(UseMI); + } + if (UsesInMBB.empty()) + return true; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I) + if (UsesInMBB.count(&*I)) + return false; + return true; +} +#endif + +void RegScavenger::forward() { + // Move ptr forward. 
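+  // Typical driver pattern for the forward walk (a sketch; clients such as
+  // the frame-index elimination code keep the scavenger in sync one
+  // instruction at a time):
+  //
+  //   RS.enterBasicBlock(MBB);
+  //   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+  //        I != E; ++I) {
+  //     RS.forward();   // register state now reflects *I
+  //     // ... query FindUnusedReg / scavengeRegister here ...
+  //   }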
+ if (!Tracking) { + MBBI = MBB->begin(); + Tracking = true; + } else { + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + MBBI = next(MBBI); + } + + MachineInstr *MI = MBBI; + DistanceMap.insert(std::make_pair(MI, CurrDist++)); + + if (MI == ScavengeRestore) { + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + } + + bool IsImpDef = MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF; + + // Separate register operands into 3 classes: uses, defs, earlyclobbers. + SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs; + SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs; + SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; + if (MO.isUse()) + UseMOs.push_back(std::make_pair(&MO,i)); + else if (MO.isEarlyClobber()) + EarlyClobberMOs.push_back(std::make_pair(&MO,i)); + else + DefMOs.push_back(std::make_pair(&MO,i)); + } + + // Process uses first. + BitVector UseRegs(NumPhysRegs); + for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) { + const MachineOperand MO = *UseMOs[i].first; + unsigned Reg = MO.getReg(); + + assert(isUsed(Reg) && "Using an undefined register!"); + + if (MO.isKill() && !isReserved(Reg)) { + UseRegs.set(Reg); + + // Mark sub-registers as used. + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + UseRegs.set(SubReg); + } + } + + // Change states of all registers after all the uses are processed to guard + // against multiple uses. + setUnused(UseRegs); + + // Process early clobber defs then process defs. We can have a early clobber + // that is dead, it should not conflict with a def that happens one "slot" + // (see InstrSlots in LiveIntervalAnalysis.h) later. + unsigned NumECs = EarlyClobberMOs.size(); + unsigned NumDefs = DefMOs.size(); + + for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) { + const MachineOperand &MO = (i < NumECs) + ? *EarlyClobberMOs[i].first : *DefMOs[i-NumECs].first; + unsigned Idx = (i < NumECs) + ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second; + unsigned Reg = MO.getReg(); + + // If it's dead upon def, then it is now free. + if (MO.isDead()) { + setUnused(Reg, MI); + continue; + } + + // Skip two-address destination operand. + if (MI->isRegTiedToUseOperand(Idx)) { + assert(isUsed(Reg) && "Using an undefined register!"); + continue; + } + + // Skip if this is merely redefining part of a super-register. + if (RedefinesSuperRegPart(MI, MO, TRI)) + continue; + + // Implicit def is allowed to "re-define" any register. Similarly, + // implicitly defined registers can be clobbered. + assert((isReserved(Reg) || isUnused(Reg) || + IsImpDef || isImplicitlyDefined(Reg) || + isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && + "Re-defining a live register!"); + setUsed(Reg, IsImpDef); + } +} + +void RegScavenger::backward() { + assert(Tracking && "Not tracking states!"); + assert(MBBI != MBB->begin() && "Already at start of basic block!"); + // Move ptr backward. + MBBI = prior(MBBI); + + MachineInstr *MI = MBBI; + DistanceMap.erase(MI); + --CurrDist; + + // Separate register operands into 3 classes: uses, defs, earlyclobbers. 
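+  // (An "early clobber" def, e.g. an inline asm output constrained with
+  // "=&r", is written before the instruction's uses are consumed, so such
+  // defs are kept on their own list and handled apart from ordinary defs.)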
+  SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs;
+  SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs;
+  SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;
+    if (MO.isUse())
+      UseMOs.push_back(std::make_pair(&MO,i));
+    else if (MO.isEarlyClobber())
+      EarlyClobberMOs.push_back(std::make_pair(&MO,i));
+    else
+      DefMOs.push_back(std::make_pair(&MO,i));
+  }
+
+  // Process defs first.
+  unsigned NumECs = EarlyClobberMOs.size();
+  unsigned NumDefs = DefMOs.size();
+  for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) {
+    // Select the operand and its index from the same list; the def list is
+    // consumed first, then the early clobber list.
+    const MachineOperand &MO = (i < NumDefs)
+      ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first;
+    unsigned Idx = (i < NumDefs)
+      ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second;
+
+    // Skip two-address destination operand.
+    if (MI->isRegTiedToUseOperand(Idx))
+      continue;
+
+    unsigned Reg = MO.getReg();
+    assert(isUsed(Reg));
+    if (!isReserved(Reg))
+      setUnused(Reg, MI);
+  }
+
+  // Process uses.
+  BitVector UseRegs(NumPhysRegs);
+  for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
+    const MachineOperand MO = *UseMOs[i].first;
+    unsigned Reg = MO.getReg();
+    assert(isUnused(Reg) || isReserved(Reg));
+    UseRegs.set(Reg);
+
+    // Set the sub-registers as "used".
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs)
+      UseRegs.set(SubReg);
+  }
+  setUsed(UseRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+  if (includeReserved)
+    used = ~RegsAvailable;
+  else
+    used = ~RegsAvailable & ~ReservedRegs;
+}
+
+/// CreateRegClassMask - Set the bits that represent the registers in the
+/// TargetRegisterClass.
+static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
+       ++I)
+    Mask.set(*I);
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+                                     const BitVector &Candidates) const {
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector RegsAvailableCopy(NumPhysRegs, false);
+  CreateRegClassMask(RegClass, RegsAvailableCopy);
+  RegsAvailableCopy &= RegsAvailable;
+
+  // Restrict the search to candidates.
+  RegsAvailableCopy &= Candidates;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+  int Reg = RegsAvailableCopy.find_first();
+  return (Reg == -1) ? 0 : Reg;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+                                     bool ExCalleeSaved) const {
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector RegsAvailableCopy(NumPhysRegs, false);
+  CreateRegClassMask(RegClass, RegsAvailableCopy);
+  RegsAvailableCopy &= RegsAvailable;
+
+  // If looking for a non-callee-saved register, mask off all the callee-saved
+  // registers.
+  if (ExCalleeSaved)
+    RegsAvailableCopy &= ~CalleeSavedRegs;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+  int Reg = RegsAvailableCopy.find_first();
+  return (Reg == -1) ? 0 : Reg;
+}
+
+/// findFirstUse - Calculate the distance to the first use of the
+/// specified register.
+MachineInstr* +RegScavenger::findFirstUse(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, unsigned Reg, + unsigned &Dist) { + MachineInstr *UseMI = 0; + Dist = ~0U; + for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), + RE = MRI->reg_end(); RI != RE; ++RI) { + MachineInstr *UDMI = &*RI; + if (UDMI->getParent() != MBB) + continue; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); + if (DI == DistanceMap.end()) { + // If it's not in map, it's below current MI, let's initialize the + // map. + I = next(I); + unsigned Dist = CurrDist + 1; + while (I != MBB->end()) { + DistanceMap.insert(std::make_pair(I, Dist++)); + I = next(I); + } + } + DI = DistanceMap.find(UDMI); + if (DI->second > CurrDist && DI->second < Dist) { + Dist = DI->second; + UseMI = UDMI; + } + } + return UseMI; +} + +unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, + MachineBasicBlock::iterator I, + int SPAdj) { + assert(ScavengingFrameIndex >= 0 && + "Cannot scavenge a register without an emergency spill slot!"); + + // Mask off the registers which are not in the TargetRegisterClass. + BitVector Candidates(NumPhysRegs, false); + CreateRegClassMask(RC, Candidates); + Candidates ^= ReservedRegs; // Do not include reserved registers. + + // Exclude all the registers being used by the instruction. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + MachineOperand &MO = I->getOperand(i); + if (MO.isReg()) + Candidates.reset(MO.getReg()); + } + + // Find the register whose use is furthest away. + unsigned SReg = 0; + unsigned MaxDist = 0; + MachineInstr *MaxUseMI = 0; + int Reg = Candidates.find_first(); + while (Reg != -1) { + unsigned Dist; + MachineInstr *UseMI = findFirstUse(MBB, I, Reg, Dist); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + unsigned AsDist; + MachineInstr *AsUseMI = findFirstUse(MBB, I, *AS, AsDist); + if (AsDist < Dist) { + Dist = AsDist; + UseMI = AsUseMI; + } + } + if (Dist >= MaxDist) { + MaxDist = Dist; + MaxUseMI = UseMI; + SReg = Reg; + } + Reg = Candidates.find_next(Reg); + } + + if (ScavengedReg != 0) { + assert(0 && "Scavenger slot is live, unable to scavenge another register!"); + abort(); + } + + // Spill the scavenged register before I. + TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); + MachineBasicBlock::iterator II = prior(I); + TRI->eliminateFrameIndex(II, SPAdj, this); + + // Restore the scavenged register before its use (or first terminator). + II = MaxUseMI + ? MachineBasicBlock::iterator(MaxUseMI) : MBB->getFirstTerminator(); + TII->loadRegFromStackSlot(*MBB, II, SReg, ScavengingFrameIndex, RC); + ScavengeRestore = prior(II); + ScavengedReg = SReg; + ScavengedRC = RC; + + return SReg; +} diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp new file mode 100644 index 0000000..a8452df --- /dev/null +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -0,0 +1,572 @@ +//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include <climits> +using namespace llvm; + +ScheduleDAG::ScheduleDAG(MachineFunction &mf) + : TM(mf.getTarget()), + TII(TM.getInstrInfo()), + TRI(TM.getRegisterInfo()), + TLI(TM.getTargetLowering()), + MF(mf), MRI(mf.getRegInfo()), + ConstPool(MF.getConstantPool()), + EntrySU(), ExitSU() { +} + +ScheduleDAG::~ScheduleDAG() {} + +/// dump - dump the schedule. +void ScheduleDAG::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + cerr << "**** NOOP ****\n"; + } +} + + +/// Run - perform scheduling. +/// +void ScheduleDAG::Run(MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos) { + BB = bb; + InsertPos = insertPos; + + SUnits.clear(); + Sequence.clear(); + EntrySU = SUnit(); + ExitSU = SUnit(); + + Schedule(); + + DOUT << "*** Final schedule ***\n"; + DEBUG(dumpSchedule()); + DOUT << "\n"; +} + +/// addPred - This adds the specified edge as a pred of the current node if +/// not already. It also adds the current node as a successor of the +/// specified node. +void SUnit::addPred(const SDep &D) { + // If this node already has this depenence, don't add a redundant one. + for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) + if (*I == D) + return; + // Now add a corresponding succ to N. + SDep P = D; + P.setSUnit(this); + SUnit *N = D.getSUnit(); + // Update the bookkeeping. + if (D.getKind() == SDep::Data) { + ++NumPreds; + ++N->NumSuccs; + } + if (!N->isScheduled) + ++NumPredsLeft; + if (!isScheduled) + ++N->NumSuccsLeft; + Preds.push_back(D); + N->Succs.push_back(P); + if (P.getLatency() != 0) { + this->setDepthDirty(); + N->setHeightDirty(); + } +} + +/// removePred - This removes the specified edge as a pred of the current +/// node if it exists. It also removes the current node as a successor of +/// the specified node. +void SUnit::removePred(const SDep &D) { + // Find the matching predecessor. + for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) + if (*I == D) { + bool FoundSucc = false; + // Find the corresponding successor in N. + SDep P = D; + P.setSUnit(this); + SUnit *N = D.getSUnit(); + for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(), + EE = N->Succs.end(); II != EE; ++II) + if (*II == P) { + FoundSucc = true; + N->Succs.erase(II); + break; + } + assert(FoundSucc && "Mismatching preds / succs lists!"); + Preds.erase(I); + // Update the bookkeeping. 
+ if (P.getKind() == SDep::Data) { + --NumPreds; + --N->NumSuccs; + } + if (!N->isScheduled) + --NumPredsLeft; + if (!isScheduled) + --N->NumSuccsLeft; + if (P.getLatency() != 0) { + this->setDepthDirty(); + N->setHeightDirty(); + } + return; + } +} + +void SUnit::setDepthDirty() { + if (!isDepthCurrent) return; + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *SU = WorkList.pop_back_val(); + SU->isDepthCurrent = false; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), + E = SU->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isDepthCurrent) + WorkList.push_back(SuccSU); + } + } while (!WorkList.empty()); +} + +void SUnit::setHeightDirty() { + if (!isHeightCurrent) return; + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *SU = WorkList.pop_back_val(); + SU->isHeightCurrent = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isHeightCurrent) + WorkList.push_back(PredSU); + } + } while (!WorkList.empty()); +} + +/// setDepthToAtLeast - Update this node's successors to reflect the +/// fact that this node's depth just increased. +/// +void SUnit::setDepthToAtLeast(unsigned NewDepth) { + if (NewDepth <= getDepth()) + return; + setDepthDirty(); + Depth = NewDepth; + isDepthCurrent = true; +} + +/// setHeightToAtLeast - Update this node's predecessors to reflect the +/// fact that this node's height just increased. +/// +void SUnit::setHeightToAtLeast(unsigned NewHeight) { + if (NewHeight <= getHeight()) + return; + setHeightDirty(); + Height = NewHeight; + isHeightCurrent = true; +} + +/// ComputeDepth - Calculate the maximal path from the node to the exit. +/// +void SUnit::ComputeDepth() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxPredDepth = 0; + for (SUnit::const_pred_iterator I = Cur->Preds.begin(), + E = Cur->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isDepthCurrent) + MaxPredDepth = std::max(MaxPredDepth, + PredSU->Depth + I->getLatency()); + else { + Done = false; + WorkList.push_back(PredSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxPredDepth != Cur->Depth) { + Cur->setDepthDirty(); + Cur->Depth = MaxPredDepth; + } + Cur->isDepthCurrent = true; + } + } while (!WorkList.empty()); +} + +/// ComputeHeight - Calculate the maximal path from the node to the entry. +/// +void SUnit::ComputeHeight() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxSuccHeight = 0; + for (SUnit::const_succ_iterator I = Cur->Succs.begin(), + E = Cur->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isHeightCurrent) + MaxSuccHeight = std::max(MaxSuccHeight, + SuccSU->Height + I->getLatency()); + else { + Done = false; + WorkList.push_back(SuccSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxSuccHeight != Cur->Height) { + Cur->setHeightDirty(); + Cur->Height = MaxSuccHeight; + } + Cur->isHeightCurrent = true; + } + } while (!WorkList.empty()); +} + +/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or +/// a group of nodes flagged together. 
+void SUnit::dump(const ScheduleDAG *G) const {
+  cerr << "SU(" << NodeNum << "): ";
+  G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+  dump(G);
+
+  cerr << "  # preds left       : " << NumPredsLeft << "\n";
+  cerr << "  # succs left       : " << NumSuccsLeft << "\n";
+  cerr << "  Latency            : " << Latency << "\n";
+  cerr << "  Depth              : " << Depth << "\n";
+  cerr << "  Height             : " << Height << "\n";
+
+  if (Preds.size() != 0) {
+    cerr << "  Predecessors:\n";
+    for (SUnit::const_pred_iterator I = Preds.begin(), E = Preds.end();
+         I != E; ++I) {
+      cerr << "   ";
+      switch (I->getKind()) {
+      case SDep::Data:   cerr << "val "; break;
+      case SDep::Anti:   cerr << "anti"; break;
+      case SDep::Output: cerr << "out "; break;
+      case SDep::Order:  cerr << "ch  "; break;
+      }
+      cerr << "#";
+      cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+      if (I->isArtificial())
+        cerr << " *";
+      cerr << "\n";
+    }
+  }
+  if (Succs.size() != 0) {
+    cerr << "  Successors:\n";
+    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+         I != E; ++I) {
+      cerr << "   ";
+      switch (I->getKind()) {
+      case SDep::Data:   cerr << "val "; break;
+      case SDep::Anti:   cerr << "anti"; break;
+      case SDep::Output: cerr << "out "; break;
+      case SDep::Order:  cerr << "ch  "; break;
+      }
+      cerr << "#";
+      cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+      if (I->isArtificial())
+        cerr << " *";
+      cerr << "\n";
+    }
+  }
+  cerr << "\n";
+}
+
+#ifndef NDEBUG
+/// VerifySchedule - Verify that all SUnits were scheduled and that
+/// their state is consistent.
+///
+void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+  bool AnyNotSched = false;
+  unsigned DeadNodes = 0;
+  unsigned Noops = 0;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (!SUnits[i].isScheduled) {
+      if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+        ++DeadNodes;
+        continue;
+      }
+      if (!AnyNotSched)
+        cerr << "*** Scheduling failed! ***\n";
+      SUnits[i].dump(this);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+    if (SUnits[i].isScheduled &&
+        (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+          unsigned(INT_MAX)) {
+      if (!AnyNotSched)
+        cerr << "*** Scheduling failed! ***\n";
+      SUnits[i].dump(this);
+      cerr << "has an unexpected "
+           << (isBottomUp ? "Height" : "Depth") << " value!\n";
+      AnyNotSched = true;
+    }
+    if (isBottomUp) {
+      if (SUnits[i].NumSuccsLeft != 0) {
+        if (!AnyNotSched)
+          cerr << "*** Scheduling failed! ***\n";
+        SUnits[i].dump(this);
+        cerr << "has successors left!\n";
+        AnyNotSched = true;
+      }
+    } else {
+      if (SUnits[i].NumPredsLeft != 0) {
+        if (!AnyNotSched)
+          cerr << "*** Scheduling failed! ***\n";
+        SUnits[i].dump(this);
+        cerr << "has predecessors left!\n";
+        AnyNotSched = true;
+      }
+    }
+  }
+  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+    if (!Sequence[i])
+      ++Noops;
+  assert(!AnyNotSched);
+  assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+         "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly.
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
+/// the nodes reachable from Y, and then shifts them using Shift to lie
+/// immediately after X in Index2Node.
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+  unsigned DAGSize = SUnits.size();
+  std::vector<SUnit*> WorkList;
+  WorkList.reserve(DAGSize);
+
+  Index2Node.resize(DAGSize);
+  Node2Index.resize(DAGSize);
+
+  // Initialize the data structures.
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    int NodeNum = SU->NodeNum;
+    unsigned Degree = SU->Succs.size();
+    // Temporarily use the Node2Index array as scratch space for degree counts.
+    Node2Index[NodeNum] = Degree;
+
+    // Is it a node without dependencies?
+    if (Degree == 0) {
+      assert(SU->Succs.empty() && "SUnit should have no successors");
+      // Collect leaf nodes.
+      WorkList.push_back(SU);
+    }
+  }
+
+  int Id = DAGSize;
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back();
+    WorkList.pop_back();
+    Allocate(SU->NodeNum, --Id);
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      SUnit *PredSU = I->getSUnit();
+      if (!--Node2Index[PredSU->NodeNum])
+        // If all dependencies of the node are processed already,
+        // then the node can be computed now.
+        WorkList.push_back(PredSU);
+    }
+  }
+
+  Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+  // Check correctness of the ordering.
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+             "Wrong topological sorting");
+    }
+  }
+#endif
+}
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+  int UpperBound, LowerBound;
+  LowerBound = Node2Index[Y->NodeNum];
+  UpperBound = Node2Index[X->NodeNum];
+  bool HasLoop = false;
+  // Is Ord(Y) < Ord(X)?
+  if (LowerBound < UpperBound) {
+    // Update the topological order.
+    Visited.reset();
+    DFS(Y, UpperBound, HasLoop);
+    assert(!HasLoop && "Inserted edge creates a loop!");
+    // Recompute topological indexes.
+    Shift(Visited, LowerBound, UpperBound);
+  }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate an edge
+/// to be removed from the specified node N from the predecessors
+/// of the current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+  // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
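+///
+/// For example, in a hypothetical four-node DAG with edges A->B and C->D and
+/// ord(A)=0, ord(B)=1, ord(C)=2, ord(D)=3, inserting the edge C->B calls
+/// DFS(B, ord(C)), which marks only B; Shift then renumbers the marked nodes
+/// to sit immediately after C, giving ord(C)=1 and ord(B)=2 and restoring
+/// the invariant ord(C) < ord(B).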
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, + bool& HasLoop) { + std::vector<const SUnit*> WorkList; + WorkList.reserve(SUnits.size()); + + WorkList.push_back(SU); + do { + SU = WorkList.back(); + WorkList.pop_back(); + Visited.set(SU->NodeNum); + for (int I = SU->Succs.size()-1; I >= 0; --I) { + int s = SU->Succs[I].getSUnit()->NodeNum; + if (Node2Index[s] == UpperBound) { + HasLoop = true; + return; + } + // Visit successors if not already and in affected region. + if (!Visited.test(s) && Node2Index[s] < UpperBound) { + WorkList.push_back(SU->Succs[I].getSUnit()); + } + } + } while (!WorkList.empty()); +} + +/// Shift - Renumber the nodes so that the topological ordering is +/// preserved. +void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, + int UpperBound) { + std::vector<int> L; + int shift = 0; + int i; + + for (i = LowerBound; i <= UpperBound; ++i) { + // w is node at topological index i. + int w = Index2Node[i]; + if (Visited.test(w)) { + // Unmark. + Visited.reset(w); + L.push_back(w); + shift = shift + 1; + } else { + Allocate(w, i - shift); + } + } + + for (unsigned j = 0; j < L.size(); ++j) { + Allocate(L[j], i - shift); + i = i + 1; + } +} + + +/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will +/// create a cycle. +bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + if (IsReachable(TargetSU, SU)) + return true; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isAssignedRegDep() && + IsReachable(TargetSU, I->getSUnit())) + return true; + return false; +} + +/// IsReachable - Checks if SU is reachable from TargetSU. +bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, + const SUnit *TargetSU) { + // If insertion of the edge SU->TargetSU would create a cycle + // then there is a path from TargetSU to SU. + int UpperBound, LowerBound; + LowerBound = Node2Index[TargetSU->NodeNum]; + UpperBound = Node2Index[SU->NodeNum]; + bool HasLoop = false; + // Is Ord(TargetSU) < Ord(SU) ? + if (LowerBound < UpperBound) { + Visited.reset(); + // There may be a path from TargetSU to SU. Check for it. + DFS(TargetSU, UpperBound, HasLoop); + } + return HasLoop; +} + +/// Allocate - assign the topological index to the node n. +void ScheduleDAGTopologicalSort::Allocate(int n, int index) { + Node2Index[n] = index; + Index2Node[index] = n; +} + +ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort( + std::vector<SUnit> &sunits) + : SUnits(sunits) {} + +ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp new file mode 100644 index 0000000..770f5bb --- /dev/null +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -0,0 +1,71 @@ +//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the ScheduleDAG class, which creates +// MachineInstrs according to the computed schedule. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) { + MI->addMemOperand(MF, MO); +} + +void ScheduleDAG::EmitNoop() { + TII->insertNoop(*BB, InsertPos); +} + +void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, + DenseMap<SUnit*, unsigned> &VRBaseMap) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second, + SU->CopyDstRC, SU->CopySrcRC); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(), + SU->CopyDstRC, SU->CopySrcRC); + } + break; + } +} diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp new file mode 100644 index 0000000..8e18b3d --- /dev/null +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -0,0 +1,468 @@ +//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAGInstrs class, which implements re-scheduling +// of MachineInstrs. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sched-instrs" +#include "ScheduleDAGInstrs.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" +using namespace llvm; + +ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, + const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) + : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {} + +/// Run - perform scheduling. 
+///
+void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
+                            MachineBasicBlock::iterator begin,
+                            MachineBasicBlock::iterator end,
+                            unsigned endcount) {
+  BB = bb;
+  Begin = begin;
+  InsertPosIndex = endcount;
+
+  ScheduleDAG::Run(bb, end);
+}
+
+/// getOpcode - If this is an Instruction or a ConstantExpr, return the
+/// opcode value. Otherwise return UserOp1.
+static unsigned getOpcode(const Value *V) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getOpcode();
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    return CE->getOpcode();
+  // Use UserOp1 to mean there's no opcode.
+  return Instruction::UserOp1;
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+  do {
+    if (const User *U = dyn_cast<User>(V)) {
+      // If we find a ptrtoint, we can transfer control back to the
+      // regular getUnderlyingObjectFromInt.
+      if (getOpcode(U) == Instruction::PtrToInt)
+        return U->getOperand(0);
+      // If we find an add of a constant or a multiplied value, it's
+      // likely that the other operand will lead us to the base
+      // object. We don't have to worry about the case where the
+      // object address is somehow being computed by the multiply,
+      // because our callers only care when the result is an
+      // identifiable object.
+      if (getOpcode(U) != Instruction::Add ||
+          (!isa<ConstantInt>(U->getOperand(1)) &&
+           getOpcode(U->getOperand(1)) != Instruction::Mul))
+        return V;
+      V = U->getOperand(0);
+    } else {
+      return V;
+    }
+    assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
+  } while (1);
+}
+
+/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObject(const Value *V) {
+  // First just call Value::getUnderlyingObject to let it do what it does.
+  do {
+    V = V->getUnderlyingObject();
+    // If it found an inttoptr, use special code to continue climbing.
+    if (getOpcode(V) != Instruction::IntToPtr)
+      break;
+    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+    // If that succeeded in finding a pointer, continue the search.
+    if (!isa<PointerType>(O->getType()))
+      break;
+    V = O;
+  } while (1);
+  return V;
+}
+
+/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object. Otherwise return null.
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) {
+  if (!MI->hasOneMemOperand() ||
+      !MI->memoperands_begin()->getValue() ||
+      MI->memoperands_begin()->isVolatile())
+    return 0;
+
+  const Value *V = MI->memoperands_begin()->getValue();
+  if (!V)
+    return 0;
+
+  V = getUnderlyingObject(V);
+  if (!isa<PseudoSourceValue>(V) && !isIdentifiedObject(V))
+    return 0;
+
+  return V;
+}
+
+void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+  if (MachineLoop *ML = MLI.getLoopFor(BB))
+    if (BB == ML->getLoopLatch()) {
+      MachineBasicBlock *Header = ML->getHeader();
+      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+           E = Header->livein_end(); I != E; ++I)
+        LoopLiveInRegs.insert(*I);
+      LoopRegs.VisitLoop(ML);
+    }
+}
+
+void ScheduleDAGInstrs::BuildSchedGraph() {
+  // We'll be allocating one SUnit for each instruction, plus one for
+  // the region exit node.
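+  // Illustrative note: NewSUnit hands out pointers into the SUnits vector
+  // and asserts that the vector is never reallocated while the graph is
+  // being built, e.g.
+  //
+  //   SUnit *A = NewSUnit(MI0);  // &SUnits[0]
+  //   SUnit *B = NewSUnit(MI1);  // &SUnits[1]; must not invalidate A
+  //
+  // so reserving BB->size() entries up front keeps those pointers stable.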
+  SUnits.reserve(BB->size());
+
+  // We build scheduling units by walking a block's instruction list from
+  // bottom to top.
+
+  // Remember where a generic side-effecting instruction is as we proceed. If
+  // ChainMMO is null, this is assumed to have arbitrary side-effects. If
+  // ChainMMO is non-null, then Chain makes only a single memory reference.
+  SUnit *Chain = 0;
+  MachineMemOperand *ChainMMO = 0;
+
+  // Memory references to specific known memory locations are tracked so that
+  // they can be given more precise dependencies.
+  std::map<const Value *, SUnit *> MemDefs;
+  std::map<const Value *, std::vector<SUnit *> > MemUses;
+
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
+  // Ask the target if address-backscheduling is desirable, and if so how much.
+  unsigned SpecialAddressLatency =
+    TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency();
+
+  // Walk the list of instructions, from bottom moving up.
+  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+       MII != MIE; --MII) {
+    MachineInstr *MI = prior(MII);
+    const TargetInstrDesc &TID = MI->getDesc();
+    assert(!TID.isTerminator() && !MI->isLabel() &&
+           "Cannot schedule terminators or labels!");
+    // Create the SUnit for this MI.
+    SUnit *SU = NewSUnit(MI);
+
+    // Assign the Latency field of SU using target-provided information.
+    if (UnitLatencies)
+      SU->Latency = 1;
+    else
+      ComputeLatency(SU);
+
+    // Add register-based dependencies (data, anti, and output).
+    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+      const MachineOperand &MO = MI->getOperand(j);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+
+      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+      std::vector<SUnit *> &UseList = Uses[Reg];
+      std::vector<SUnit *> &DefList = Defs[Reg];
+      // Optionally add output and anti dependencies.
+      // TODO: Using a latency of 1 here assumes there's no cost for
+      //       reusing registers.
+      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+        SUnit *DefSU = DefList[i];
+        if (DefSU != SU &&
+            (Kind != SDep::Output || !MO.isDead() ||
+             !DefSU->getInstr()->registerDefIsDead(Reg)))
+          DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+      }
+      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+        std::vector<SUnit *> &DefList = Defs[*Alias];
+        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+          SUnit *DefSU = DefList[i];
+          if (DefSU != SU &&
+              (Kind != SDep::Output || !MO.isDead() ||
+               !DefSU->getInstr()->registerDefIsDead(Reg)))
+            DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+        }
+      }
+
+      if (MO.isDef()) {
+        // Add any data dependencies.
+        unsigned DataLatency = SU->Latency;
+        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+          SUnit *UseSU = UseList[i];
+          if (UseSU != SU) {
+            unsigned LDataLatency = DataLatency;
+            // Optionally add in a special extra latency for nodes that
+            // feed addresses.
+            // TODO: Do this for register aliases too.
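+            // Illustrative sketch (assuming a target where
+            // SpecialAddressLatency == 2):
+            //
+            //   r1 = ADD r2, r3    ; SU, def of r1
+            //   r4 = LOAD [r1]     ; UseSU, r1 feeds an address operand
+            //
+            // The data edge ADD -> LOAD gets latency SU->Latency + 2, so the
+            // scheduler tries to start the address computation early.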
+            if (SpecialAddressLatency != 0 && !UnitLatencies) {
+              MachineInstr *UseMI = UseSU->getInstr();
+              const TargetInstrDesc &UseTID = UseMI->getDesc();
+              int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+              assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+              if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+                  (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+                  UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+                LDataLatency += SpecialAddressLatency;
+            }
+            UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg));
+          }
+        }
+        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+          std::vector<SUnit *> &UseList = Uses[*Alias];
+          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+            SUnit *UseSU = UseList[i];
+            if (UseSU != SU)
+              UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
+          }
+        }
+
+        // If a def is going to wrap back around to the top of the loop,
+        // backschedule it.
+        if (!UnitLatencies && DefList.empty()) {
+          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+          if (I != LoopRegs.Deps.end()) {
+            const MachineOperand *UseMO = I->second.first;
+            unsigned Count = I->second.second;
+            const MachineInstr *UseMI = UseMO->getParent();
+            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            // TODO: If we knew the total depth of the region here, we could
+            // handle the case where the whole loop is inside the region but
+            // is large enough that the isScheduleHigh trick isn't needed.
+            if (UseMOIdx < UseTID.getNumOperands()) {
+              // Currently, we only support scheduling regions consisting of
+              // single basic blocks. Check to see if the instruction is in
+              // the same region by checking to see if it has the same parent.
+              if (UseMI->getParent() != MI->getParent()) {
+                unsigned Latency = SU->Latency;
+                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+                  Latency += SpecialAddressLatency;
+                // This is a wild guess as to the portion of the latency which
+                // will be overlapped by work done outside the current
+                // scheduling region.
+                Latency -= std::min(Latency, Count);
+                // Add the artificial edge.
+                ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+                                    /*Reg=*/0, /*isNormalMemory=*/false,
+                                    /*isMustAlias=*/false,
+                                    /*isArtificial=*/true));
+              } else if (SpecialAddressLatency > 0 &&
+                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+                // The entire loop body is within the current scheduling region
+                // and the latency of this operation is assumed to be greater
+                // than the latency of the loop.
+                // TODO: Recursively mark data-edge predecessors as
+                //       isScheduleHigh too.
+                SU->isScheduleHigh = true;
+              }
+            }
+            LoopRegs.Deps.erase(I);
+          }
+        }
+
+        UseList.clear();
+        if (!MO.isDead())
+          DefList.clear();
+        DefList.push_back(SU);
+      } else {
+        UseList.push_back(SU);
+      }
+    }
+
+    // Add chain dependencies.
+    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+    // after stack slots are lowered to actual addresses.
+    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+    // produce more precise dependence information.
+    if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
+    new_chain:
+      // This is the conservative case. Add dependencies on all memory
+      // references.
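+      // Illustrative sketch of the bookkeeping below: in a region such as
+      //
+      //   CALL foo     ; visited now (bottom-up walk), becomes the new Chain
+      //   STORE [a]    ; visited earlier, recorded in MemDefs
+      //   LOAD  [b]    ; visited earlier, recorded in PendingLoads
+      //
+      // the store and load each get an Order edge on the call, and the maps
+      // are then cleared so instructions above the call need only one edge
+      // to Chain rather than edges to everything below it.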
+      if (Chain)
+        Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+      Chain = SU;
+      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+        PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+      PendingLoads.clear();
+      for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
+           E = MemDefs.end(); I != E; ++I) {
+        I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
+        I->second = SU;
+      }
+      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+             MemUses.begin(), E = MemUses.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency));
+        I->second.clear();
+      }
+      // See if it is known to just have a single memory reference.
+      MachineInstr *ChainMI = Chain->getInstr();
+      const TargetInstrDesc &ChainTID = ChainMI->getDesc();
+      if (!ChainTID.isCall() &&
+          !ChainTID.hasUnmodeledSideEffects() &&
+          ChainMI->hasOneMemOperand() &&
+          !ChainMI->memoperands_begin()->isVolatile() &&
+          ChainMI->memoperands_begin()->getValue())
+        // We know that the Chain accesses one specific memory location.
+        ChainMMO = &*ChainMI->memoperands_begin();
+      else
+        // Unknown memory accesses. Assume the worst.
+        ChainMMO = 0;
+    } else if (TID.mayStore()) {
+      if (const Value *V = getUnderlyingObjectForInstr(MI)) {
+        // A store to a specific PseudoSourceValue. Add precise dependencies.
+        // Handle the def in MemDefs, if there is one.
+        std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
+        if (I != MemDefs.end()) {
+          I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+                                  /*isNormalMemory=*/true));
+          I->second = SU;
+        } else {
+          MemDefs[V] = SU;
+        }
+        // Handle the uses in MemUses, if there are any.
+        std::map<const Value *, std::vector<SUnit *> >::iterator J =
+          MemUses.find(V);
+        if (J != MemUses.end()) {
+          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+            J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+                                       /*isNormalMemory=*/true));
+          J->second.clear();
+        }
+        // Add dependencies from all the PendingLoads, since without
+        // memoperands we must assume they alias anything.
+        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+          PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
+        // Add a general dependence too, if needed.
+        if (Chain)
+          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+      } else
+        // Treat all other stores conservatively.
+        goto new_chain;
+    } else if (TID.mayLoad()) {
+      if (TII->isInvariantLoad(MI)) {
+        // Invariant load, no chain dependencies needed!
+      } else if (const Value *V = getUnderlyingObjectForInstr(MI)) {
+        // A load from a specific PseudoSourceValue. Add precise dependencies.
+        std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
+        if (I != MemDefs.end())
+          I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
+                                  /*isNormalMemory=*/true));
+        MemUses[V].push_back(SU);
+
+        // Add a general dependence too, if needed.
+        if (Chain && (!ChainMMO ||
+                      (ChainMMO->isStore() || ChainMMO->isVolatile())))
+          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
+      } else if (MI->hasVolatileMemoryRef()) {
+        // Treat volatile loads conservatively. Note that this includes
+        // cases where memoperand information is unavailable.
+        goto new_chain;
+      } else {
+        // A normal load. Depend on the general chain, as well as on
+        // all stores. In the absence of MachineMemOperand information,
+        // we can't even assume that the load doesn't alias well-behaved
+        // memory locations.
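+        // Illustrative contrast: a reload from a spill slot carries a
+        // PseudoSourceValue memoperand and takes the precise path above,
+        // depending only on stores to that slot; a load whose memoperand
+        // was dropped lands here and must be ordered after every tracked
+        // store.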
+ if (Chain) + Chain->addPred(SDep(SU, SDep::Order, SU->Latency)); + for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(), + E = MemDefs.end(); I != E; ++I) + I->second->addPred(SDep(SU, SDep::Order, SU->Latency)); + PendingLoads.push_back(SU); + } + } + } + + for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { + Defs[i].clear(); + Uses[i].clear(); + } + PendingLoads.clear(); +} + +void ScheduleDAGInstrs::FinishBlock() { + // Nothing to do. +} + +void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes flagged together into this SUnit. + SU->Latency = + InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass()); + + // Simplistic target-independent heuristic: assume that loads take + // extra time. + if (InstrItins.isEmpty()) + if (SU->getInstr()->getDesc().mayLoad()) + SU->Latency += 2; +} + +void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { + SU->getInstr()->dump(); +} + +std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream oss(s); + if (SU == &EntrySU) + oss << "<entry>"; + else if (SU == &ExitSU) + oss << "<exit>"; + else + SU->getInstr()->print(oss); + return oss.str(); +} + +// EmitSchedule - Emit the machine code in scheduled order. +MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { + // For MachineInstr-based scheduling, we're rescheduling the instructions in + // the block, so start by removing them from the block. + while (Begin != InsertPos) { + MachineBasicBlock::iterator I = Begin; + ++Begin; + BB->remove(I); + } + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + BB->insert(InsertPos, SU->getInstr()); + } + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (!Sequence.empty()) + Begin = Sequence[0]->getInstr(); + + return BB; +} diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h new file mode 100644 index 0000000..00d6268 --- /dev/null +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -0,0 +1,184 @@ +//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGInstrs class, which implements +// scheduling for a MachineInstr-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGINSTRS_H +#define SCHEDULEDAGINSTRS_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <map> + +namespace llvm { + class MachineLoopInfo; + class MachineDominatorTree; + + /// LoopDependencies - This class analyzes loop-oriented register + /// dependencies, which are used to guide scheduling decisions. + /// For example, loop induction variable increments should be + /// scheduled as soon as possible after the variable's last use. 
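+  /// For instance (illustrative pseudo-code, not from any compiled input):
+  ///
+  ///   loop:
+  ///     ... = use i        ; last use of the loop live-in i
+  ///     i = i + 1          ; increment feeding the back edge
+  ///     branch loop
+  ///
+  /// Deps then maps i's register to that use and its position in the block,
+  /// so the increment can be backscheduled toward it.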
+ /// + class VISIBILITY_HIDDEN LoopDependencies { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + + public: + typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> > + LoopDeps; + LoopDeps Deps; + + LoopDependencies(const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) : + MLI(mli), MDT(mdt) {} + + /// VisitLoop - Clear out any previous state and analyze the given loop. + /// + void VisitLoop(const MachineLoop *Loop) { + Deps.clear(); + MachineBasicBlock *Header = Loop->getHeader(); + SmallSet<unsigned, 8> LoopLiveIns; + for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), + LE = Header->livein_end(); LI != LE; ++LI) + LoopLiveIns.insert(*LI); + + const MachineDomTreeNode *Node = MDT.getNode(Header); + const MachineBasicBlock *MBB = Node->getBlock(); + assert(Loop->contains(MBB) && + "Loop does not contain header!"); + VisitRegion(Node, MBB, Loop, LoopLiveIns); + } + + private: + void VisitRegion(const MachineDomTreeNode *Node, + const MachineBasicBlock *MBB, + const MachineLoop *Loop, + const SmallSet<unsigned, 8> &LoopLiveIns) { + unsigned Count = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I, ++Count) { + const MachineInstr *MI = I; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (LoopLiveIns.count(MOReg)) + Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); + } + } + + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + for (std::vector<MachineDomTreeNode*>::const_iterator I = + Children.begin(), E = Children.end(); I != E; ++I) { + const MachineDomTreeNode *ChildNode = *I; + MachineBasicBlock *ChildBlock = ChildNode->getBlock(); + if (Loop->contains(ChildBlock)) + VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); + } + } + }; + + /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of + /// MachineInstrs. + class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + + /// Defs, Uses - Remember where defs and uses of each physical register + /// are as we iterate upward through the instructions. This is allocated + /// here instead of inside BuildSchedGraph to avoid the need for it to be + /// initialized and destructed for each block. + std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister]; + + /// PendingLoads - Remember where unknown loads are after the most recent + /// unknown store, as we iterate. As with Defs and Uses, this is here + /// to minimize construction/destruction. + std::vector<SUnit *> PendingLoads; + + /// LoopRegs - Track which registers are used for loop-carried dependencies. + /// + LoopDependencies LoopRegs; + + /// LoopLiveInRegs - Track which regs are live into a loop, to help guide + /// back-edge-aware scheduling. + /// + SmallSet<unsigned, 8> LoopLiveInRegs; + + public: + MachineBasicBlock *BB; // Current basic block + MachineBasicBlock::iterator Begin; // The beginning of the range to + // be scheduled. The range extends + // to InsertPos. + unsigned InsertPosIndex; // The index in BB of InsertPos. 
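+    // A minimal usage sketch (the subclass name here is hypothetical; any
+    // subclass providing Schedule() will do, and the endcount argument is
+    // shown as the block size for a whole-block region):
+    //
+    //   MyListScheduler Scheduler(MF, MLI, MDT);
+    //   Scheduler.StartBlock(MBB);
+    //   Scheduler.Run(MBB, MBB->begin(), MBB->end(), MBB->size());
+    //   Scheduler.EmitSchedule();
+    //   Scheduler.FinishBlock();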
+
+    explicit ScheduleDAGInstrs(MachineFunction &mf,
+                               const MachineLoopInfo &mli,
+                               const MachineDominatorTree &mdt);
+
+    virtual ~ScheduleDAGInstrs() {}
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+    ///
+    SUnit *NewSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+      const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+      SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+      assert((Addr == 0 || Addr == &SUnits[0]) &&
+             "SUnits std::vector reallocated on the fly!");
+      SUnits.back().OrigNode = &SUnits.back();
+      return &SUnits.back();
+    }
+
+    /// Run - perform scheduling.
+    ///
+    void Run(MachineBasicBlock *bb,
+             MachineBasicBlock::iterator begin,
+             MachineBasicBlock::iterator end,
+             unsigned endindex);
+
+    /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that is
+    /// our input.
+    virtual void BuildSchedGraph();
+
+    /// ComputeLatency - Compute node latency.
+    ///
+    virtual void ComputeLatency(SUnit *SU);
+
+    virtual MachineBasicBlock *EmitSchedule();
+
+    /// StartBlock - Prepare to perform scheduling in the given block.
+    ///
+    virtual void StartBlock(MachineBasicBlock *BB);
+
+    /// Schedule - Order nodes according to selected style, filling
+    /// in the Sequence member.
+    ///
+    virtual void Schedule() = 0;
+
+    /// FinishBlock - Clean up after scheduling in the given block.
+    ///
+    virtual void FinishBlock();
+
+    virtual void dumpNode(const SUnit *SU) const;
+
+    virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+  };
+}
+
+#endif
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 0000000..594c24d
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,97 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+    static std::string getGraphName(const ScheduleDAG *G) {
+      return G->MF.getFunction()->getName();
+    }
+
+    static bool renderGraphFromBottomUp() {
+      return true;
+    }
+
+    static bool hasNodeAddressLabel(const SUnit *Node,
+                                    const ScheduleDAG *Graph) {
+      return true;
+    }
+
+    /// If you want to override the dot attributes printed for a particular
+    /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + + static std::string getNodeLabel(const SUnit *Node, + const ScheduleDAG *Graph); + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(ScheduleDAG *G, + GraphWriter<ScheduleDAG*> &GW) { + return G->addCustomGraphFeatures(GW); + } + }; +} + +std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, + const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); +} + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void ScheduleDAG::viewGraph() { +// This code is only for debugging! +#ifndef NDEBUG + if (BB->getBasicBlock()) + ViewGraph(this, "dag." + MF.getFunction()->getName(), + "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' + + BB->getBasicBlock()->getName()); + else + ViewGraph(this, "dag." + MF.getFunction()->getName(), + "Scheduling-Units Graph for " + MF.getFunction()->getName()); +#else + cerr << "ScheduleDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt new file mode 100644 index 0000000..9ea59ea --- /dev/null +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -0,0 +1,22 @@ +add_llvm_library(LLVMSelectionDAG + CallingConvLower.cpp + DAGCombiner.cpp + FastISel.cpp + LegalizeDAG.cpp + LegalizeFloatTypes.cpp + LegalizeIntegerTypes.cpp + LegalizeTypes.cpp + LegalizeTypesGeneric.cpp + LegalizeVectorOps.cpp + LegalizeVectorTypes.cpp + ScheduleDAGSDNodes.cpp + ScheduleDAGSDNodesEmit.cpp + ScheduleDAGFast.cpp + ScheduleDAGList.cpp + ScheduleDAGRRList.cpp + SelectionDAGBuild.cpp + SelectionDAG.cpp + SelectionDAGISel.cpp + SelectionDAGPrinter.cpp + TargetLowering.cpp + ) diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp new file mode 100644 index 0000000..7cd2b73 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -0,0 +1,148 @@ +//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. 
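+// For example (illustrative numbers): a byval argument with byval size 12
+// and byval alignment 8, constrained by MinSize 16 and MinAlign 8 from the
+// calling convention, is given a 16-byte stack slot at the next offset
+// aligned to 8.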
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// incorporating info about the formals into this state. +void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) { + unsigned NumArgs = TheArgs->getNumValues()-1; + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = TheArgs->getValueType(i); + ISD::ArgFlagsTy ArgFlags = + cast<ARG_FLAGSSDNode>(TheArgs->getOperand(3+i))->getArgFlags(); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Formal argument #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) { + MVT VT = TheRet->getOperand(i*2+1).getValueType(); + ISD::ArgFlagsTy ArgFlags = + cast<ARG_FLAGSSDNode>(TheRet->getOperand(i*2+2))->getArgFlags(); + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){ + cerr << "Return operand #" << i << " has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } + } +} + + +/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info +/// about the passed values into this state. +void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) { + unsigned NumOps = TheCall->getNumArgs(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = TheCall->getArg(i).getValueType(); + ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// incorporating info about the passed values into this state. 
+void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) { + for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) { + MVT VT = TheCall->getRetValType(i); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (TheCall->isInreg()) + Flags.setInReg(); + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { + cerr << "Call result #" << i << " has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { + cerr << "Call result has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } +} diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 0000000..4c1710d --- /dev/null +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,6203 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dagcombine" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <set> +using namespace llvm; + +STATISTIC(NodesCombined , "Number of dag nodes combined"); +STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); +STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); +STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); + +namespace { + static cl::opt<bool> + CombinerAA("combiner-alias-analysis", cl::Hidden, + cl::desc("Turn on alias analysis during testing")); + + static cl::opt<bool> + CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, + cl::desc("Include global information in alias analysis")); + +//------------------------------ DAGCombiner ---------------------------------// + + class VISIBILITY_HIDDEN DAGCombiner { + SelectionDAG &DAG; + const TargetLowering &TLI; + CombineLevel Level; + CodeGenOpt::Level OptLevel; + bool LegalOperations; + bool LegalTypes; + + // Worklist of all of the nodes that need to be simplified. + std::vector<SDNode*> WorkList; + + // AA - Used for DAG load/store alias analysis. 
+    AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users
+    /// of the instruction to the work lists because they might get more
+    /// simplified now.
+    ///
+    void AddUsersToWorkList(SDNode *N) {
+      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+           UI != UE; ++UI)
+        AddToWorkList(*UI);
+    }
+
+    /// visit - call the node-specific routine that knows how to fold each
+    /// particular type of node.
+    SDValue visit(SDNode *N);
+
+  public:
+    /// AddToWorkList - Add to the work list making sure its instance is at
+    /// the back (next to be processed.)
+    void AddToWorkList(SDNode *N) {
+      removeFromWorkList(N);
+      WorkList.push_back(N);
+    }
+
+    /// removeFromWorkList - remove all instances of N from the worklist.
+    ///
+    void removeFromWorkList(SDNode *N) {
+      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+                     WorkList.end());
+    }
+
+    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+                      bool AddTo = true);
+
+    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+      return CombineTo(N, &Res, 1, AddTo);
+    }
+
+    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+                      bool AddTo = true) {
+      SDValue To[] = { Res0, Res1 };
+      return CombineTo(N, To, 2, AddTo);
+    }
+
+    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+  private:
+
+    /// SimplifyDemandedBits - Check the specified integer node value to see
+    /// if it can be simplified or if things it uses can be simplified by bit
+    /// propagation. If so, return true.
+    bool SimplifyDemandedBits(SDValue Op) {
+      APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits());
+      return SimplifyDemandedBits(Op, Demanded);
+    }
+
+    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+    bool CombineToPreIndexedLoadStore(SDNode *N);
+    bool CombineToPostIndexedLoadStore(SDNode *N);
+
+    /// combine - call the node-specific routine that knows how to fold each
+    /// particular type of node. If that doesn't do anything, try the
+    /// target-specific DAG combines.
+    SDValue combine(SDNode *N);
+
+    // Visitation implementation - Implement dag node combining for different
+    // node types. The semantics are as follows:
+    // Return Value:
+    //   SDValue.getNode() == 0 - No change was made
+    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
+    //   otherwise              - N should be replaced by the returned Operand.
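+    //
+    // For example (sketch): visitADD may return N->getOperand(0) to
+    // implement the fold (add x, 0) -> x, and the caller then replaces all
+    // uses of N with that operand.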
+ // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitSUB(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitSREM(SDNode *N); + SDValue visitUREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSDIVREM(SDNode *N); + SDValue visitUDIVREM(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitOR(SDNode *N); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBIT_CONVERT(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_ROUND_INREG(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + SDValue visitSTORE(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + + SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans = true); + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. 
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases); + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2) const; + + /// FindAliasInfo - Extracts the relevant alias information from the memory + /// node. Returns true if the operand was a load. + bool FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset) const; + + /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, + /// looking for a better chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + MVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + +public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + : DAG(D), + TLI(D.getTargetLoweringInfo()), + Level(Unrestricted), + OptLevel(OL), + LegalOperations(false), + LegalTypes(false), + AA(A) {} + + /// Run - runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + }; +} + + +namespace { +/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class VISIBILITY_HIDDEN WorkListRemover : + public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; +public: + explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + DC.removeFromWorkList(N); + } + + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. + } +}; +} + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorkList(N); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); +} + + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); +} + +void TargetLowering::DAGCombinerInfo:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isNegatibleForFree - Return 1 if we can compute the negated form of the +/// specified expression for the same cost as the expression itself, or 2 if we +/// can compute the negated form more cheaply than the expression itself. +static char isNegatibleForFree(SDValue Op, bool LegalOperations, + unsigned Depth = 0) { + // No compile time optimizations on this type. + if (Op.getValueType() == MVT::ppcf128) + return 0; + + // fneg is removable even if it has multiple uses. 
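+  // For example, in (fadd (fneg X), Y) the first operand can be "negated"
+  // by simply taking X, which is strictly cheaper; hence the 2 below.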
+  if (Op.getOpcode() == ISD::FNEG) return 2;
+
+  // Don't allow anything with multiple uses.
+  if (!Op.hasOneUse()) return 0;
+
+  // Don't recurse exponentially.
+  if (Depth > 6) return 0;
+
+  switch (Op.getOpcode()) {
+  default: return false;
+  case ISD::ConstantFP:
+    // Don't invert constant FP values after legalize. The negated constant
+    // isn't necessarily legal.
+    return LegalOperations ? 0 : 1;
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    if (!UnsafeFPMath) return 0;
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return V;
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros.
+    if (!UnsafeFPMath) return 0;
+
+    // fold (fneg (fsub A, B)) -> (fsub B, A)
+    return 1;
+
+  case ISD::FMUL:
+  case ISD::FDIV:
+    if (HonorSignDependentRoundingFPMath()) return 0;
+
+    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return V;
+
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FSIN:
+    return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+  }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returns true, this function
+/// returns the newly negated expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                    bool LegalOperations, unsigned Depth = 0) {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+  // Don't allow anything with multiple uses.
+  assert(Op.hasOneUse() && "Unknown reuse!");
+
+  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Unknown code");
+  case ISD::ConstantFP: {
+    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+    V.changeSign();
+    return DAG.getConstantFP(V, Op.getValueType());
+  }
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    assert(UnsafeFPMath);
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+    if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG,
+                                              LegalOperations, Depth+1),
+                         Op.getOperand(1));
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(1), DAG,
+                                            LegalOperations, Depth+1),
+                       Op.getOperand(0));
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros.
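+    // Concretely: with A == B == +0.0, A-B is +0.0 and -(A-B) is -0.0,
+    // while B-A is +0.0; the results differ in sign, so this fold is only
+    // valid under UnsafeFPMath.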
+ assert(UnsafeFPMath); + + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + if (N0CFP->getValueAPF().isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0)); + + case ISD::FMUL: + case ISD::FDIV: + assert(!HonorSignDependentRoundingFPMath()); + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(0), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1)); + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + } +} + + +// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// that selects between the values 1 and 0, making it equivalent to a setcc. +// Also, set the incoming LHS, RHS, and CC references to the appropriate +// nodes based on the type of node we are checking. This simplifies life a +// bit for the callers. +static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + if (N.getOpcode() == ISD::SELECT_CC && + N.getOperand(2).getOpcode() == ISD::Constant && + N.getOperand(3).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && + cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; + } + return false; +} + +// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only +// one use. If this is true, it allows the users to invert the operation for +// free when it is profitable to do so. +static bool isOneUseSetCC(SDValue N) { + SDValue N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + return true; + return false; +} + +SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, + SDValue N0, SDValue N1) { + MVT VT = N0.getValueType(); + if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { + if (isa<ConstantSDNode>(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N0.getOperand(1)), + cast<ConstantSDNode>(N1)); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } else if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + + if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { + if (isa<ConstantSDNode>(N0)) { + // reassoc. 
(op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N1.getOperand(1)), + cast<ConstantSDNode>(N0)); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } else if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N1.getOperand(0), N0); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo) { + assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); + ++NodesCombined; + DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); + DOUT << " and " << NumTo-1 << " other values\n"; + DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) + assert(N->getValueType(i) == To[i].getValueType() && + "Cannot combine value to value of different type!")); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesWith(N, To, &DeadNodes); + + if (AddTo) { + // Push the new nodes and any users onto the worklist + for (unsigned i = 0, e = NumTo; i != e; ++i) { + if (To[i].getNode()) { + AddToWorkList(To[i].getNode()); + AddUsersToWorkList(To[i].getNode()); + } + } + } + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + return SDValue(N, 0); +} + +void +DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & + TLO) { + // Replace all uses. If any nodes become isomorphic to other nodes and + // are deleted, make sure to remove them from our worklist. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorkList(TLO.New.getNode()); + AddUsersToWorkList(TLO.New.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (TLO.Old.getNode()->use_empty()) { + removeFromWorkList(TLO.Old.getNode()); + + // If the operands of this node are only used by the node, they will now + // be dead. Make sure to visit them first to delete dead nodes early. + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + + DAG.DeleteNode(TLO.Old.getNode()); + } +} + +/// SimplifyDemandedBits - Check the specified integer node value to see if +/// it can be simplified or if things it uses can be simplified by bit +/// propagation. If so, return true. +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { + TargetLowering::TargetLoweringOpt TLO(DAG); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorkList(Op.getNode()); + + // Replace the old value with the new one. 
+  ++NodesCombined;
+  DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG));
+  DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG));
+  DOUT << '\n';
+
+  CommitTargetLoweringOpt(TLO);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+//  Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+  // Set the instance variables, so that the various visit routines may use
+  // them.
+  Level = AtLevel;
+  LegalOperations = Level >= NoIllegalOperations;
+  LegalTypes = Level >= NoIllegalTypes;
+
+  // Add all the dag nodes to the worklist.
+  WorkList.reserve(DAG.allnodes_size());
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I)
+    WorkList.push_back(I);
+
+  // Create a dummy node (which is not added to allnodes), that adds a
+  // reference to the root node, preventing it from being deleted, and
+  // tracking any changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+
+  // The root of the dag may dangle to deleted nodes until the dag combiner is
+  // done. Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // While the worklist isn't empty, inspect the node on the end of it and
+  // try to combine it.
+  while (!WorkList.empty()) {
+    SDNode *N = WorkList.back();
+    WorkList.pop_back();
+
+    // If N has no uses, it is dead. Make sure to revisit all N's operands
+    // once N is deleted from the DAG, since they too may now be dead or may
+    // have a reduced number of uses, allowing other xforms.
+    if (N->use_empty() && N != &Dummy) {
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        AddToWorkList(N->getOperand(i).getNode());
+
+      DAG.DeleteNode(N);
+      continue;
+    }
+
+    SDValue RV = combine(N);
+
+    if (RV.getNode() == 0)
+      continue;
+
+    ++NodesCombined;
+
+    // If we get back the same node we passed in, rather than a new node or
+    // zero, we know that the node must have defined multiple values and
+    // CombineTo was used. Since CombineTo takes care of the worklist
+    // mechanics for us, we have no work to do in this case.
+    if (RV.getNode() == N)
+      continue;
+
+    assert(N->getOpcode() != ISD::DELETED_NODE &&
+           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+           "Node was deleted but visit returned new node!");
+
+    DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
+    DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG));
+    DOUT << '\n';
+    WorkListRemover DeadNodes(*this);
+    if (N->getNumValues() == RV.getNode()->getNumValues())
+      DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+    else {
+      assert(N->getValueType(0) == RV.getValueType() &&
+             N->getNumValues() == 1 && "Type mismatch");
+      SDValue OpV = RV;
+      DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+    }
+
+    // Push the new node and any users onto the worklist
+    AddToWorkList(RV.getNode());
+    AddUsersToWorkList(RV.getNode());
+
+    // Add any uses of the old node to the worklist in case this node is the
+    // last one that uses them. They may become dead after this node is
+    // deleted.
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      AddToWorkList(N->getOperand(i).getNode());
+
+    // Finally, if the node is now dead, remove it from the graph. The node
+    // may not be dead if the replacement process recursively simplified to
+    // something else needing this node.
+    if (N->use_empty()) {
+      // Nodes can be reintroduced into the worklist. Make sure we do not
+      // process a node that has been replaced.
+      removeFromWorkList(N);
+
+      // Finally, since the node is now dead, remove it from the graph.
+      DAG.DeleteNode(N);
+    }
+  }
+
+  // If the root changed (e.g. it was a dead load), update the root.
+  DAG.setRoot(Dummy.getValue());
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+  switch(N->getOpcode()) {
+  default: break;
+  case ISD::TokenFactor:        return visitTokenFactor(N);
+  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
+  case ISD::ADD:                return visitADD(N);
+  case ISD::SUB:                return visitSUB(N);
+  case ISD::ADDC:               return visitADDC(N);
+  case ISD::ADDE:               return visitADDE(N);
+  case ISD::MUL:                return visitMUL(N);
+  case ISD::SDIV:               return visitSDIV(N);
+  case ISD::UDIV:               return visitUDIV(N);
+  case ISD::SREM:               return visitSREM(N);
+  case ISD::UREM:               return visitUREM(N);
+  case ISD::MULHU:              return visitMULHU(N);
+  case ISD::MULHS:              return visitMULHS(N);
+  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
+  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
+  case ISD::SDIVREM:            return visitSDIVREM(N);
+  case ISD::UDIVREM:            return visitUDIVREM(N);
+  case ISD::AND:                return visitAND(N);
+  case ISD::OR:                 return visitOR(N);
+  case ISD::XOR:                return visitXOR(N);
+  case ISD::SHL:                return visitSHL(N);
+  case ISD::SRA:                return visitSRA(N);
+  case ISD::SRL:                return visitSRL(N);
+  case ISD::CTLZ:               return visitCTLZ(N);
+  case ISD::CTTZ:               return visitCTTZ(N);
+  case ISD::CTPOP:              return visitCTPOP(N);
+  case ISD::SELECT:             return visitSELECT(N);
+  case ISD::SELECT_CC:          return visitSELECT_CC(N);
+  case ISD::SETCC:              return visitSETCC(N);
+  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
+  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
+  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
+  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
+  case ISD::TRUNCATE:           return visitTRUNCATE(N);
+  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N);
+  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
+  case ISD::FADD:               return visitFADD(N);
+  case ISD::FSUB:               return visitFSUB(N);
+  case ISD::FMUL:               return visitFMUL(N);
+  case ISD::FDIV:               return visitFDIV(N);
+  case ISD::FREM:               return visitFREM(N);
+  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
+  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
+  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
+  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
+  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
+  case ISD::FP_ROUND:           return visitFP_ROUND(N);
+  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
+  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
+  case ISD::FNEG:               return visitFNEG(N);
+  case ISD::FABS:               return visitFABS(N);
+  case ISD::BRCOND:             return visitBRCOND(N);
+  case ISD::BR_CC:              return visitBR_CC(N);
+  case ISD::LOAD:               return visitLOAD(N);
+  case ISD::STORE:              return visitSTORE(N);
+  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
+  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
+  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
+  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::combine(SDNode *N) {
+  SDValue RV = visit(N);
+
+  // If nothing happened, try a target-specific DAG combine.
+  if (RV.getNode() == 0) {
+    assert(N->getOpcode() != ISD::DELETED_NODE &&
+           "Node was deleted but visit returned NULL!");
+
+    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+      // Expose the DAG combiner to the target combiner impls.
+      TargetLowering::DAGCombinerInfo
+        DagCombineInfo(DAG, Level == Unrestricted, false, this);
+
+      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+    }
+  }
+
+  // If N is a commutative binary node, try commuting it to enable more
+  // sdisel CSE.
+  if (RV.getNode() == 0 &&
+      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+      N->getNumValues() == 1) {
+    SDValue N0 = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+
+    // Constant operands are canonicalized to RHS.
+    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+      SDValue Ops[] = { N1, N0 };
+      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+                                            Ops, 2);
+      if (CSENode)
+        return SDValue(CSENode, 0);
+    }
+  }
+
+  return RV;
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+  if (unsigned NumOps = N->getNumOperands()) {
+    if (N->getOperand(0).getValueType() == MVT::Other)
+      return N->getOperand(0);
+    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+      return N->getOperand(NumOps-1);
+    for (unsigned i = 1; i < NumOps-1; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        return N->getOperand(i);
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+  // If N has two operands, where one has an input chain equal to the other,
+  // the 'other' chain is redundant.
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+
+  SmallVector<SDNode *, 8> TFs;  // List of token factors to visit.
+  SmallVector<SDValue, 8> Ops;   // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps;
+  bool Changed = false;          // If we should replace this token factor.
+
+  // Start out with this token factor.
+  TFs.push_back(N);
+
+  // Iterate through token factors.  The TFs list grows when new token factors
+  // are encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    SDNode *TF = TFs[i];
+
+    // Check each of the operands.
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDValue Op = TF->getOperand(i);
+
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+
+      case ISD::TokenFactor:
+        if ((CombinerAA || Op.hasOneUse()) &&
+            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+          // Queue up for processing.
+          TFs.push_back(Op.getNode());
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.getNode());
+          Changed = true;
+          break;
+        }
+        // Fall thru
+
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.getNode()))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  SDValue Result;
+
+  // If we've changed things around, replace the token factor.
+  if (Changed) {
+    if (Ops.empty()) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                           MVT::Other, &Ops[0], Ops.size());
+    }
+
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+
+  return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
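+/// Every result value i simply forwards operand i, so all uses are redirected
+/// to the corresponding operands and the node itself becomes dead.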
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { + WorkListRemover DeadNodes(*this); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), + &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +static +SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, + SelectionDAG &DAG) { + MVT VT = N0.getValueType(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); + + if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && + isa<ConstantSDNode>(N00.getOperand(1))) { + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, + N00.getOperand(0), N01), + DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, + N00.getOperand(1), N01)); + return DAG.getNode(ISD::ADD, DL, VT, N0, N1); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (add x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (add c1, c2) -> c1+c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); + // fold (add x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && + GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), VT, + GA->getOffset() + + (uint64_t)N1C->getSExtValue()); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N1C && N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), VT), + N0.getOperand(1)); + // reassociate add + SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + if (RADD.getNode() != 0) + return RADD; + // fold ((0-A) + B) -> B-A + if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && + cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B + if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && + cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) + return N1.getOperand(0); + // fold ((B-A)+A) -> B + if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) + return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(0)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + 
+                       N1.getOperand(1).getOperand(1));
+  // fold (A+(B-(C+A))) to (B-C)
+  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+      N0 == N1.getOperand(1).getOperand(1))
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+                       N1.getOperand(1).getOperand(0));
+  // fold (A+((B-A)+or-C)) to (B+or-C)
+  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+      N1.getOperand(0).getOpcode() == ISD::SUB &&
+      N0 == N1.getOperand(0).getOperand(1))
+    return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+                       N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    SDValue N10 = N1.getOperand(0);
+    SDValue N11 = N1.getOperand(1);
+
+    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+                         DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+  }
+
+  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (a+b) -> (a|b) iff a and b share no bits.
+  if (VT.isInteger() && !VT.isVector()) {
+    APInt LHSZero, LHSOne;
+    APInt RHSZero, RHSOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+    if (LHSZero.getBoolValue()) {
+      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+        return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+    }
+  }
+
+  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
+  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+    if (Result.getNode()) return Result;
+  }
+  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+    if (Result.getNode()) return Result;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT VT = N0.getValueType();
+
+  // If the flag result is dead, turn this into an ADD.
+  if (N->hasNUsesOfValue(0, 1))
+    return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+                     DAG.getNode(ISD::CARRY_FALSE,
+                                 N->getDebugLoc(), MVT::Flag));
+
+  // canonicalize constant to RHS.
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+
+  // fold (addc x, 0) -> x + no carry out
+  if (N1C && N1C->isNullValue())
+    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+                                        N->getDebugLoc(), MVT::Flag));
+
+  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+  APInt LHSZero, LHSOne;
+  APInt RHSZero, RHSOne;
+  APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+  DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+  if (LHSZero.getBoolValue()) {
+    DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + N1, N0, CarryIn); + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sub x, x) -> 0 + if (N0 == N1) + return DAG.getConstant(0, N->getValueType(0)); + // fold (sub c1, c2) -> c1-c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + // fold (sub x, c) -> (add x, -c) + if (N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), VT)); + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + // fold ((A+(B+or-C))-B) -> A+or-C + if (N0.getOpcode() == ISD::ADD && + (N0.getOperand(1).getOpcode() == ISD::SUB || + N0.getOperand(1).getOpcode() == ISD::ADD) && + N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::ADD && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B + if (N0.getOpcode() == ISD::SUB && + N0.getOperand(1).getOpcode() == ISD::SUB && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + + // If either operand of a sub is undef, the result is undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + // If the relocation model supports it, consider symbol offsets. 
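+  // Illustrative (editor's sketch): with offset folding legal, the folds
+  // below turn (sub (GlobalAddress @g + 8), Constant<3>) into
+  // GlobalAddress @g + 5, and (sub (@g + 8), (@g + 2)) into Constant<6>.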
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { + // fold (sub Sym, c) -> Sym-c + if (N1C && GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), VT, + GA->getOffset() - + (uint64_t)N1C->getSExtValue()); + // fold (sub Sym+c1, Sym+c2) -> c1-c2 + if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) + if (GA->getGlobal() == GB->getGlobal()) + return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), + VT); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (mul c1, c2) -> c1*c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mul x, -1) -> 0-x + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // fold (mul x, (1 << c)) -> x << c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { + unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + // FIXME: If the input is something that is easily negated (e.g. a + // single-use add), we should put the negate there. + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(Log2Val, getShiftAmountTy()))); + } + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N1C && N0.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(1))) { + SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1, N0.getOperand(1)); + AddToWorkList(C3.getNode()); + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDValue Sh(0,0), Y(0,0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
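+  // (Editor's note, illustrative: e.g. (mul (shl X, 2), Y) becomes
+  // (shl (mul X, Y), 2); pulling the shift to the outside leaves a bare
+  // multiply that the surrounding mul folds can still see, and the
+  // shl-by-constant is cheap to re-fold later.)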
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getNode()->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N1.getOperand(1)) && + N1.getNode()->hasOneUse()) { + Sh = N1; Y = N0; + } + + if (Sh.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + Mul, Sh.getOperand(1)); + } + } + + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + N0.getOperand(1), N1)); + + // reassociate mul + SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + if (RMUL.getNode() != 0) + return RMUL; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sdiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getSExtValue() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // If we know the sign bits of both operands are zero, strength reduce to a + // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + N0, N1); + } + // fold (sdiv X, pow2) -> simple ops after legalize + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && + (isPowerOf2_64(N1C->getSExtValue()) || + isPowerOf2_64(-N1C->getSExtValue()))) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDValue(); + + int64_t pow2 = N1C->getSExtValue(); + int64_t abs2 = pow2 > 0 ? pow2 : -pow2; + unsigned lg2 = Log2_64(abs2); + + // Splat the sign bit into the register + SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getConstant(VT.getSizeInBits()-1, + getShiftAmountTy())); + AddToWorkList(SGN.getNode()); + + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + DAG.getConstant(VT.getSizeInBits() - lg2, + getShiftAmountTy())); + SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + AddToWorkList(SRL.getNode()); + AddToWorkList(ADD.getNode()); // Divide by pow2 + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + DAG.getConstant(lg2, getShiftAmountTy())); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (pow2 > 0) + return SRA; + + AddToWorkList(SRA.getNode()); + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), SRA); + } + + // if integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. 
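+  // Illustrative (editor's note): BuildSDIV replaces the divide with a
+  // multiply by a precomputed "magic" constant plus shifts, in the style of
+  // Hacker's Delight; e.g. X/10 on i32 becomes roughly
+  // (mulhs X, 0x66666667) >>s 2 plus a sign correction, with no divide
+  // instruction at all.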
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && + !TLI.isIntDivCheap()) { + SDValue Op = BuildSDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (udiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + // fold (udiv x, (1 << c)) -> x >>u c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + MVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + N1.getOperand(1), + DAG.getConstant(SHC->getAPIntValue() + .logBase2(), + ADDVT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + } + } + } + // fold (udiv x, c) -> alternate + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + SDValue Op = BuildUDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitSREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (srem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
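+  // Illustrative (editor's sketch): for X %s 8 on a target where divides are
+  // expensive, the SDIV built below combines into the shift sequence above,
+  // and X - (X/8)*8 then computes the remainder with no divide at all.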
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (urem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue()-1,VT)); + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + VT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + } + } + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
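+  // (Editor's note: this only fires when the UDIV actually combined into
+  // something cheaper, i.e. OptimizedDiv != Div below; e.g. for X %u 24 the
+  // division becomes a multiply-high sequence via BuildUDIV, and the
+  // remainder is then X - (X/24)*24.)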
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHS(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (mulhs x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + getShiftAmountTy())); + // fold (mulhs x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHU(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (mulhu x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhu x, 1) -> 0 + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getConstant(0, N0.getValueType()); + // fold (mulhu x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDValue(); +} + +/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that +/// compute two values. LoOp and HiOp give the opcodes for the two computations +/// that are being performed. Return true if a simplification was made. +/// +SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp) { + // If the high half is not needed, just compute the low half. + bool HiExists = N->hasAnyUseOfValue(1); + if (!HiExists && + (!LegalOperations || + TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If the low half is not needed, just compute the high half. + bool LoExists = N->hasAnyUseOfValue(0); + if (!LoExists && + (!LegalOperations || + TLI.isOperationLegal(HiOp, N->getValueType(1)))) { + SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If both halves are used, return as it is. + if (LoExists && HiExists) + return SDValue(); + + // If the two computed results can be simplified separately, separate them. 
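+  // Illustrative (editor's sketch): if only the quotient of (udivrem x, 8)
+  // is live but UDIV itself is not a legal operation here, the freshly built
+  // UDIV below still combines to (srl x, 3), and that shift replaces the
+  // node.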
+ if (LoExists) { + SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Lo.getNode()); + SDValue LoOpt = combine(Lo.getNode()); + if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && + (!LegalOperations || + TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) + return CombineTo(N, LoOpt, LoOpt); + } + + if (HiExists) { + SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Hi.getNode()); + SDValue HiOpt = combine(Hi.getNode()); + if (HiOpt.getNode() && HiOpt != Hi && + (!LegalOperations || + TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) + return CombineTo(N, HiOpt, HiOpt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with +/// two operands of the same opcode, try to simplify it. +SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + MVT VT = N0.getValueType(); + assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + + // For each of OP in AND/OR/XOR: + // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) + // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) + // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| + N0.getOpcode() == ISD::SIGN_EXTEND || + (N0.getOpcode() == ISD::TRUNCATE && + !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + } + + // For each of OP in SHL/SRL/SRA/AND... 
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) + // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) + // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || + N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + N0.getOperand(1) == N1.getOperand(1)) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ORNode, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitAND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N1.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and c1, c2) -> c1&c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + // fold (and x, -1) -> x + if (N1C && N1C->isAllOnesValue()) + return N0; + // if (and x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(BitWidth))) + return DAG.getConstant(0, VT); + // reassociate and + SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + if (RAND.getNode() != 0) + return RAND; + // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF + if (N1C && N0.getOpcode() == ISD::OR) + if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N0Op0 = N0.getOperand(0); + APInt Mask = ~N1C->getAPIntValue(); + Mask.trunc(N0Op0.getValueSizeInBits()); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + N0.getValueType(), N0Op0); + + // Replace uses of the AND with uses of the Zero extend node. + CombineTo(N, Zext); + + // We actually want to replace all uses of the any_extend with the + // zero_extend, to avoid duplicating things. This will later cause this + // AND to be folded. + CombineTo(N0.getNode(), Zext); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + if (N1C && N0.getOpcode() == ISD::LOAD) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (LN0->getExtensionType() != ISD::SEXTLOAD && + LN0->isUnindexed() && N0.hasOneUse() && + // Do not change the width of a volatile load. + !LN0->isVolatile()) { + MVT EVT = MVT::Other; + uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) + EVT = MVT::getIntegerVT(ActiveBits); + + MVT LoadedVT = LN0->getMemoryVT(); + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + MVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to add an offset to the pointer to + // load the correct bytes. For little endian systems, we merely need to + // read fewer bytes from the same pointer. + unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; + unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + if (TLI.isBigEndian()) { + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } + + AddToWorkList(NewPtr.getNode()); + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), + NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), + EVT, LN0->isVolatile(), Alignment); + AddToWorkList(N); + CombineTo(N0.getNode(), Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N1.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (or x, undef) -> -1 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(~0ULL, VT); + // fold (or c1, c2) -> c1|c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); + // fold (or x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (or x, -1) -> -1 + if (N1C && N1C->isAllOnesValue()) + return N1; + // fold (or x, c) -> c iff (x & ~c) == 0 + if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) + return N1; + // reassociate or + SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); + if (ROR.getNode() != 0) + return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // (or (and X, C1), (and Y, C2)) -> 
(and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + const APInt &RHSMask = + cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, + DAG.getConstant(LHSMask | RHSMask, VT)); + } + } + + // See if this is some rotate idiom. + if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) + return SDValue(Rot, 0); + + return SDValue(); +} + +/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND) { + if (isa<ConstantSDNode>(Op.getOperand(1))) { + Mask = Op.getOperand(1); + Op = Op.getOperand(0); + } else { + return false; + } + } + + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + + return false; +} + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { + // Must be a legal type. Expanded 'n promoted things won't work with rotates. + MVT VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) return 0; + + // The target must have at least one rotate flavor. + bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); + bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) return 0; + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDValue LHSShift; // The shift. + SDValue LHSMask; // AND value if any. + if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) + return 0; // Not part of a rotate. + + SDValue RHSShift; // The shift. + SDValue RHSMask; // AND value if any. + if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) + return 0; // Not part of a rotate. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return 0; // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return 0; // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. 
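+  // (Editor's note: after this swap LHSShift is always the SHL half and
+  // RHSShift the SRL half, so the rotate matching below only has to reason
+  // about one orientation.)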
+ if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask , RHSMask ); + } + + unsigned OpSizeInBits = VT.getSizeInBits(); + SDValue LHSShiftArg = LHSShift.getOperand(0); + SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + if (LHSShiftAmt.getOpcode() == ISD::Constant && + RHSShiftAmt.getOpcode() == ISD::Constant) { + uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != OpSizeInBits) + return 0; + + SDValue Rot; + if (HasROTL) + Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); + else + Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. + if (LHSMask.getNode() || RHSMask.getNode()) { + APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + + if (LHSMask.getNode()) { + APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); + Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + } + if (RHSMask.getNode()) { + APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); + Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + } + + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + } + + return Rot.getNode(); + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. + if (LHSMask.getNode() || RHSMask.getNode()) + return 0; + + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) + if (RHSShiftAmt.getOpcode() == ISD::SUB && + LHSShiftAmt == RHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + if (HasROTL) + return DAG.getNode(ISD::ROTL, DL, VT, + LHSShiftArg, LHSShiftAmt).getNode(); + else + return DAG.getNode(ISD::ROTR, DL, VT, + LHSShiftArg, RHSShiftAmt).getNode(); + } + } + } + + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) + if (LHSShiftAmt.getOpcode() == ISD::SUB && + RHSShiftAmt == LHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + if (HasROTR) + return DAG.getNode(ISD::ROTR, DL, VT, + LHSShiftArg, RHSShiftAmt).getNode(); + else + return DAG.getNode(ISD::ROTL, DL, VT, + LHSShiftArg, LHSShiftAmt).getNode(); + } + } + } + + // Look for sign/zext/any-extended or truncate cases: + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + SDValue LExtOp0 = LHSShiftAmt.getOperand(0); + SDValue RExtOp0 = RHSShiftAmt.getOperand(0); + if (RExtOp0.getOpcode() == ISD::SUB && + RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext 
(sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } + } + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, + LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); + } + } + } + } + + return 0; +} + +SDValue DAGCombiner::visitXOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LHS, RHS, CC; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (xor undef, undef) -> 0. This is a common idiom (misuse). + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (xor x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (xor c1, c2) -> c1^c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold (xor x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // reassociate xor + SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + if (RXOR.getNode() != 0) + return RXOR; + + // fold !(x cc y) -> (x !cc y) + if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + bool isInt = LHS.getValueType().isInteger(); + ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + isInt); + + if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { + switch (N0.getOpcode()) { + default: + assert(0 && "Unhandled SetCC Equivalent!"); + abort(); + case ISD::SETCC: + return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + case ISD::SELECT_CC: + return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + N0.getOperand(3), NotCC); + } + } + } + + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getNode()->hasOneUse() && + isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDValue V = N0.getOperand(0); + V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + DAG.getConstant(1, V.getValueType())); + AddToWorkList(V.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + } + + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc + if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
+        ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+    }
+  }
+  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+  if (N1C && N1C->isAllOnesValue() &&
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+    }
+  }
+  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+  if (N1C && N0.getOpcode() == ISD::XOR) {
+    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N00C)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+                         DAG.getConstant(N1C->getAPIntValue() ^
+                                         N00C->getAPIntValue(), VT));
+    if (N01C)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+                         DAG.getConstant(N1C->getAPIntValue() ^
+                                         N01C->getAPIntValue(), VT));
+  }
+  // fold (xor x, x) -> 0
+  if (N0 == N1) {
+    if (!VT.isVector()) {
+      return DAG.getConstant(0, VT);
+    } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
+      // Produce a vector of zeros.
+      SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+      std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+                         &Ops[0], Ops.size());
+    }
+  }
+
+  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.getNode()) return Tmp;
+  }
+
+  // Simplify the expression using non-local knowledge.
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+  SDNode *LHS = N->getOperand(0).getNode();
+  if (!LHS->hasOneUse()) return SDValue();
+
+  // We want to pull some binops through shifts, so that we have (and (shift))
+  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
+  // thing happens with address calculations, so it's important to canonicalize
+  // it.
+  bool HighBitSet = false;  // Can we transform this if the high bit is set?
+
+  switch (LHS->getOpcode()) {
+  default: return SDValue();
+  case ISD::OR:
+  case ISD::XOR:
+    HighBitSet = false; // We can only transform sra if the high bit is clear.
+    break;
+  case ISD::AND:
+    HighBitSet = true;  // We can only transform sra if the high bit is set.
+    break;
+  case ISD::ADD:
+    if (N->getOpcode() != ISD::SHL)
+      return SDValue(); // only shl(add) not sr[al](add).
+    HighBitSet = false; // We can only transform sra if the high bit is clear.
+    break;
+  }
+
+  // We require the RHS of the binop to be a constant as well.
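+  // Illustrative (editor's sketch): this transform is what rewrites, e.g.,
+  // (shl (or (srl x, 4), 0xF0), 2) into (or (shl (srl x, 4), 2), 0x3C0):
+  // the constant operand is pre-shifted and the binop floats above the shift.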
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); + + // FIXME: disable this unless the input to the binop is a shift by a constant. + // If it is not a shift, it pessimizes some common cases like: + // + // void foo(int *X, int i) { X[i & 1235] = 1; } + // int bar(int *X, int i) { return X[i & 255]; } + SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); + if ((BinOpLHSVal->getOpcode() != ISD::SHL && + BinOpLHSVal->getOpcode() != ISD::SRA && + BinOpLHSVal->getOpcode() != ISD::SRL) || + !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + return SDValue(); + + MVT VT = N->getValueType(0); + + // If this is a signed shift right, and the high bit is modified by the + // logical operation, do not perform the transformation. The highBitSet + // boolean indicates the value of the high bit of the constant which would + // cause it to be modified for this operation. + if (N->getOpcode() == ISD::SRA) { + bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); + if (BinOpRHSSignSet != HighBitSet) + return SDValue(); + } + + // Fold the constants, shifting the binop RHS by the shift amount. + SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), + N->getValueType(0), + LHS->getOperand(1), N->getOperand(1)); + + // Create the new shift. + SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), + VT, LHS->getOperand(0), N->getOperand(1)); + + // Create the new binop. + return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); +} + +SDValue DAGCombiner::visitSHL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getSizeInBits(); + + // fold (shl c1, c2) -> c1<<c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + // fold (shl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (shl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (shl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (shl x, c) is known to be zero, return 0 + if (DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(VT.getSizeInBits()))) + return DAG.getConstant(0, VT); + // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
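+  // (Editor's note: narrowing the mask to the shift-amount type means the
+  // wide value y no longer has to be kept live just to be masked; e.g. an
+  // i64 (and y, 31) feeding an i32 shift becomes (and (trunc y), 31).)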
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      MVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC.trunc(TruncVT.getSizeInBits());
+      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SHL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    if (c1 + c2 > OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+  // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
+  //                               (srl (and x, (shl -1, c1)), (sub c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0),
+                               DAG.getConstant(~0ULL << c1, VT));
+    if (c2 > c1)
+      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+                         DAG.getConstant(c2-c1, N1.getValueType()));
+    else
+      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+                         DAG.getConstant(c1-c2, N1.getValueType()));
+  }
+  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(~0ULL << N1C->getZExtValue(), VT));
+
+  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT VT = N0.getValueType();
+
+  // fold (sra c1, c2) -> c1 >>s c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+  // fold (sra 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (sra -1, x) -> -1
+  if (N0C && N0C->isAllOnesValue())
+    return N0;
+  // fold (sra x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= VT.getSizeInBits())
+    return DAG.getUNDEF(VT);
+  // fold (sra x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+  // sext_inreg.
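As a concrete instance of the sext_inreg fold just named (its implementation follows): on i32, shifting left and then arithmetically right by the same amount is exactly an in-register sign extension of the low field. A small self-contained check (ours, not part of the patch; assumes the usual two's-complement arithmetic right shift, which C++20 guarantees):

#include <cassert>
#include <cstdint>

// (sra (shl x, 24), 24) on i32 equals sign_extend_inreg from i8:
// both replicate bit 7 of x across the upper 24 bits.
int main() {
  const int32_t vals[] = {0, 1, 127, 128, 255, -1, 0x7FABCDEF};
  for (int32_t x : vals) {
    int32_t viaShifts = (int32_t)((uint32_t)x << 24) >> 24;
    int32_t viaSext   = (int8_t)(uint8_t)x;
    assert(viaShifts == viaSext);
  }
  return 0;
}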
+  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+    unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
+    MVT EVT = MVT::getIntegerVT(LowBits);
+    if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                         N0.getOperand(0), DAG.getValueType(EVT));
+  }
+
+  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRA) {
+    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+      if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+                         DAG.getConstant(Sum, N1C->getValueType(0)));
+    }
+  }
+
+  // fold (sra (shl X, m), (sub result_size, n))
+  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+  // result_size - n != m.
+  // If truncate is free for the target, sext(shl) is likely to result in
+  // better code.
+  if (N0.getOpcode() == ISD::SHL) {
+    // Get the two constants of the shifts, CN0 = m, CN = n.
+    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N01C && N1C) {
+      // Determine what the truncate's result bitsize and type would be.
+      unsigned VTValSize = VT.getSizeInBits();
+      MVT TruncVT =
+        MVT::getIntegerVT(VTValSize - N1C->getZExtValue());
+      // Determine the residual right-shift amount.
+      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+      // If the shift is not a no-op (in which case this should be just a sign
+      // extend already), the truncate's target type is legal, sign_extend is
+      // legal on that type, and the truncate to that type is both legal and
+      // free, perform the transform.
+      if ((ShiftAmt > 0) &&
+          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+          TLI.isTruncateFree(VT, TruncVT)) {
+
+        SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+                                    N0.getOperand(0), Amt);
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+                                    Shift);
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+                           N->getValueType(0), Trunc);
+      }
+    }
+  }
+
+  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      MVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC.trunc(TruncVT.getSizeInBits());
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(),
+                                     TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  // Simplify, based on bits shifted out of the LHS.
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // If the sign bit is known to be zero, switch this to a SRL.
+  if (DAG.SignBitIsZero(N0))
+    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  MVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getSizeInBits();
+
+  // fold (srl c1, c2) -> c1 >>u c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+  // fold (srl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (srl x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (srl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (srl x, c) is known to be zero, return 0
+  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+                                   APInt::getAllOnesValue(OpSizeInBits)))
+    return DAG.getConstant(0, VT);
+
+  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    if (c1 + c2 > OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+
+  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    // Shifting in all undef bits?
+    MVT SmallVT = N0.getOperand(0).getValueType();
+    if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+      return DAG.getUNDEF(VT);
+
+    SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+                                     N0.getOperand(0), N1);
+    AddToWorkList(SmallShift.getNode());
+    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+  }
+
+  // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+  // bit, which is unmodified by sra.
+  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+    if (N0.getOpcode() == ISD::SRA)
+      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+  }
+
+  // fold (srl (ctlz x), "5") -> (xor x, 1) iff the only bit that may be set
+  // in x is the low bit.
+  if (N1C && N0.getOpcode() == ISD::CTLZ &&
+      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+    APInt KnownZero, KnownOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+    // If any of the input bits are KnownOne, then the input couldn't be all
+    // zeros, thus the result of the srl will always be zero.
+    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+    // If all of the bits input to the ctlz node are known to be zero, then
+    // the result of the ctlz is "32" and the result of the shift is one.
+    APInt UnknownBits = ~KnownZero & Mask;
+    if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+    // Otherwise, check to see if there is exactly one bit input to the ctlz.
+    if ((UnknownBits & (UnknownBits - 1)) == 0) {
+      // Okay, we know that only the single bit specified by UnknownBits
+      // could be set on input to the CTLZ node. If this bit is set, the SRL
+      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+      // to an SRL/XOR pair, which is likely to simplify more.
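This CTLZ/SRL to SRL/XOR rewrite can be sanity-checked in isolation. A standalone sketch (ours, not part of the patch) with a reference ctlz that returns the bit width for a zero input, matching what the fold assumes:

#include <cassert>
#include <cstdint>

// If at most bit k of x can be set, then on i32:
//   (ctlz(x) >> 5) == ((x >> k) ^ 1)
// which is the SRL/XOR pair the combiner emits below.
static unsigned ctlz32(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !(x & (0x80000000u >> n))) ++n;
  return n;  // ctlz32(0) == 32
}

int main() {
  const unsigned k = 9;
  const uint32_t inputs[] = {0u, 1u << k};  // the only two possible values
  for (uint32_t x : inputs)
    assert((ctlz32(x) >> 5) == ((x >> k) ^ 1u));
  return 0;
}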
+ unsigned ShAmt = UnknownBits.countTrailingZeros(); + SDValue Op = N0.getOperand(0); + + if (ShAmt) { + Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, + DAG.getConstant(ShAmt, getShiftAmountTy())); + AddToWorkList(Op.getNode()); + } + + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + Op, DAG.getConstant(1, VT)); + } + } + + // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), + TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + // fold operands of srl based on knowledge that the low bits are not + // demanded. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitCTLZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTPOP(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + MVT VT = N->getValueType(0); + MVT VT0 = N0.getValueType(); + + // fold (select C, X, X) -> X + if (N1 == N2) + return N1; + // fold (select true, X, Y) -> X + if (N0C && !N0C->isNullValue()) + return N1; + // fold (select false, X, Y) -> Y + if (N0C && N0C->isNullValue()) + return N2; + // fold (select C, 1, X) -> (or C, X) + if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select C, 0, 1) -> (xor C, 1) + if (VT.isInteger() && + (VT0 == MVT::i1 || + (VT0.isInteger() && + TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) && + N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + SDValue XORNode; + if (VT == VT0) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + AddToWorkList(XORNode.getNode()); + if (VT.bitsGT(VT0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + } + // fold (select C, 0, X) -> (and (not C), X) + if (VT == VT0 && VT == MVT::i1 
&& N1C && N1C->isNullValue()) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); + } + // fold (select C, X, 1) -> (or (not C), X) + if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); + } + // fold (select C, X, 0) -> (and C, X) + if (VT == MVT::i1 && N2C && N2C->isNullValue()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or X, Y) + if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select X, Y, X) -> (and X, Y) + // fold (select X, Y, 0) -> (and X, Y) + if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + + // fold selects based on a setcc into other things, such as min/max/abs + if (N0.getOpcode() == ISD::SETCC) { + // FIXME: + // Check against MVT::Other for SELECT_CC, which is a workaround for targets + // having to say they don't support SELECT_CC on every type the DAG knows + // about, since there is no way to mark an opcode illegal at all value types + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), + N1, N2, N0.getOperand(2)); + return SimplifySelect(N->getDebugLoc(), N0, N1, N2); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue N4 = N->getOperand(4); + ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); + + // fold select_cc lhs, rhs, x, x, cc -> x + if (N2 == N3) + return N2; + + // Determine if the condition we're dealing with is constant + SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC, N->getDebugLoc(), false); + if (SCC.getNode()) AddToWorkList(SCC.getNode()); + + if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { + if (!SCCC->isNullValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } + + // Fold to a simpler select_cc + if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N2, N3)) + return SDValue(N, 0); // Don't revisit N. + + // fold select_cc into other things, such as min/max/abs + return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); +} + +SDValue DAGCombiner::visitSETCC(SDNode *N) { + return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), + cast<CondCodeSDNode>(N->getOperand(2))->get(), + N->getDebugLoc()); +} + +// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: +// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" +// transformation. 
Returns true if extensions are possible and the above
+// mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+                                    unsigned ExtOpc,
+                                    SmallVector<SDNode*, 4> &ExtendNodes,
+                                    const TargetLowering &TLI) {
+  bool HasCopyToRegUses = false;
+  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+                            UE = N0.getNode()->use_end();
+       UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User == N)
+      continue;
+    if (UI.getUse().getResNo() != N0.getResNo())
+      continue;
+    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+        // Sign bits will be lost after a zext.
+        return false;
+      bool Add = false;
+      for (unsigned i = 0; i != 2; ++i) {
+        SDValue UseOp = User->getOperand(i);
+        if (UseOp == N0)
+          continue;
+        if (!isa<ConstantSDNode>(UseOp))
+          return false;
+        Add = true;
+      }
+      if (Add)
+        ExtendNodes.push_back(User);
+      continue;
+    }
+    // If truncates aren't free and there are users we can't
+    // extend, it isn't worthwhile.
+    if (!isTruncFree)
+      return false;
+    // Remember if this value is live-out.
+    if (User->getOpcode() == ISD::CopyToReg)
+      HasCopyToRegUses = true;
+  }
+
+  if (HasCopyToRegUses) {
+    bool BothLiveOut = false;
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDUse &Use = UI.getUse();
+      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+        BothLiveOut = true;
+        break;
+      }
+    }
+    if (BothLiveOut)
+      // Both unextended and extended values are live out. There had better be
+      // a good reason for the transformation.
+      return ExtendNodes.size();
+  }
+  return true;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+
+  // fold (sext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // fold (sext (sext x)) -> (sext x)
+  // fold (sext (aext x)) -> (sext x)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+                       N0.getOperand(0));
+
+  if (N0.getOpcode() == ISD::TRUNCATE) {
+    // fold (sext (truncate (load x))) -> (sext (smaller load x))
+    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+    if (NarrowLoad.getNode()) {
+      if (NarrowLoad.getNode() != N0.getNode())
+        CombineTo(N0.getNode(), NarrowLoad);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+
+    // See if the value being truncated is already sign extended. If so, just
+    // eliminate the trunc/sext pair.
+    SDValue Op = N0.getOperand(0);
+    unsigned OpBits   = Op.getValueType().getSizeInBits();
+    unsigned MidBits  = N0.getValueType().getSizeInBits();
+    unsigned DestBits = VT.getSizeInBits();
+    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+    if (OpBits == DestBits) {
+      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+      // bits, it is already ready.
+      if (NumSignBits > DestBits-MidBits)
+        return Op;
+    } else if (OpBits < DestBits) {
+      // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+      // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + } + + // fold (sext (truncate x)) -> (sextinreg x). + if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, + N0.getValueType())) { + if (Op.getValueType().bitsLT(VT)) + Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); + else if (Op.getValueType().bitsGT(VT)) + Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, + DAG.getValueType(N0.getValueType())); + } + } + + // fold (sext (load x)) -> (sext (truncate (sextload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) + // fold (sext ( extload x)) -> (sext (truncate (sextload x))) + if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + // fold (sext x) -> (zext x) if the sign bit is known zero. 
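The sign-bit observation just stated is simply the fact that sign- and zero-extension agree on values whose sign bit is clear. A quick standalone check (ours, not part of the patch):

#include <cassert>
#include <cstdint>

// When the sign bit of the narrow value is known zero, sext and zext
// produce the same bits, so the cheaper zext can be used.
int main() {
  for (int32_t x = 0; x <= 0x7FFF; x += 0x1111) {  // bit 31 always clear
    int64_t  s = (int64_t)x;              // sext
    uint64_t z = (uint64_t)(uint32_t)x;   // zext
    assert((uint64_t)s == z);
  }
  return 0;
}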
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && + DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (zext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + // fold (zext (zext x)) -> (zext x) + // fold (zext (aext x)) -> (zext x) + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + if (NarrowLoad.getNode() != N0.getNode()) + CombineTo(N0.getNode(), NarrowLoad); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + } + } + + // fold (zext (truncate x)) -> (and x, mask) + if (N0.getOpcode() == ISD::TRUNCATE && + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + SDValue Op = N0.getOperand(0); + if (Op.getValueType().bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + } else if (Op.getValueType().bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + } + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType()); + } + + // Fold (zext (and (trunc x), cst)) -> (and x, cst), + // if either of the casts is not free. + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (zext (load x)) -> (zext (truncate (zextload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. 
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (zext (zextload x)) -> (zext (truncate (zextload x))) + // fold (zext ( extload x)) -> (zext (truncate (zextload x))) + if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + if (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + + // fold (aext (truncate (load x))) -> (aext (smaller load x)) + // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + if (NarrowLoad.getNode() != N0.getNode()) + CombineTo(N0.getNode(), NarrowLoad); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + } + } + + // fold (aext (truncate x)) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue TruncOp = N0.getOperand(0); + if (TruncOp.getValueType() == VT) + return TruncOp; // x iff x size == zext size. + if (TruncOp.getValueType().bitsGT(VT)) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + } + + // Fold (aext (and (trunc x), cst)) -> (and x, cst) + // if the trunc is not free. 
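Both the aext fold below and the zext-of-truncate fold above come down to one identity: zero-extending a truncated value is an AND with a low-bit mask. A standalone check (ours, not part of the patch):

#include <cassert>
#include <cstdint>

// zext(trunc x) keeps only the low bits, i.e. it is an AND with a mask:
// on i32 with an i8 intermediate, (zext (trunc x)) == x & 0xFF.
int main() {
  const uint32_t vals[] = {0u, 0x80u, 0x1FFu, 0xDEADBEEFu};
  for (uint32_t x : vals)
    assert((uint32_t)(uint8_t)x == (x & 0xFFu));
  return 0;
}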
+ if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType())) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (aext (load x)) -> (aext (truncate (extload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (zextload x)) -> (aext (truncate (zextload x))) + // fold (aext (sextload x)) -> (aext (truncate (sextload x))) + // fold (aext ( extload x)) -> (aext (truncate (extload x))) + if (N0.getOpcode() == ISD::LOAD && + !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) + return SCC; + } + + return SDValue(); +} + +/// GetDemandedBits - See if the specified operand can be simplified with the +/// knowledge that only the bits specified by Mask are used. If so, return the +/// simpler operand, otherwise return a null SDValue. 
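A concrete instance of what GetDemandedBits can prove (standalone sketch, ours): if only the low 8 bits are demanded, an OR operand whose bits all lie above bit 7 contributes nothing and can be dropped.

#include <cassert>
#include <cstdint>

// Only the low byte of (x | (y << 8)) is demanded; (y << 8) has no bits
// there, so the whole expression simplifies to x.
int main() {
  const uint32_t xs[] = {0u, 0xABu, 0xFFFFu};
  const uint32_t ys[] = {0u, 0x55u, 0xFFu};
  for (uint32_t x : xs)
    for (uint32_t y : ys)
      assert((uint8_t)(x | (y << 8)) == (uint8_t)x);
  return 0;
}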
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+  switch (V.getOpcode()) {
+  default: break;
+  case ISD::OR:
+  case ISD::XOR:
+    // If the LHS or RHS don't contribute bits to the OR/XOR, drop them.
+    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+      return V.getOperand(1);
+    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+      return V.getOperand(0);
+    break;
+  case ISD::SRL:
+    // Only look at single-use SRLs.
+    if (!V.getNode()->hasOneUse())
+      break;
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+      // See if we can recursively simplify the LHS.
+      unsigned Amt = RHSC->getZExtValue();
+
+      // Watch out for shift count overflow though.
+      if (Amt >= Mask.getBitWidth()) break;
+      APInt NewMask = Mask << Amt;
+      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+      if (SimplifyLHS.getNode())
+        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+                           SimplifyLHS, V.getOperand(1));
+    }
+  }
+  return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, where N is a multiple of
+/// the number of bits of the narrower type, transform it to a narrower
+/// load from address + N / (num of bits of the new type). If the result
+/// is to be extended, also fold the extension to form an extending load.
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+  unsigned Opc = N->getOpcode();
+  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+  MVT EVT = VT;
+
+  // This transformation isn't valid for vector loads.
+  if (VT.isVector())
+    return SDValue();
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // extended to VT.
+  if (Opc == ISD::SIGN_EXTEND_INREG) {
+    ExtType = ISD::SEXTLOAD;
+    EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))
+      return SDValue();
+  }
+
+  unsigned EVTBits = EVT.getSizeInBits();
+  unsigned ShAmt = 0;
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of EVT?
+      if ((ShAmt & (EVTBits-1)) == 0) {
+        N0 = N0.getOperand(0);
+        if (N0.getValueType().getSizeInBits() <= EVTBits)
+          return SDValue();
+      }
+    }
+  }
+
+  // Do not generate loads of non-round integer types since these can
+  // be expensive (and would be wrong if the type is not byte sized).
+  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() &&
+      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
+      // Do not change the width of a volatile load.
+      !cast<LoadSDNode>(N0)->isVolatile()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    MVT PtrType = N0.getOperand(1).getValueType();
+
+    // For big endian targets, we need to adjust the offset to the pointer to
+    // load the correct bytes.
+    if (TLI.isBigEndian()) {
+      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+      unsigned EVTStoreBits = EVT.getStoreSizeInBits();
+      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+    }
+
+    uint64_t PtrOff = ShAmt / 8;
+    unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+    SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+                                 PtrType, LN0->getBasePtr(),
+                                 DAG.getConstant(PtrOff, PtrType));
+    AddToWorkList(NewPtr.getNode());
+
+    SDValue Load = (ExtType == ISD::NON_EXTLOAD)
+      ?
DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, + LN0->isVolatile(), NewAlign) + : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, + EVT, LN0->isVolatile(), NewAlign); + + // Replace the old load's chain with the new load's chain. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), + &DeadNodes); + + // Return the new loaded value. + return Load; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + MVT VT = N->getValueType(0); + MVT EVT = cast<VTSDNode>(N1)->getVT(); + unsigned VTBits = VT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + // fold (sext_in_reg c1) -> c1 + if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); + + // If the input is already sign extended, just drop the extension. + if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1) + return N0; + + // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + } + + // fold (sext_in_reg (sext x)) -> (sext x) + // fold (sext_in_reg (aext x)) -> (sext x) + // if x is small enough. + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getValueType().getSizeInBits() < EVTBits) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); + } + + // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. + if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) + return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); + + // fold operands of sext_in_reg based on knowledge that the top bits are not + // demanded. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (sext_in_reg (load x)) -> (smaller sextload x) + // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + + // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) + // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. + // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. + if (N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) { + // We can turn this into an SRA iff the input to the SRL is already sign + // extended enough. 
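The pointer arithmetic in ReduceLoadWidth is easy to get wrong on big-endian targets. A standalone sketch of the byte-offset computation (ours, not part of the patch; it mirrors the ShAmt adjustment above):

#include <cassert>

// Shifting a wide load right by shAmtBits and truncating is a narrow load
// at byte offset shAmtBits/8 on little-endian; big-endian targets count
// from the other end of the wide value.
unsigned narrowLoadByteOffset(unsigned wideBits, unsigned narrowBits,
                              unsigned shAmtBits, bool bigEndian) {
  if (bigEndian)
    shAmtBits = wideBits - narrowBits - shAmtBits;
  return shAmtBits / 8;
}

int main() {
  // An i32 load whose bits [16..23] are taken as an i8:
  // byte 2 on little-endian, byte 1 on big-endian.
  assert(narrowLoadByteOffset(32, 8, 16, /*bigEndian=*/false) == 2);
  assert(narrowLoadByteOffset(32, 8, 16, /*bigEndian=*/true)  == 1);
  return 0;
}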
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); + if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1)); + } + } + + // fold (sext_inreg (extload x)) -> (sextload x) + if (ISD::isEXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use + if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse() && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + return SDValue(); +} + +SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // noop truncate + if (N0.getValueType() == N->getValueType(0)) + return N0; + // fold (truncate c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); + // fold (truncate (truncate x)) -> (truncate x) + if (N0.getOpcode() == ISD::TRUNCATE) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (truncate (ext x)) -> (ext x) or (truncate x) or x + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND|| + N0.getOpcode() == ISD::ANY_EXTEND) { + if (N0.getOperand(0).getValueType().bitsLT(VT)) + // if the source is smaller than the dest, we still need an extend + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0)); + else if (N0.getOperand(0).getValueType().bitsGT(VT)) + // if the source is larger than the dest, than we just need the truncate + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + else + // if the source and dest are the same type, we can drop both the extend + // and the truncate + return N0.getOperand(0); + } + + // See if we can simplify the input to this truncate through knowledge that + // only the low bits are being used. 
For example "trunc (or (shl x, 8), y)" + // -> trunc y + SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits())); + if (Shorter.getNode()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + + // fold (truncate (load x)) -> (smaller load x) + // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) + return ReduceLoadWidth(N); +} + +static SDNode *getBuildPairElt(SDNode *N, unsigned i) { + SDValue Elt = N->getOperand(i); + if (Elt.getOpcode() != ISD::MERGE_VALUES) + return Elt.getNode(); + return Elt.getOperand(Elt.getResNo()).getNode(); +} + +/// CombineConsecutiveLoads - build_pair (load, load) -> load +/// if load locations are consecutive. +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { + assert(N->getOpcode() == ISD::BUILD_PAIR); + + SDNode *LD1 = getBuildPairElt(N, 0); + if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + return SDValue(); + MVT LD1VT = LD1->getValueType(0); + SDNode *LD2 = getBuildPairElt(N, 1); + const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + + if (ISD::isNON_EXTLoad(LD2) && + LD2->hasOneUse() && + // If both are volatile this would reduce the number of volatile loads. + // If one is volatile it might be ok, but play conservative and bail out. + !cast<LoadSDNode>(LD1)->isVolatile() && + !cast<LoadSDNode>(LD2)->isVolatile() && + TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { + LoadSDNode *LD = cast<LoadSDNode>(LD1); + unsigned Align = LD->getAlignment(); + unsigned NewAlign = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForMVT()); + + if (NewAlign <= Align && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) + return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + false, Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // If the input is a BUILD_VECTOR with all constant elements, fold this now. + // Only do this before legalize, since afterward the target may be depending + // on the bitconvert. + // First check to see if this is all constant. + if (!LegalTypes && + N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && + VT.isVector()) { + bool isSimple = true; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) + if (N0.getOperand(i).getOpcode() != ISD::UNDEF && + N0.getOperand(i).getOpcode() != ISD::Constant && + N0.getOperand(i).getOpcode() != ISD::ConstantFP) { + isSimple = false; + break; + } + + MVT DestEltVT = N->getValueType(0).getVectorElementType(); + assert(!DestEltVT.isVector() && + "Element type of vector ValueType must not be vector!"); + if (isSimple) + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); + } + + // If the input is a constant, let getNode fold it. + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { + SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); + if (Res.getNode() != N) return Res; + } + + // (conv (conv x, t1), t2) -> (conv x, t2) + if (N0.getOpcode() == ISD::BIT_CONVERT) + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (conv (load x)) -> (load (conv*)x) + // If the resultant load doesn't need a higher alignment than the original! + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + // Do not change the width of a volatile load. 
+ !cast<LoadSDNode>(N0)->isVolatile() && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForMVT()); + unsigned OrigAlign = LN0->getAlignment(); + + if (Align <= OrigAlign) { + SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), OrigAlign); + AddToWorkList(N); + CombineTo(N0.getNode(), + DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + N0.getValueType(), Load), + Load.getValue(1)); + return Load; + } + } + + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // This often reduces constant pool loads. + if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { + SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, + N0.getOperand(0)); + AddToWorkList(NewConv.getNode()); + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + if (N0.getOpcode() == ISD::FNEG) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(SignBit, VT)); + assert(N0.getOpcode() == ISD::FABS); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(~SignBit, VT)); + } + + // fold (bitconvert (fcopysign cst, x)) -> + // (or (and (bitconvert x), sign), (and cst, (not sign))) + // Note that we don't handle (copysign x, cst) because this can always be + // folded to an fneg or fabs. + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && + VT.isInteger() && !VT.isVector()) { + unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + MVT IntXVT = MVT::getIntegerVT(OrigXWidth); + if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { + SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + IntXVT, N0.getOperand(1)); + AddToWorkList(X.getNode()); + + // If X has a different width than the result/lhs, sext it or truncate it. + unsigned VTWidth = VT.getSizeInBits(); + if (OrigXWidth < VTWidth) { + X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } else if (OrigXWidth > VTWidth) { + // To get the sign bit in the right place, we have to shift it right + // before truncating. + X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X.getValueType(), X, + DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + AddToWorkList(X.getNode()); + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X, DAG.getConstant(SignBit, VT)); + AddToWorkList(X.getNode()); + + SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + VT, N0.getOperand(0)); + Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst, DAG.getConstant(~SignBit, VT)); + AddToWorkList(Cst.getNode()); + + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + } + } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
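Before the build_pair case, a note on the fneg/fabs bitcast folds above: they work because IEEE-754 sign manipulation is pure bit arithmetic on the integer view of the float. A standalone check (ours, not part of the patch; assumes 32-bit IEEE float, NaNs not exercised):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// fneg is an XOR with the sign-bit mask; fabs is an AND with its inverse.
static uint32_t bits(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof u);
  return u;
}

int main() {
  const float vals[] = {1.5f, -0.0f, 3.25e9f};
  for (float f : vals) {
    assert(bits(-f)           == (bits(f) ^ 0x80000000u));
    assert(bits(std::fabs(f)) == (bits(f) & 0x7FFFFFFFu));
  }
  return 0;
}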
+ if (N0.getOpcode() == ISD::BUILD_PAIR) { + SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); + if (CombineLD.getNode()) + return CombineLD; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { + MVT VT = N->getValueType(0); + return CombineConsecutiveLoads(N, VT); +} + +/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the +/// destination element value type. +SDValue DAGCombiner:: +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { + MVT SrcEltVT = BV->getValueType(0).getVectorElementType(); + + // If this is already the right type, we're done. + if (SrcEltVT == DstEltVT) return SDValue(BV, 0); + + unsigned SrcBitSize = SrcEltVT.getSizeInBits(); + unsigned DstBitSize = DstEltVT.getSizeInBits(); + + // If this is a conversion of N elements of one type to N elements of another + // type, convert each element. This handles FP<->INT cases. + if (SrcBitSize == DstBitSize) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + SDValue Op = BV->getOperand(i); + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + if (Op.getValueType() != SrcEltVT) + Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + DstEltVT, Op)); + AddToWorkList(Ops.back().getNode()); + } + MVT VT = MVT::getVectorVT(DstEltVT, + BV->getValueType(0).getVectorNumElements()); + return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + + // Otherwise, we're growing or shrinking the elements. To avoid having to + // handle annoying details of growing/shrinking FP values, we convert them to + // int first. + if (SrcEltVT.isFloatingPoint()) { + // Convert the input float vector to a int vector where the elements are the + // same sizes. + assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); + MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits()); + BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); + SrcEltVT = IntVT; + } + + // Now we know the input is an integer vector. If the output is a FP type, + // convert to integer first, then to FP of the right size. + if (DstEltVT.isFloatingPoint()) { + assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); + MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits()); + SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); + + // Next, convert to FP elements of the same size. + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); + } + + // Okay, we know the src/dst types are both integers of differing types. + // Handling growing first. + assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); + if (SrcBitSize < DstBitSize) { + unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; + i += NumInputsPerOutput) { + bool isLE = TLI.isLittleEndian(); + APInt NewBits = APInt(DstBitSize, 0); + bool EltIsUndef = true; + for (unsigned j = 0; j != NumInputsPerOutput; ++j) { + // Shift the previously computed bits over. + NewBits <<= SrcBitSize; + SDValue Op = BV->getOperand(i+ (isLE ? 
(NumInputsPerOutput-j-1) : j));
+        if (Op.getOpcode() == ISD::UNDEF) continue;
+        EltIsUndef = false;
+
+        NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+                    zextOrTrunc(SrcBitSize).zext(DstBitSize));
+      }
+
+      if (EltIsUndef)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      else
+        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+    }
+
+    MVT VT = MVT::getVectorVT(DstEltVT, Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Finally, this must be the case where we are shrinking elements: each input
+  // turns into multiple outputs.
+  bool isS2V = ISD::isScalarToVector(BV);
+  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+  MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands());
+  SmallVector<SDValue, 8> Ops;
+
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      continue;
+    }
+
+    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
+                        getAPIntValue()).zextOrTrunc(SrcBitSize);
+
+    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+      APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+                           Ops[0]);
+      OpVal = OpVal.lshr(DstBitSize);
+    }
+
+    // For big endian targets, swap the order of the pieces of each element.
+    if (TLI.isBigEndian())
+      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                     &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  MVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fadd c1, c2) -> c1+c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (fadd A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fadd A, (fneg B)) -> (fsub A, B)
+  if (isNegatibleForFree(N1, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+  // fold (fadd (fneg A), B) -> (fsub B, A)
+  if (isNegatibleForFree(N0, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+                       GetNegatedExpression(N0, DAG, LegalOperations));
+
+  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
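Returning to the element-shrinking loop above: splitting one wide constant into narrow pieces must emit them in increasing significance and then reverse for big-endian. A standalone sketch of the little-endian case (ours, not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstring>

// Shrink one i32 build_vector element into four i8 pieces, mirroring the
// trunc-then-lshr loop above: least significant byte first on little-endian.
int main() {
  uint32_t v = 0x01020304u;
  const uint8_t expectLE[4] = {0x04, 0x03, 0x02, 0x01};
  uint8_t pieces[4];
  for (int i = 0; i < 4; ++i) {
    pieces[i] = (uint8_t)v;  // trunc to DstBitSize
    v >>= 8;                 // lshr by DstBitSize
  }
  assert(std::memcmp(pieces, expectLE, 4) == 0);
  return 0;
}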
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fsub c1, c2) -> c1-c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); + // fold (fsub A, 0) -> A + if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + // fold (fsub 0, B) -> -B + if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations)) + return GetNegatedExpression(N1, DAG, LegalOperations); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + } + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (isNegatibleForFree(N1, LegalOperations)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fmul c1, c2) -> c1*c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); + // fold (fmul A, 0) -> 0 + if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + return N1; + // fold (fmul X, 2.0) -> (fadd X, X) + if (N1CFP && N1CFP->isExactlyValue(+2.0)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); + // fold (fmul X, (fneg 1.0)) -> (fneg X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); + + // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. 
+ if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), N1)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fdiv c1, c2) -> c1/c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + + + // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. + if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (frem c1, c2) -> fmod(c1,c2) + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + + return SDValue(); +} + +SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + + if (N1CFP) { + const APFloat& V = N1CFP->getValueAPF(); + // copysign(x, c1) -> fabs(x) iff ispos(c1) + // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) + if (!V.isNegative()) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + } else { + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + } + } + + // copysign(fabs(x), y) -> copysign(x, y) + // copysign(fneg(x), y) -> copysign(x, y) + // copysign(copysign(x,z), y) -> copysign(x, y) + if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || + N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + + // copysign(x, abs(y)) -> abs(x) + if (N1.getOpcode() == ISD::FABS) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + + // copysign(x, copysign(y,z)) -> copysign(x, z) + if (N1.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, 
N1.getOperand(1)); + + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, N1.getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + MVT VT = N->getValueType(0); + MVT OpVT = N0.getValueType(); + + // fold (sint_to_fp c1) -> c1fp + if (N0C && OpVT != MVT::ppcf128) + return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + + // If the input is a legal type, and SINT_TO_FP is not legal on this target, + // but UINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to UINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + MVT VT = N->getValueType(0); + MVT OpVT = N0.getValueType(); + + // fold (uint_to_fp c1) -> c1fp + if (N0C && OpVT != MVT::ppcf128) + return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + + // If the input is a legal type, and UINT_TO_FP is not legal on this target, + // but SINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to SINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_to_sint c1fp) -> c1 + if (N0CFP) + return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_to_uint c1fp) -> c1 + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_round c1fp) -> c1fp + if (N0CFP && N0.getValueType() != MVT::ppcf128) + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); + + // fold (fp_round (fp_extend x)) -> x + if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) + return N0.getOperand(0); + + // fold (fp_round (fp_round x)) -> (fp_round x) + if (N0.getOpcode() == ISD::FP_ROUND) { + // This is a value preserving truncation if both round's are. 
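+    // (A note on the flag this code relies on: the second operand of
+    // FP_ROUND is 1 when the round is known not to change the value, e.g.
+    // when rounding back to f32 a value that was just extended from f32.
+    // The merged round may only keep the flag set if both of the original
+    // rounds had it set.)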
+    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+                   N0.getNode()->getConstantOperandVal(1) == 1;
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getIntPtrConstant(IsTrunc));
+  }
+
+  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+                              N0.getOperand(0), N1);
+    AddToWorkList(Tmp.getNode());
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       Tmp, N0.getOperand(1));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  MVT VT = N->getValueType(0);
+  MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+  // fold (fp_round_inreg c1fp) -> c1fp
+  if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  MVT VT = N->getValueType(0);
+
+  // If this is fp_round(fp_extend x), don't fold it; allow ourselves to be
+  // folded.
+  if (N->hasOneUse() &&
+      N->use_begin()->getOpcode() == ISD::FP_ROUND)
+    return SDValue();
+
+  // fold (fp_extend c1fp) -> c1fp
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
+  // value of X.
+  if (N0.getOpcode() == ISD::FP_ROUND
+      && N0.getNode()->getConstantOperandVal(1) == 1) {
+    SDValue In = N0.getOperand(0);
+    if (In.getValueType() == VT) return In;
+    if (VT.bitsLT(In.getValueType()))
+      return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+                         In, N0.getOperand(1));
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+  }
+
+  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+                                     LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getSrcValue(),
+                                     LN0->getSrcValueOffset(),
+                                     N0.getValueType(),
+                                     LN0->isVolatile(), LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(),
+              DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+              ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+
+  if (isNegatibleForFree(N0, LegalOperations))
+    return GetNegatedExpression(N0, DAG, LegalOperations);
+
+  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+  // constant pool values.
+ if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger() && + !N0.getOperand(0).getValueType().isVector()) { + SDValue Int = N0.getOperand(0); + MVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFABS(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fabs c1) -> fabs(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + // fold (fabs (fabs x)) -> (fabs x) + if (N0.getOpcode() == ISD::FABS) + return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) + // fold (fabs (fcopysign x, y)) -> (fabs x) + if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + + // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // constant pool values. + if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger() && + !N0.getOperand(0).getValueType().isVector()) { + SDValue Int = N0.getOperand(0); + MVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBRCOND(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // never taken branch, fold to chain + if (N1C && N1C->isNullValue()) + return Chain; + // unconditional branch + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2); + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal + // on the target. + if (N1.getOpcode() == ISD::SETCC && + TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + Chain, N1.getOperand(2), + N1.getOperand(0), N1.getOperand(1), N2); + } + + if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { + // Match this pattern so that we can generate simpler code: + // + // %a = ... + // %b = and i32 %a, 2 + // %c = srl i32 %b, 1 + // brcond i32 %c ... + // + // into + // + // %a = ... + // %b = and %a, 2 + // %c = setcc eq %b, 0 + // brcond %c ... + // + // This applies only when the AND constant value has one bit set and the + // SRL constant is equal to the log2 of the AND constant. The back-end is + // smart enough to convert the result into a TEST/JMP sequence. 
+ SDValue Op0 = N1.getOperand(0); + SDValue Op1 = N1.getOperand(1); + + if (Op0.getOpcode() == ISD::AND && + Op0.hasOneUse() && + Op1.getOpcode() == ISD::Constant) { + SDValue AndOp0 = Op0.getOperand(0); + SDValue AndOp1 = Op0.getOperand(1); + + if (AndOp1.getOpcode() == ISD::Constant) { + const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); + + if (AndConst.isPowerOf2() && + cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDValue SetCC = + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETNE); + + // Replace the uses of SRL with SETCC + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } + } + } + + return SDValue(); +} + +// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. +// +SDValue DAGCombiner::visitBR_CC(SDNode *N) { + CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); + SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + + // Use SimplifySetCC to simplify SETCC's. + SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + false); + if (Simp.getNode()) AddToWorkList(Simp.getNode()); + + ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode()); + + // fold br_cc true, dest -> br dest (unconditional branch) + if (SCCC && !SCCC->isNullValue()) + return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, + N->getOperand(0), N->getOperand(4)); + // fold br_cc false, dest -> unconditional fall through + if (SCCC && SCCC->isNullValue()) + return N->getOperand(0); + + // fold to a simpler setcc + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + N->getOperand(0), Simp.getOperand(2), + Simp.getOperand(0), Simp.getOperand(1), + N->getOperand(4)); + + return SDValue(); +} + +/// CombineToPreIndexedLoadStore - Try turning a load / store into a +/// pre-indexed load / store when the base pointer is an add or subtract +/// and it has other uses besides the load / store. After the +/// transformation, the new indexed load / store has effectively folded +/// the add / subtract in and all of its other uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + MVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail + // out. There is no reason to make this a preinc/predec. + if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || + Ptr.getNode()->hasOneUse()) + return false; + + // Ask the target to do addressing mode selection. 
+  SDValue BasePtr;
+  SDValue Offset;
+  ISD::MemIndexedMode AM = ISD::UNINDEXED;
+  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+    return false;
+  // Don't create an indexed load / store with zero offset.
+  if (isa<ConstantSDNode>(Offset) &&
+      cast<ConstantSDNode>(Offset)->isNullValue())
+    return false;
+
+  // Try turning it into a pre-indexed load / store except when:
+  // 1) The new base ptr is a frame index.
+  // 2) If N is a store and the new base ptr is either the same as or is a
+  //    predecessor of the value being stored.
+  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+  //    that would create a cycle.
+  // 4) All uses are load / store ops that use it as old base ptr.
+
+  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
+  // (plus the implicit offset) to a register to preinc anyway.
+  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+    return false;
+
+  // Check #2.
+  if (!isLoad) {
+    SDValue Val = cast<StoreSDNode>(N)->getValue();
+    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+      return false;
+  }
+
+  // Now check for #3 and #4.
+  bool RealUse = false;
+  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+         E = Ptr.getNode()->use_end(); I != E; ++I) {
+    SDNode *Use = *I;
+    if (Use == N)
+      continue;
+    if (Use->isPredecessorOf(N))
+      return false;
+
+    if (!((Use->getOpcode() == ISD::LOAD &&
+           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+          (Use->getOpcode() == ISD::STORE &&
+           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+      RealUse = true;
+  }
+
+  if (!RealUse)
+    return false;
+
+  SDValue Result;
+  if (isLoad)
+    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+                                BasePtr, Offset, AM);
+  else
+    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+                                 BasePtr, Offset, AM);
+  ++PreIndexedNodes;
+  ++NodesCombined;
+  DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
+  DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
+  DOUT << '\n';
+  WorkListRemover DeadNodes(*this);
+  if (isLoad) {
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+                                  &DeadNodes);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+                                  &DeadNodes);
+  } else {
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+                                  &DeadNodes);
+  }
+
+  // Finally, since the node is now dead, remove it from the graph.
+  DAG.DeleteNode(N);
+
+  // Replace the uses of Ptr with uses of the updated base value.
+  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+                                &DeadNodes);
+  removeFromWorkList(Ptr.getNode());
+  DAG.DeleteNode(Ptr.getNode());
+
+  return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// After the transformation, the add / subtract is effectively folded into
+/// the new indexed load / store and all of its uses are redirected to the
+/// new load / store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+  if (!LegalOperations)
+    return false;
+
+  bool isLoad = true;
+  SDValue Ptr;
+  MVT VT;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    if (LD->isIndexed())
+      return false;
+    VT = LD->getMemoryVT();
+    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    if (ST->isIndexed())
+      return false;
+    VT = ST->getMemoryVT();
+    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else {
+    return false;
+  }
+
+  if (Ptr.getNode()->hasOneUse())
+    return false;
+
+  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+         E = Ptr.getNode()->use_end(); I != E; ++I) {
+    SDNode *Op = *I;
+    if (Op == N ||
+        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+      continue;
+
+    SDValue BasePtr;
+    SDValue Offset;
+    ISD::MemIndexedMode AM = ISD::UNINDEXED;
+    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      if (Ptr == Offset)
+        std::swap(BasePtr, Offset);
+      if (Ptr != BasePtr)
+        continue;
+      // Don't create an indexed load / store with zero offset.
+      if (isa<ConstantSDNode>(Offset) &&
+          cast<ConstantSDNode>(Offset)->isNullValue())
+        continue;
+
+      // Try turning it into a post-indexed load / store except when
+      // 1) All uses are load / store ops that use it as base ptr.
+      // 2) Op must be independent of N, i.e. Op is neither a predecessor
+      //    nor a successor of N. Otherwise, if Op is folded that would
+      //    create a cycle.
+
+      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+        continue;
+
+      // Check for #1.
+      bool TryNext = false;
+      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+        SDNode *Use = *II;
+        if (Use == Ptr.getNode())
+          continue;
+
+        // If all the uses are load / store addresses, then don't do the
+        // transformation.
+        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
+          bool RealUse = false;
+          for (SDNode::use_iterator III = Use->use_begin(),
+                 EEE = Use->use_end(); III != EEE; ++III) {
+            SDNode *UseUse = *III;
+            if (!((UseUse->getOpcode() == ISD::LOAD &&
+                   cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
+                  (UseUse->getOpcode() == ISD::STORE &&
+                   cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+              RealUse = true;
+          }
+
+          if (!RealUse) {
+            TryNext = true;
+            break;
+          }
+        }
+      }
+
+      if (TryNext)
+        continue;
+
+      // Check for #2.
+      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+        SDValue Result = isLoad
+          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+                               BasePtr, Offset, AM)
+          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+                                BasePtr, Offset, AM);
+        ++PostIndexedNodes;
+        ++NodesCombined;
+        DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
+        DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
+        DOUT << '\n';
+        WorkListRemover DeadNodes(*this);
+        if (isLoad) {
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+                                        &DeadNodes);
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+                                        &DeadNodes);
+        } else {
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+                                        &DeadNodes);
+        }
+
+        // Finally, since the node is now dead, remove it from the graph.
+        DAG.DeleteNode(N);
+
+        // Replace the uses of Op with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Op); + DAG.DeleteNode(Op); + return true; + } + } + } + + return false; +} + +/// InferAlignment - If we can infer some alignment information from this +/// pointer, return it. +static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) { + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. + int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (Ptr.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Ptr.getOperand(1)) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + // FIXME: Handle FI+CST. + const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); + if (MFI.isFixedObjectIndex(FrameIdx)) { + int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; + + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = MinAlign(ObjectOffset, StackAlign); + + // Finally, the frame object itself may have a known alignment. Factor + // the alignment + offset into a new alignment. For example, if we know + // the FI is 8 byte aligned, but the pointer is 4 off, we really have a + // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte + // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return std::max(Align, FIInfoAlign); + } + } + + return 0; +} + +SDValue DAGCombiner::visitLOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + // Try to infer better alignment information than the load already has. + if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (Align > LD->getAlignment()) + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->isVolatile(), Align); + } + } + + // If load is not volatile and there are no uses of the loaded value (and + // the updated indexed value in case of indexed loads), change uses of the + // chain value into uses of the chain input (i.e. delete the dead load). + if (!LD->isVolatile()) { + if (N->getValueType(1) == MVT::Other) { + // Unindexed loads. + if (N->hasNUsesOfValue(0, 0)) { + // It's not safe to use the two value CombineTo variant here. e.g. + // v1, chain2 = load chain1, loc + // v2, chain3 = load chain2, loc + // v3 = add v2, c + // Now we replace use of chain2 with chain1. This makes the second load + // isomorphic to the one we are deleting, and thus makes this load live. 
+        DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
+        DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG));
+        DOUT << "\n";
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+
+        if (N->use_empty()) {
+          removeFromWorkList(N);
+          DAG.DeleteNode(N);
+        }
+
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    } else {
+      // Indexed loads.
+      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+        DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
+        DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG));
+        DOUT << " and 2 other values\n";
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+                                      DAG.getUNDEF(N->getValueType(1)),
+                                      &DeadNodes);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+        removeFromWorkList(N);
+        DAG.DeleteNode(N);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+
+  // If this load is directly stored, replace the load value with the stored
+  // value.
+  // TODO: Handle store large -> read small portion.
+  // TODO: Handle TRUNCSTORE/LOADEXT
+  if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
+      !LD->isVolatile()) {
+    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+      if (PrevST->getBasePtr() == Ptr &&
+          PrevST->getValue().getValueType() == N->getValueType(0))
+        return CombineTo(N, Chain.getOperand(1), Chain);
+    }
+  }
+
+  if (CombinerAA) {
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDValue BetterChain = FindBetterChain(N, Chain);
+
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      SDValue ReplLoad;
+
+      // Replace the chain to avoid dependency.
+      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+                               BetterChain, Ptr,
+                               LD->getSrcValue(), LD->getSrcValueOffset(),
+                               LD->isVolatile(), LD->getAlignment());
+      } else {
+        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+                                  LD->getValueType(0),
+                                  BetterChain, Ptr, LD->getSrcValue(),
+                                  LD->getSrcValueOffset(),
+                                  LD->getMemoryVT(),
+                                  LD->isVolatile(),
+                                  LD->getAlignment());
+      }
+
+      // Create token factor to keep old chain connected.
+      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                                  MVT::Other, Chain, ReplLoad.getValue(1));
+
+      // Replace uses with load result and token factor. Don't add users
+      // to work list.
+      return CombineTo(N, ReplLoad.getValue(0), Token, false);
+    }
+  }
+
+  // Try transforming N to an indexed load.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  if (ST->isVolatile())
+    return SDValue();
+
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr = ST->getBasePtr();
+  MVT VT = Value.getValueType();
+
+  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+    return SDValue();
+
+  unsigned Opc = Value.getOpcode();
+  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+      Value.getOperand(1).getOpcode() != ISD::Constant)
+    return SDValue();
+
+  SDValue N0 = Value.getOperand(0);
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+    LoadSDNode *LD = cast<LoadSDNode>(N0);
+    if (LD->getBasePtr() != Ptr)
+      return SDValue();
+
+    // Find the type to narrow the load / op / store to.
+    SDValue N1 = Value.getOperand(1);
+    unsigned BitWidth = N1.getValueSizeInBits();
+    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+    if (Opc == ISD::AND)
+      Imm ^= APInt::getAllOnesValue(BitWidth);
+    if (Imm == 0 || Imm.isAllOnesValue())
+      return SDValue();
+    unsigned ShAmt = Imm.countTrailingZeros();
+    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+    MVT NewVT = MVT::getIntegerVT(NewBW);
+    while (NewBW < BitWidth &&
+           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+             TLI.isNarrowingProfitable(VT, NewVT))) {
+      NewBW = NextPowerOf2(NewBW);
+      NewVT = MVT::getIntegerVT(NewBW);
+    }
+    if (NewBW >= BitWidth)
+      return SDValue();
+
+    // If the changed lsb does not start at a type bitwidth boundary, start
+    // at the previous one.
+    if (ShAmt % NewBW)
+      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+    if ((Imm & Mask) == Imm) {
+      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+      if (Opc == ISD::AND)
+        NewImm ^= APInt::getAllOnesValue(NewBW);
+      uint64_t PtrOff = ShAmt / 8;
+      // For big endian targets, we need to adjust the offset to the pointer to
+      // load the correct bytes.
+      if (TLI.isBigEndian())
+        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+      if (NewAlign <
+          TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT()))
+        return SDValue();
+
+      SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+                                   Ptr.getValueType(), Ptr,
+                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
+      SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+                                  LD->getChain(), NewPtr,
+                                  LD->getSrcValue(), LD->getSrcValueOffset(),
+                                  LD->isVolatile(), NewAlign);
+      SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+                                   DAG.getConstant(NewImm, NewVT));
+      SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+                                   NewVal, NewPtr,
+                                   ST->getSrcValue(), ST->getSrcValueOffset(),
+                                   false, NewAlign);
+
+      AddToWorkList(NewPtr.getNode());
+      AddToWorkList(NewLD.getNode());
+      AddToWorkList(NewVal.getNode());
+      WorkListRemover DeadNodes(*this);
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+                                    &DeadNodes);
+      ++OpsNarrowed;
+      return NewST;
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr = ST->getBasePtr();
+
+  // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (Align > ST->getAlignment()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), Align); + } + } + + // If this is a store of a bit convert, store the input value if the + // resultant store does not need a higher alignment than the original. + if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && + ST->isUnindexed()) { + unsigned OrigAlign = ST->getAlignment(); + MVT SVT = Value.getOperand(0).getValueType(); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(SVT.getTypeForMVT()); + if (Align <= OrigAlign && + ((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) + return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + } + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + if (Value.getOpcode() != ISD::TargetConstantFP) { + SDValue Tmp; + switch (CFP->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unknown FP type"); + case MVT::f80: // We don't do this for these yet. + case MVT::f128: + case MVT::ppcf128: + break; + case MVT::f32: + if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), MVT::i32); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } + break; + case MVT::f64: + if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), MVT::i64); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } else if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. 
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), + isVolatile, ST->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + DAG.getConstant(4, Ptr.getValueType())); + SVOffset += 4; + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + Ptr, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + St0, St1); + } + + break; + } + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + // Replace the chain to avoid dependency. + SDValue ReplStore; + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(),ST->getSrcValueOffset(), + ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + } else { + ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->getAlignment()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplStore); + + // Don't add users to work list. + return CombineTo(N, Token, false); + } + } + + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // FIXME: is there such a thing as a truncating indexed store? + if (ST->isTruncatingStore() && ST->isUnindexed() && + Value.getValueType().isInteger()) { + // See if we can simplify the input to this truncstore with knowledge that + // only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + SDValue Shorter = + GetDemandedBits(Value, + APInt::getLowBitsSet(Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits())); + AddToWorkList(Value.getNode()); + if (Shorter.getNode()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + + // Otherwise, see if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits()))) + return SDValue(N, 0); + } + + // If this is a load followed by a store to the same location, then the store + // is dead/noop. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { + if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && + ST->isUnindexed() && !ST->isVolatile() && + // There can't be any side effects between the load and store, such as + // a call or store. + Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { + // The store is dead, remove it. + return Chain; + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. 
We can do this even if this is already a truncstore. + if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) + && Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + ST->getMemoryVT())) { + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + } + + return ReduceLoadOpStoreWidth(N); +} + +SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + + // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new + // vector with the inserted element. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(), + InVec.getNode()->op_end()); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + // If the invec is an UNDEF and if EltNo is a constant, create a new + // BUILD_VECTOR with undef elements and the inserted element. + if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + isa<ConstantSDNode>(EltNo)) { + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT)); + + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + return SDValue(); +} + +SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + // (vextract (scalar_to_vector val, 0) -> val + SDValue InVec = N->getOperand(0); + + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. + MVT EltVT = InVec.getValueType().getVectorElementType(); + SDValue InOp = InVec.getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); + return InOp; + } + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. 
+ if (!LegalOperations) return SDValue(); + + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + SDValue EltNo = N->getOperand(1); + + if (isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + bool NewLoad = false; + bool BCNumEltsChanged = false; + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + MVT LVT = EVT; + + if (InVec.getOpcode() == ISD::BIT_CONVERT) { + MVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + EVT = BCVT.getVectorElementType(); + NewLoad = true; + } + + LoadSDNode *LN0 = NULL; + const ShuffleVectorSDNode *SVN = NULL; + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == EVT && + ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); + } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + if (BCNumEltsChanged) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); + InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); + + if (InVec.getOpcode() == ISD::BIT_CONVERT) + InVec = InVec.getOperand(0); + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; + } + } + + if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) + return SDValue(); + + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. + unsigned NewAlign = + TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT()); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) + return SDValue(); + + Align = NewAlign; + } + + SDValue NewPtr = LN0->getBasePtr(); + if (Elt) { + unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; + MVT PtrType = NewPtr.getValueType(); + if (TLI.isBigEndian()) + PtrOff = VT.getSizeInBits() / 8 - PtrOff; + NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, + DAG.getConstant(PtrOff, PtrType)); + } + + return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + MVT VT = N->getValueType(0); + MVT EltType = VT.getVectorElementType(); + + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT + // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from + // at most two distinct vectors, turn this into a shuffle node. + SDValue VecIn1, VecIn2; + for (unsigned i = 0; i != NumInScalars; ++i) { + // Ignore undef inputs. 
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    // If this input is something other than an EXTRACT_VECTOR_ELT with a
+    // constant index, bail out.
+    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // If the input vector type disagrees with the result of the build_vector,
+    // we can't make a shuffle.
+    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+    if (ExtractedFromVec.getValueType() != VT) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // Otherwise, remember this.  We allow up to two distinct input vectors.
+    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+      continue;
+
+    if (VecIn1.getNode() == 0) {
+      VecIn1 = ExtractedFromVec;
+    } else if (VecIn2.getNode() == 0) {
+      VecIn2 = ExtractedFromVec;
+    } else {
+      // Too many inputs.
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+  }
+
+  // If everything is good, we can make a shuffle operation.
+  if (VecIn1.getNode()) {
+    SmallVector<int, 8> Mask;
+    for (unsigned i = 0; i != NumInScalars; ++i) {
+      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+        Mask.push_back(-1);
+        continue;
+      }
+
+      // If extracting from the first vector, just use the index directly.
+      SDValue Extract = N->getOperand(i);
+      SDValue ExtVal = Extract.getOperand(1);
+      if (Extract.getOperand(0) == VecIn1) {
+        unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+        if (ExtIndex > VT.getVectorNumElements())
+          return SDValue();
+
+        Mask.push_back(ExtIndex);
+        continue;
+      }
+
+      // Otherwise, use InIdx + VecSize
+      unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+      Mask.push_back(Idx+NumInScalars);
+    }
+
+    // We can't create a shuffle node with an illegal type.
+    if (!TLI.isTypeLegal(VT) && LegalTypes)
+      return SDValue();
+
+    // Return the new VECTOR_SHUFFLE node.
+    SDValue Ops[2];
+    Ops[0] = VecIn1;
+    Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+    return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+  // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+  // inputs come from at most two distinct vectors, turn this into a shuffle
+  // node.
+
+  // If we only have one input vector, we don't need to do any concatenation.
+  if (N->getNumOperands() == 1)
+    return N->getOperand(0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  // FIXME: The splat simplification below is currently disabled by this
+  // early return.
+  return SDValue();
+
+  MVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  assert(N0.getValueType().getVectorNumElements() == NumElts &&
+         "Vector shuffle must be normalized in DAG");
+
+  // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+  // If it is a splat, check if the argument vector is a build_vector with
+  // all scalar elements the same.
+  if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+    SDNode *V = N0.getNode();
+
+    // If this is a bit convert that changes the element type of the vector but
+    // not the number of vector elements, look through it.  Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+    if (V->getOpcode() == ISD::BIT_CONVERT) {
+      SDValue ConvInput = V->getOperand(0);
+      if (ConvInput.getValueType().isVector() &&
+          ConvInput.getValueType().getVectorNumElements() == NumElts)
+        V = ConvInput.getNode();
+    }
+
+    if (V->getOpcode() == ISD::BUILD_VECTOR) {
+      unsigned NumElems = V->getNumOperands();
+      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
+      if (NumElems > BaseIdx) {
+        SDValue Base;
+        bool AllSame = true;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+            Base = V->getOperand(i);
+            break;
+          }
+        }
+        // Splat of <u, u, u, u>, return <u, u, u, u>
+        if (!Base.getNode())
+          return N0;
+        for (unsigned i = 0; i != NumElems; ++i) {
+          if (V->getOperand(i) != Base) {
+            AllSame = false;
+            break;
+          }
+        }
+        // Splat of <x, x, x, x>, return <x, x, x, x>
+        if (AllSame)
+          return N0;
+      }
+    }
+  }
+  return SDValue();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
+/// transform an AND to a vector_shuffle with the destination vector and a
+/// zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
+///      vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  if (N->getOpcode() == ISD::AND) {
+    if (RHS.getOpcode() == ISD::BIT_CONVERT)
+      RHS = RHS.getOperand(0);
+    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<int, 8> Indices;
+      unsigned NumElts = RHS.getNumOperands();
+      for (unsigned i = 0; i != NumElts; ++i) {
+        SDValue Elt = RHS.getOperand(i);
+        if (!isa<ConstantSDNode>(Elt))
+          return SDValue();
+        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+          Indices.push_back(i);
+        else if (cast<ConstantSDNode>(Elt)->isNullValue())
+          Indices.push_back(NumElts);
+        else
+          return SDValue();
+      }
+
+      // Let's see if the target supports this vector_shuffle.
+      MVT RVT = RHS.getValueType();
+      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+        return SDValue();
+
+      // Return the new VECTOR_SHUFFLE node.
+      MVT EVT = RVT.getVectorElementType();
+      SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+                                     DAG.getConstant(0, EVT));
+      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                                 RVT, &ZeroOps[0], ZeroOps.size());
+      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+  // After legalize, the target may be depending on adds and other
+  // binary ops to provide legal ways to construct constants or other
+  // things. Simplifying them may result in a loss of legality.
+  if (LegalOperations) return SDValue();
+
+  MVT VT = N->getValueType(0);
+  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+
+  MVT EltType = VT.getVectorElementType();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue Shuffle = XformToShuffleWithZero(N);
+  if (Shuffle.getNode()) return Shuffle;
+
+  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+  // this operation.
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR && + RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + SDValue LHSOp = LHS.getOperand(i); + SDValue RHSOp = RHS.getOperand(i); + // If these two elements can't be folded, bail out. + if ((LHSOp.getOpcode() != ISD::UNDEF && + LHSOp.getOpcode() != ISD::Constant && + LHSOp.getOpcode() != ISD::ConstantFP) || + (RHSOp.getOpcode() != ISD::UNDEF && + RHSOp.getOpcode() != ISD::Constant && + RHSOp.getOpcode() != ISD::ConstantFP)) + break; + + // Can't fold divide by zero. + if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || + N->getOpcode() == ISD::FDIV) { + if ((RHSOp.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || + (RHSOp.getOpcode() == ISD::ConstantFP && + cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) + break; + } + + Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), + EltType, LHSOp, RHSOp)); + AddToWorkList(Ops.back().getNode()); + assert((Ops.back().getOpcode() == ISD::UNDEF || + Ops.back().getOpcode() == ISD::Constant || + Ops.back().getOpcode() == ISD::ConstantFP) && + "Scalar binop didn't fold!"); + } + + if (Ops.size() == LHS.getNumOperands()) { + MVT VT = LHS.getValueType(); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, + SDValue N1, SDValue N2){ + assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + + SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + + // If we got a simplified select_cc node back from SimplifySelectCC, then + // break it down into a new SETCC node, and a new SELECT node, and then return + // the SELECT node, since we were called with a SELECT node. + if (SCC.getNode()) { + // Check to see if we got a select_cc back (to turn into setcc/select). + // Otherwise, just return whatever node we got back, like fabs. + if (SCC.getOpcode() == ISD::SELECT_CC) { + SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + N0.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), + SCC.getOperand(4)); + AddToWorkList(SETCC.getNode()); + return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); + } + + return SCC; + } + return SDValue(); +} + +/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS +/// are the two values being selected between, see if we can simplify the +/// select. Callers of this should assume that TheSelect is deleted if this +/// returns true. As such, they should return the appropriate thing (e.g. the +/// node) back to the top-level of the DAG combiner loop to avoid it being +/// looked at. +bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, + SDValue RHS) { + + // If this is a select from two identical things, try to pull the operation + // through the select. + if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. 
+ if (LHS.getOpcode() == ISD::LOAD && + // Do not let this transformation reduce the number of volatile loads. + !cast<LoadSDNode>(LHS)->isVolatile() && + !cast<LoadSDNode>(RHS)->isVolatile() && + // Token chains must be identical. + LHS.getOperand(0) == RHS.getOperand(0)) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // If this is an EXTLOAD, the VT's must match. + if (LLD->getMemoryVT() == RLD->getMemoryVT()) { + // FIXME: this conflates two src values, discarding one. This is not + // the right thing to do, but nothing uses srcvalues now. When they do, + // turn SrcValue into a list of locations. + SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) { + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } + } else { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) { + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + } + + if (Addr.getNode()) { + SDValue Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + LLD->getChain(), + Addr,LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->isVolatile(), + LLD->getAlignment()); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType(), + TheSelect->getDebugLoc(), + TheSelect->getValueType(0), + LLD->getChain(), Addr, LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->getMemoryVT(), + LLD->isVolatile(), + LLD->getAlignment()); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; + } + } + } + } + + return false; +} + +/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// where 'cond' is the comparison specified by CC. +SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, + ISD::CondCode CC, bool NotExtCompare) { + // (x ? y : y) -> y. 
+ if (N2 == N3) return N2;
+
+ MVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+ // Determine if the condition we're dealing with is constant.
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && !SCCC->isNullValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->isNullValue())
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node.
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0.
+ if (CFP->getValueAPF().isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+ // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+ // types and we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float), and if the ConstantFP is not legal, because if
+ // it is legal, we may not need to store the FP constant in a constant pool.
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load; they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ const Type *FPTy = Elts[0]->getType();
+ const TargetData &TD = *TLI.getTargetData();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 elements of the array so that we can
+ // select between them.
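+ // Note the layout: the array built above is { FV, TV }, with the false
+ // value at offset 0, so a true condition selects offset EltSize (element
+ // 1, the "true" constant) and a false condition selects offset 0. With
+ // f32 constants those offsets are 4 and 0 respectively.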
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false,
+ Alignment);
+
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+ // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ N0.getValueType().isInteger() &&
+ N2.getValueType().isInteger() &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ MVT XType = N0.getValueType();
+ MVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+ // (and (sra X, size(X)-1), A) -> (and (srl X, C2), A) iff A is a
+ // single-bit constant.
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits()-ShCtV-1;
+ SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
+ SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+ XType, N0, ShCt);
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ AddToWorkList(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorkList(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getAPIntValue() == 1)
+ return SDValue();
+
+ // Get a SetCC of the condition.
+ // FIXME: Should probably make sure that setcc is legal if we ever have a
+ // target where it isn't.
+ SDValue Temp, SCC;
+ // Cast from the setcc result type to the select result type.
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
+
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy()));
+ }
+
+ // Check to see if this is the equivalent of a setcc.
+ // FIXME: Turn all of these into setcc if setcc is legal; otherwise, go
+ // ahead with the folds.
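+ // NOTE: the block below is intentionally compiled out (its condition
+ // starts with "if (0 &&") until the FIXME above is resolved; the folds it
+ // contains are kept for reference.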
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { + MVT XType = N0.getValueType(); + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { + SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); + if (Res.getValueType() != VT) + Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); + return Res; + } + + // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) + if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && + (!LegalOperations || + TLI.isOperationLegal(ISD::CTLZ, XType))) { + SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); + return DAG.getNode(ISD::SRL, DL, XType, Ctlz, + DAG.getConstant(Log2_32(XType.getSizeInBits()), + getShiftAmountTy())); + } + // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) + if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { + SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), + XType, DAG.getConstant(0, XType), N0); + SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); + return DAG.getNode(ISD::SRL, DL, XType, + DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + } + // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) + if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { + SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + } + } + + // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && + N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { + MVT XType = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, + N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + } + // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { + if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) { + MVT XType = N0.getValueType(); + if (SubC->isNullValue() && XType.isInteger()) { + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + XType, N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + } + } + } + + return SDValue(); +} + +/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. 
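+/// It simply forwards to the target's implementation, wrapping the current
+/// combine level and a pointer back to this combiner in a DAGCombinerInfo
+/// (see the body below).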
+SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DebugLoc DL, bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level == Unrestricted, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by a constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by a constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if the base is known not to alias with
+/// anything but itself. Provides the base object and offset as results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0;
+
+ // If it's adding a simple constant, integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // If it's any of the following, then it can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base) ||
+ isa<ConstantPoolSDNode>(Base) ||
+ isa<GlobalAddressSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ SDValue Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2) const {
+ // If they are the same, then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+ bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+
+ // If they have the same base address, then...
+ if (Base1 == Base2)
+ // Check to see if the addresses overlap.
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+ // If we know both bases (and they differ), they can't alias.
+ if (KnownBase1 && KnownBase2) return false;
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+ int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+ int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset) const {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ Size = LD->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LD->getSrcValue();
+ SrcValueOffset = LD->getSrcValueOffset();
+ return true;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ Size = ST->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = ST->getSrcValue();
+ SrcValueOffset = ST->getSrcValueOffset();
+ } else {
+ assert(0 && "FindAliasInfo expected a memory operand");
+ }
+
+ return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVector<SDValue, 8> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ std::set<SDNode *> Visited; // Visited node set.
+
+ // Get alias information for the node.
+ SDValue Ptr;
+ int64_t Size = 0;
+ const Value *SrcValue = 0;
+ int SrcValueOffset = 0;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+
+ // Don't bother if we've been here before.
+ if (Visited.find(Chain.getNode()) != Visited.end()) continue;
+ Visited.insert(Chain.getNode());
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry token is ideal chain operand, but handled in FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDValue OpPtr;
+ int64_t OpSize = 0;
+ const Value *OpSrcValue = 0;
+ int OpSrcValueOffset = 0;
+ bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset);
+
+ // If the chain is an alias, stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ // Clean up the old chain.
+ AddToWorkList(Chain.getNode());
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+ // We have to check each of the operands of the token factor, so we queue
+ // them up. Adding the operands to the queue (stack) in reverse order
+ // maintains the original order and increases the likelihood that getNode
+ // will find a matching token factor (CSE).
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ // Eliminate the token factor if we can.
+ AddToWorkList(Chain.getNode());
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node).
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ if (Aliases.size() == 0) {
+ // If there are no operands, chain to the entry token.
+ return DAG.getEntryNode();
+ } else if (Aliases.size() == 1) {
+ // If there is a single operand, chain to it. We don't need to revisit it.
+ return Aliases[0];
+ }
+
+ // Construct a custom tailored token factor.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ &Aliases[0], Aliases.size());
+
+ // Make sure the old chain gets cleaned up.
+ if (NewChain != OldChain) AddToWorkList(OldChain.getNode());
+
+ return NewChain;
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+ // Create a DAGCombiner instance and run it over the whole DAG.
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 0000000..6becff3
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1033 @@
+//===-- FastISel.cpp - Implementation of the FastISel class --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
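+//
+// As an illustrative sketch (the names match the helpers defined below): an
+// integer add whose operands are already available in virtual registers
+// reduces to a single target hook call such as
+// ResultReg = FastEmit_rr(MVT::i32, MVT::i32, ISD::ADD, Op0, Op1);
+// with SelectBinaryOp performing the surrounding type and operand checks.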
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/DebugLoc.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SelectionDAGBuild.h"
+using namespace llvm;
+
+unsigned FastISel::getRegForValue(Value *V) {
+ MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT::SimpleValueType VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Promote MVT::i1 to a legal type though, because it's common and easy.
+ if (VT == MVT::i1)
+ VT = TLI.getTypeToTransformTo(VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ if (ValueMap.count(V))
+ return ValueMap[V];
+ unsigned Reg = LocalValueMap[V];
+ if (Reg != 0)
+ return Reg;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V)) {
+ Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType()));
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+ if (!Reg) {
+ const APFloat &Flt = CF->getValueAPF();
+ MVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal));
+ if (IntegerReg != 0)
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+ }
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (!SelectOperator(CE, CE->getOpcode())) return 0;
+ Reg = LocalValueMap[CE];
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg);
+ }
+
+ // If target-independent code couldn't handle the value, give target-specific
+ // code a try.
+ if (!Reg && isa<Constant>(V))
+ Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
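+ // (A constant materialized here is emitted into the current block; it does
+ // not necessarily dominate uses in other blocks, so a cross-block cache
+ // entry could leave such a use reading an undefined virtual register.)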
+ if (Reg != 0)
+ LocalValueMap[V] = Reg;
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ if (ValueMap.count(V))
+ return ValueMap[V];
+ return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return Reg;
+ }
+
+ unsigned &AssignedReg = ValueMap[I];
+ if (AssignedReg == 0)
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
+ TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
+ Reg, RegClass, RegClass);
+ }
+ return AssignedReg;
+}
+
+unsigned FastISel::getRegForGEPIndex(Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return 0;
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT))
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
+ ISD::SIGN_EXTEND, IdxN);
+ else if (IdxVT.bitsGT(PtrVT))
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(),
+ ISD::TRUNCATE, IdxN);
+ return IdxN;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode that directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
+ MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 &&
+ (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(VT);
+ else
+ return false;
+ }
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (Op0 == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, CI->getZExtValue());
+ if (ResultReg != 0) {
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ // Check if the second operand is a constant float.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { + unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, CF); + if (ResultReg != 0) { + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + } + + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // Now we have both operands in registers. Emit the instruction. + unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op1); + if (ResultReg == 0) + // Target-specific code wasn't able to find a machine opcode for + // the given ISD opcode and type. Halt "fast" selection and bail. + return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectGetElementPtr(User *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + const Type *Ty = I->getOperand(0)->getType(); + MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT(); + for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); + OI != E; ++OI) { + Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); + // FIXME: This can be optimized by combining the add with a + // subsequent one. + N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->getZExtValue() == 0) continue; + uint64_t Offs = + TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + continue; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD.getTypeAllocSize(Ty); + unsigned IdxN = getRegForGEPIndex(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + if (ElementSize != 1) { + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + } + + // We successfully emitted code for the given LLVM Instruction. 
+ UpdateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::SelectCall(User *I) {
+ Function *F = cast<CallInst>(I)->getCalledFunction();
+ if (!F) return false;
+
+ unsigned IID = F->getIntrinsicID();
+ switch (IID) {
+ default: break;
+ case Intrinsic::dbg_stoppoint: {
+ DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
+ if (DIDescriptor::ValidDebugInfo(SPI->getContext(), CodeGenOpt::None)) {
+ DICompileUnit CU(cast<GlobalVariable>(SPI->getContext()));
+ unsigned Line = SPI->getLine();
+ unsigned Col = SPI->getColumn();
+ unsigned Idx = MF.getOrCreateDebugLocID(CU.getGV(), Line, Col);
+ setCurDebugLoc(DebugLoc::get(Idx));
+ }
+ return true;
+ }
+ case Intrinsic::dbg_region_start: {
+ DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I);
+ if (DIDescriptor::ValidDebugInfo(RSI->getContext(), CodeGenOpt::None) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ unsigned ID =
+ DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext()));
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ BuildMI(MBB, DL, II).addImm(ID);
+ }
+ return true;
+ }
+ case Intrinsic::dbg_region_end: {
+ DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I);
+ if (DIDescriptor::ValidDebugInfo(REI->getContext(), CodeGenOpt::None) &&
+ DW && DW->ShouldEmitDwarfDebug()) {
+ unsigned ID = 0;
+ DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext()));
+ if (!Subprogram.isNull() && !Subprogram.describes(MF.getFunction())) {
+ // This is the end of an inlined function.
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ ID = DW->RecordInlinedFnEnd(Subprogram);
+ if (ID)
+ // The returned ID is 0 if this is an unbalanced "end of inlined
+ // scope". This could happen if the optimizer eats dbg intrinsics
+ // or if the "beginning of inlined scope" was not recognized due to
+ // missing location info. In such cases, ignore this region.end.
+ BuildMI(MBB, DL, II).addImm(ID);
+ } else {
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ ID = DW->RecordRegionEnd(cast<GlobalVariable>(REI->getContext()));
+ BuildMI(MBB, DL, II).addImm(ID);
+ }
+ }
+ return true;
+ }
+ case Intrinsic::dbg_func_start: {
+ DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
+ Value *SP = FSI->getSubprogram();
+ if (!DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::None))
+ return true;
+
+ // llvm.dbg.func.start implicitly defines a dbg_stoppoint, which is what
+ // (most?) gdb expects.
+ DebugLoc PrevLoc = DL;
+ DISubprogram Subprogram(cast<GlobalVariable>(SP));
+ DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+ if (!Subprogram.describes(MF.getFunction())) {
+ // This is the beginning of an inlined function.
+
+ // If llvm.dbg.func.start is seen in a new block before any
+ // llvm.dbg.stoppoint intrinsic, then the location info is unknown.
+ // FIXME: Why is DebugLoc reset at the beginning of each block?
+ if (PrevLoc.isUnknown())
+ return true;
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber();
+ setCurDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID(
+ CompileUnit.getGV(), Line, 0)));
+
+ if (DW && DW->ShouldEmitDwarfDebug()) {
+ DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
+ unsigned LabelID = DW->RecordInlinedFnStart(Subprogram,
+ DICompileUnit(PrevLocTpl.CompileUnit),
+ PrevLocTpl.Line,
+ PrevLocTpl.Col);
+ const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
+ BuildMI(MBB, DL, II).addImm(LabelID);
+ }
+ } else {
+ // Record the source line.
+ unsigned Line = Subprogram.getLineNumber(); + MF.setDefaultDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( + CompileUnit.getGV(), Line, 0))); + if (DW && DW->ShouldEmitDwarfDebug()) { + // llvm.dbg.func_start also defines beginning of function scope. + DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram())); + } + } + + return true; + } + case Intrinsic::dbg_declare: { + DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + Value *Variable = DI->getVariable(); + if (DIDescriptor::ValidDebugInfo(Variable, CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + // Determine the address of the declared object. + Value *Address = DI->getAddress(); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + AllocaInst *AI = dyn_cast<AllocaInst>(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) break; + DenseMap<const AllocaInst*, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI == StaticAllocaMap.end()) break; // VLAs. + int FI = SI->second; + + // Determine the debug globalvariable. + GlobalValue *GV = cast<GlobalVariable>(Variable); + + // Build the DECLARE instruction. + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); + MachineInstr *DeclareMI + = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); + DIVariable DV(cast<GlobalVariable>(GV)); + if (!DV.isNull()) { + // This is a local variable + DW->RecordVariableScope(DV, DeclareMI); + } + } + return true; + } + case Intrinsic::eh_exception: { + MVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { + default: break; + case TargetLowering::Expand: { + assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Reg, RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + UpdateValueMap(I, ResultReg); + return true; + } + } + break; + } + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: { + MVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { + default: break; + case TargetLowering::Expand: { + MVT VT = (IID == Intrinsic::eh_selector_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + if (MBB->isLandingPad()) + AddCatchInfo(*cast<CallInst>(I), MMI, MBB); + else { +#ifndef NDEBUG + CatchInfoLost.insert(cast<CallInst>(I)); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. 
+ unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) MBB->addLiveIn(Reg); + } + + unsigned Reg = TLI.getExceptionSelectorRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Reg, RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + UpdateValueMap(I, ResultReg); + } else { + unsigned ResultReg = + getRegForValue(Constant::getNullValue(I->getType())); + UpdateValueMap(I, ResultReg); + } + return true; + } + } + break; + } + } + return false; +} + +bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the destination type is legal. Or as a special case, + // it may be i1 if we're doing a truncate because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(DstVT)) + if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the source operand is legal. Or as a special case, + // it may be i1 if we're doing zero-extension because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(SrcVT)) + if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // If the operand is i1, arrange for the high bits in the register to be zero. + if (SrcVT == MVT::i1) { + SrcVT = TLI.getTypeToTransformTo(SrcVT); + InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); + if (!InputReg) + return false; + } + // If the result is i1, truncate to the target's type for i1 first. + if (DstVT == MVT::i1) + DstVT = TLI.getTypeToTransformTo(DstVT); + + unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), + DstVT.getSimpleVT(), + Opcode, + InputReg); + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectBitCast(User *I) { + // If the bitcast doesn't change the type, just use the operand value. + if (I->getType() == I->getOperand(0)->getType()) { + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) + return false; + UpdateValueMap(I, Reg); + return true; + } + + // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple() || + !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // First, try to perform the bitcast by inserting a reg-reg copy. 
+ unsigned ResultReg = 0;
+ if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ ResultReg = createResultReg(DstClass);
+
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ Op0, DstClass, SrcClass);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+
+ // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+ if (!ResultReg)
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ ISD::BIT_CONVERT, Op0);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool
+FastISel::SelectInstruction(Instruction *I) {
+ return SelectOperator(I, I->getOpcode());
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
+ MachineFunction::iterator NextMBB =
+ next(MachineFunction::iterator(MBB));
+
+ if (MBB->isLayoutSuccessor(MSucc)) {
+ // The unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+ }
+ MBB->addSuccessor(MSucc);
+}
+
+bool
+FastISel::SelectOperator(User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::Sub: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::Mul: {
+ ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL;
+ return SelectBinaryOp(I, Opc);
+ }
+ case Instruction::SDiv:
+ return SelectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return SelectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return SelectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return SelectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return SelectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return SelectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return SelectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return SelectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return SelectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return SelectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return SelectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return SelectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return SelectGetElementPtr(I);
+
+ case Instruction::Br: {
+ BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc);
+ return true;
+ }
+
+ // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ // Nothing to emit.
+ return true;
+
+ case Instruction::PHI:
+ // PHI nodes are already emitted.
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return SelectCall(I);
+
+ case Instruction::BitCast:
+ return SelectBitCast(I);
+
+ case Instruction::FPToSI:
+ return SelectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return SelectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return SelectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return SelectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return SelectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return SelectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ )
+ : MBB(0),
+ ValueMap(vm),
+ MBBMap(bm),
+ StaticAllocaMap(am),
+#ifndef NDEBUG
+ CatchInfoLost(cil),
+#endif
+ MF(mf),
+ MMI(mmi),
+ DW(dw),
+ MRI(MF.getRegInfo()),
+ MFI(*MF.getFrameInfo()),
+ MCP(*MF.getConstantPool()),
+ TM(MF.getTarget()),
+ TD(*TM.getTargetData()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ unsigned /*Op1*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType, unsigned /*Op0*/,
+ ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType,
+ ISD::NodeType,
+ unsigned /*Op0*/, unsigned /*Op1*/,
+ uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper around FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+ unsigned Op0, uint64_t Imm,
+ MVT::SimpleValueType ImmType) {
+ // First check if the immediate type is legal. If not, we can't use the ri
+ // form.
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+ if (ResultReg != 0)
+ return ResultReg;
+ unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (MaterialReg == 0)
+ return 0;
+ return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
+
+/// FastEmit_rf_ - This method is a wrapper around FastEmit_rf. It first tries
+/// to emit an instruction with a floating-point immediate operand using
+/// FastEmit_rf. If that fails, it materializes the immediate into a register
+/// and tries FastEmit_rr instead.
+unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode,
+ unsigned Op0, ConstantFP *FPImm,
+ MVT::SimpleValueType ImmType) {
+ // First check if the immediate type is legal. If not, we can't use the rf
+ // form.
+ unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+ if (ResultReg != 0)
+ return ResultReg;
+
+ // Materialize the constant in a register.
+ unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
+ if (MaterialReg == 0) {
+ // If the target doesn't have a way to directly enter a floating-point
+ // value into a register, use an alternate approach.
+ // TODO: The current approach only supports floating-point constants
+ // that can be constructed by conversion from integer values. This should
+ // be replaced by code that creates a load from a constant-pool entry,
+ // which will require some target-specific work.
+ const APFloat &Flt = FPImm->getValueAPF();
+ MVT IntVT = TLI.getPointerTy();
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (!isExact)
+ return 0;
+ APInt IntVal(IntBitWidth, 2, x);
+
+ unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::Constant, IntVal.getZExtValue());
+ if (IntegerReg == 0)
+ return 0;
+ MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
+ ISD::SINT_TO_FP, IntegerReg);
+ if (MaterialReg == 0)
+ return 0;
+ }
+ return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(MBB, DL, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, unsigned Op1) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1);
+ else {
+ BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1);
+ bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ II.ImplicitDefs[0], RC, RC);
+ if (!InsertedCopy)
+ ResultReg = 0;
+ }
+ return ResultReg;
+}
+
+unsigned
FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, unsigned Op1, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addImm(Imm); + else { + BuildMI(MBB, DL, II).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, + unsigned Op0, uint32_t Idx) { + const TargetRegisterClass* RC = MRI.getRegClass(Op0); + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::EXTRACT_SUBREG); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); + else { + BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op +/// with all but the least significant bit set to zero. +unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) { + return FastEmit_ri(VT, VT, ISD::AND, Op, 1); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 0000000..2cd67e6 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,3091 @@ +//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of
+/// idioms as part of its processing. For example, if a target does not support
+/// the 'setcc' instruction efficiently, but does support the 'brcc'
+/// instruction, this will attempt to merge setcc and branch instructions
+/// into brcc's.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ CodeGenOpt::Level OptLevel;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// legalized. We use this to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SDValue LastCALLSEQ_END;
+
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
+
+ enum LegalizeAction {
+ Legal, // The target natively supports this operation.
+ Promote, // This operation should be executed in a larger type.
+ Expand // Try to expand this to other ops, otherwise use a libcall.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// value type, where the two bits correspond to the LegalizeAction enum.
+ /// This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
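+ /// For example, once (add x, y) has been legalized for its first user, the
+ /// mapping recorded here lets every later user receive the same legalized
+ /// value instead of legalizing the node again.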
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return it as-is.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+ /// getTypeAction - Return how we should legalize values of this type:
+ /// either it is already legal, we need to expand it into multiple registers
+ /// of a smaller integer type, or we need to promote it to a larger type.
+ LegalizeAction getTypeAction(MVT VT) const {
+ return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ ///
+ bool isTypeLegal(MVT VT) const {
+ return getTypeAction(VT) == Legal;
+ }
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - We know that the specified value has a legal type.
+ /// Recursively ensure that the operands have legal types, then return the
+ /// result.
+ SDValue LegalizeOp(SDValue O);
+
+ /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g.
+ SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+
+ SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ SDValue ExpandDBG_STOPPOINT(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+
+ void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const {
+ MVT EltVT = NVT.getVectorElementType();
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
+ CodeGenOpt::Level ol)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves).
+ // Given infinite stack space, we could just start legalizing on the root
+ // and traverse the whole graph. In practice however, this causes us to run
+ // out of stack space on large basic blocks. To avoid this problem, compute
+ // an ordering of the nodes where each node is only legalized after all of
+ // its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != next(E); ++I)
+ LegalizeOp(SDValue(I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+ if (Node->getOpcode() == ISD::CALLSEQ_END)
+ return Node;
+ if (Node->use_empty())
+ return 0; // No CallSeqEnd
+
+ // The chain is usually at the end.
+ SDValue TheChain(Node, Node->getNumValues()-1);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Sometimes it's at the beginning.
+ TheChain = SDValue(Node, 0);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Otherwise, hunt for it.
+ for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+ if (Node->getValueType(i) == MVT::Other) {
+ TheChain = SDValue(Node, i);
+ break;
+ }
+
+ // Otherwise, we walked into a node without a chain.
+ if (TheChain.getValueType() != MVT::Other)
+ return 0;
+ }
+ }
+
+ for (SDNode::use_iterator UI = Node->use_begin(),
+ E = Node->use_end(); UI != E; ++UI) {
+
+ // Make sure to only follow users of our token chain.
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+ if (User->getOperand(i) == TheChain)
+ if (SDNode *Result = FindCallEndFromCallStart(User))
+ return Result;
+ }
+ return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ assert(Node && "Didn't find callseq_start for a call??");
+ if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
+/// see if any of them can reach Dest. If none of N's operands lead to Dest,
+/// legalize them, legalize N itself, and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo. This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+ if (N == Dest) return true; // N certainly leads to Dest :)
+
+ // If we've already processed this node and it does lead to Dest, there is no
+ // need to reprocess it.
+ if (NodesLeadingTo.count(N)) return true;
+
+ // If the first result of this node has already been legalized, then it
+ // cannot reach Dest.
+ if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+ // Okay, this node has not already been legalized.
+ // Check and legalize all operands. If none lead to Dest, then we can
+ // legalize this node.
+ bool OperandsLeadToDest = false;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OperandsLeadToDest |= // If an operand leads to Dest, so do we.
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo);
+
+ if (OperandsLeadToDest) {
+ NodesLeadingTo.insert(N);
+ return true;
+ }
+
+ // Okay, this node looks safe, legalize it and return false.
+ LegalizeOp(SDValue(N, 0));
+ return false;
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool Extend = false;
+ DebugLoc dl = CFP->getDebugLoc();
+
+ // If an FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
+ MVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ MVT OrigVT = VT;
+ MVT SVT = VT;
+ while (SVT != MVT::f32) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1);
+ if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // the smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ const Type *SType = SVT.getTypeForMVT();
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend)
+ return DAG.getExtLoad(ISD::EXTLOAD, dl,
+ OrigVT, DAG.getEntryNode(),
+ CPIdx, PseudoSourceValue::getConstantPool(),
+ 0, VT, false, Alignment);
+ return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false, Alignment);
+}
+
+/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
+static
+SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ MVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+ int SVOffset = ST->getSrcValueOffset();
+ DebugLoc dl = ST->getDebugLoc();
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ MVT intVT = MVT::getIntegerVT(VT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
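+ // Illustrative note (not from the original patch): on a target where i64
+ // is legal, a misaligned f64 store becomes
+ //   (store (i64 (bit_convert f64 Val)), Ptr)
+ // and the resulting misaligned integer store, if still unsupported, is
+ // split by the integer path of this routine when it is relegalized.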
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
+ SVOffset, ST->isVolatile(), Alignment);
+ } else {
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ MVT StoredVT = ST->getMemoryVT();
+ MVT RegVT =
+ TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, NULL, 0, StoredVT);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getSrcValue(), SVOffset + Offset,
+ ST->isVolatile(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ NULL, 0, MemVT);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getSrcValue(), SVOffset + Offset,
+ MemVT, ST->isVolatile(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ }
+ }
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT.
+ MVT NewStoredVT =
+ (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1);
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value into two parts.
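+ // Worked example (illustrative, little-endian): for a misaligned i32
+ // store, NewStoredVT is i16, NumBits is 16, and the value is split as
+ //   Lo = Val           (truncstored as i16 at Ptr)
+ //   Hi = srl Val, 16   (truncstored as i16 at Ptr+2)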
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts.
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
+ ST->getSrcValue(), SVOffset, NewStoredVT,
+ ST->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
+ ST->getSrcValue(), SVOffset + IncrementSize,
+ NewStoredVT, ST->isVolatile(), Alignment);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+}
+
+/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
+static
+SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ int SVOffset = LD->getSrcValueOffset();
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ MVT VT = LD->getValueType(0);
+ MVT LoadedVT = LD->getMemoryVT();
+ DebugLoc dl = LD->getDebugLoc();
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits());
+ if (TLI.isTypeLegal(intVT)) {
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, LD->isVolatile(),
+ LD->getAlignment());
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+ if (VT.isFloatingPoint() && LoadedVT != VT)
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+ } else {
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do an (aligned) load from the stack slot.
+ MVT RegVT = TLI.getRegisterType(intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + Offset, LD->isVolatile(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ NULL, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getSrcValue(), SVOffset + Offset,
+ MemVT, LD->isVolatile(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ NULL, 0, MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ NULL, 0, LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+ }
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ MVT NewLoadedVT;
+ NewLoadedVT = MVT::getIntegerVT(NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts.
+ SDValue Lo, Hi;
+ if (TLI.isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ SDValue Ops[] = { Result, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill to memory the vector being inserted into, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::
+PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ DebugLoc dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could also load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction), then
+ // permute it into place, if the idx is a constant and if the idx is
+ // supported by the target.
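+ // Sketch of the sequence built below (illustrative, for v4f32 and a
+ // variable index Idx):
+ //   slot = CreateStackTemporary(v4f32)
+ //   ch   = store v4f32 Vec -> slot
+ //   addr = slot + Idx * 4           ; 4 == sizeof(f32)
+ //   ch   = truncstore f32 Val -> addr
+ //   result = load v4f32 slot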
+ MVT VT = Tmp1.getValueType(); + MVT EltVT = VT.getVectorElementType(); + MVT IdxVT = Tmp3.getValueType(); + MVT PtrVT = TLI.getPointerTy(); + SDValue StackPtr = DAG.CreateStackTemporary(VT); + + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + + // Store the vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, + PseudoSourceValue::getFixedStack(SPFI), 0); + + // Truncate or zero extend offset to target pointer type. + unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + // Add the offset to the index. + unsigned EltSize = EltVT.getSizeInBits()/8; + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + // Store the scalar value. + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, + PseudoSourceValue::getFixedStack(SPFI), 0, EltVT); + // Load the updated vector. + return DAG.getLoad(VT, dl, Ch, StackPtr, + PseudoSourceValue::getFixedStack(SPFI), 0); +} + + +SDValue SelectionDAGLegalize:: +ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { + if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { + // SCALAR_TO_VECTOR requires that the type of the value being inserted + // match the element type of the vector being created, except for + // integers in which case the inserted value can be over width. + MVT EltVT = Vec.getValueType().getVectorElementType(); + if (Val.getValueType() == EltVT || + (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { + SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + Vec.getValueType(), Val); + + unsigned NumElts = Vec.getValueType().getVectorNumElements(); + // We generate a shuffle of InVec and ScVec, so the shuffle mask + // should be 0,1,2,3,4,5... with the appropriate element replaced with + // elt 0 of the RHS. + SmallVector<int, 8> ShufOps; + for (unsigned i = 0; i != NumElts; ++i) + ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); + + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, + &ShufOps[0]); + } + } + return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); +} + +/// LegalizeOp - We know that the specified value has a legal type, and +/// that its operands are legal. Now ensure that the operation itself +/// is legal, recursively ensuring that the operands' operations remain +/// legal. +SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { + if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return Op; + + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + assert(getTypeAction(Node->getValueType(i)) == Legal && + "Unexpected illegal type!"); + + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + assert((isTypeLegal(Node->getOperand(i).getValueType()) || + Node->getOperand(i).getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. 
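+ // (E.g. a node with two already-legal users is visited once per user; the
+ // map lookup below returns the same cached replacement both times rather
+ // than rebuilding it.)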
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + SDValue Result = Op; + bool isCustom = false; + + // Figure out the correct action; the way to query this varies by opcode + TargetLowering::LegalizeAction Action; + bool SimpleFinishLegalizing = true; + switch (Node->getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::VAARG: + case ISD::STACKSAVE: + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::EXTRACT_VECTOR_ELT: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: { + MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), InnerType); + break; + } + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::BR_CC: { + unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::SETCC ? 2 : 1; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + MVT OpVT = Node->getOperand(CompareOperand).getValueType(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); + Action = TLI.getCondCodeAction(CCCode, OpVT); + if (Action == TargetLowering::Legal) { + if (Node->getOpcode() == ISD::SELECT_CC) + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + else + Action = TLI.getOperationAction(Node->getOpcode(), OpVT); + } + break; + } + case ISD::LOAD: + case ISD::STORE: + // FIXME: Model these properly. LOAD and STORE are complicated, and + // STORE expects the unlegalized operand in some cases. + SimpleFinishLegalizing = false; + break; + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + // FIXME: This shouldn't be necessary. These nodes have special properties + // dealing with the recursive nature of legalization. Removing this + // special case should be done as part of making LegalizeDAG non-recursive. + SimpleFinishLegalizing = false; + break; + case ISD::CALL: + // FIXME: Legalization for calls requires custom-lowering the call before + // legalizing the operands! (I haven't looked into precisely why.) + SimpleFinishLegalizing = false; + break; + case ISD::EXTRACT_ELEMENT: + case ISD::FLT_ROUNDS_: + case ISD::SADDO: + case ISD::SSUBO: + case ISD::UADDO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + case ISD::FPOWI: + case ISD::MERGE_VALUES: + case ISD::EH_RETURN: + case ISD::FRAME_TO_ARGS_OFFSET: + // These operations lie about being legal: when they claim to be legal, + // they should actually be expanded. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; + case ISD::TRAMPOLINE: + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FORMAL_ARGUMENTS: + // These operations lie about being legal: when they claim to be legal, + // they should actually be custom-lowered. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Custom; + break; + case ISD::BUILD_VECTOR: + // A weird case: legalization for BUILD_VECTOR never legalizes the + // operands! + // FIXME: This really sucks... 
changing it isn't semantically incorrect, + // but it massively pessimizes the code for floating-point BUILD_VECTORs + // because ConstantFP operands get legalized into constant pool loads + // before the BUILD_VECTOR code can see them. It doesn't usually bite, + // though, because BUILD_VECTORS usually get lowered into other nodes + // which get legalized properly. + SimpleFinishLegalizing = false; + break; + default: + if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { + Action = TargetLowering::Legal; + } else { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + } + break; + } + + if (SimpleFinishLegalizing) { + SmallVector<SDValue, 8> Ops, ResultVals; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + switch (Node->getOpcode()) { + default: break; + case ISD::BR: + case ISD::BRIND: + case ISD::BR_JT: + case ISD::BR_CC: + case ISD::BRCOND: + case ISD::RET: + // Branches tweak the chain to include LastCALLSEQ_END + Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], + LastCALLSEQ_END); + Ops[0] = LegalizeOp(Ops[0]); + LastCALLSEQ_END = DAG.getEntryNode(); + break; + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Ops[1].getValueType().isVector()) + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); + break; + } + + Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), + Ops.size()); + switch (Action) { + case TargetLowering::Legal: + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Result.getValue(i)); + break; + case TargetLowering::Custom: + // FIXME: The handling for custom lowering with multiple results is + // a complete mess. + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) { + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { + if (e == 1) + ResultVals.push_back(Tmp1); + else + ResultVals.push_back(Tmp1.getValue(i)); + } + break; + } + + // FALL THROUGH + case TargetLowering::Expand: + ExpandNode(Result.getNode(), ResultVals); + break; + case TargetLowering::Promote: + PromoteNode(Result.getNode(), ResultVals); + break; + } + if (!ResultVals.empty()) { + for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { + if (ResultVals[i] != SDValue(Node, i)) + ResultVals[i] = LegalizeOp(ResultVals[i]); + AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); + } + return ResultVals[Op.getResNo()]; + } + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to legalize this operator!"); + abort(); + case ISD::CALL: + // The only option for this is to custom lower it. + Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG); + assert(Tmp3.getNode() && "Target didn't custom lower this node!"); + // A call within a calling sequence must be legalized to something + // other than the normal CALLSEQ_END. Violating this gets Legalize + // into an infinite loop. + assert ((!IsLegalizingCall || + Node->getOpcode() != ISD::CALL || + Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) && + "Nested CALLSEQ_START..CALLSEQ_END not supported."); + + // The number of incoming and outgoing values should match; unless the final + // outgoing value is a flag. 
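+ // E.g. (hypothetical lowering): a CALL producing (i32, ch) may be custom
+ // lowered to a node producing (i32, ch, flag); only a single trailing
+ // MVT::Flag result may be added, which the assert below permits.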
+ assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() ||
+ (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 &&
+ Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) ==
+ MVT::Flag)) &&
+ "Lowering call/formal_arguments produced unexpected # results!");
+
+ // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to
+ // remember that we legalized all of them, so it doesn't get relegalized.
+ for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) {
+ if (Tmp3.getNode()->getValueType(i) == MVT::Flag)
+ continue;
+ Tmp1 = LegalizeOp(Tmp3.getValue(i));
+ if (Op.getResNo() == i)
+ Tmp2 = Tmp1;
+ AddLegalizedOperand(SDValue(Node, i), Tmp1);
+ }
+ return Tmp2;
+ case ISD::BUILD_VECTOR:
+ switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = ExpandBUILD_VECTOR(Result.getNode());
+ break;
+ }
+ break;
+ case ISD::CALLSEQ_START: {
+ SDNode *CallEnd = FindCallEndFromCallStart(Node);
+
+ // Recursively legalize all of the inputs of the call end that do not lead
+ // to this call start. This ensures that any libcalls that need to be
+ // inserted are inserted *before* the CALLSEQ_START.
+ {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+ for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+ LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
+ NodesLeadingTo);
+ }
+
+ // Now that we legalized all of the inputs (which may have inserted
+ // libcalls), create the new CALLSEQ_START node.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ // Merge in the last call to ensure that this call starts after the last
+ // call ended.
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ }
+
+ // Do not try to legalize the target-specific arguments (#1+).
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+
+ // Remember that the CALLSEQ_START is legalized.
+ AddLegalizedOperand(Op.getValue(0), Result);
+ if (Node->getNumValues() == 2) // If this has a flag result, remember it.
+ AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+ // Now that the callseq_start and all of the non-call nodes above this call
+ // sequence have been legalized, legalize the call itself. During this
+ // process, no libcalls can/will be inserted, guaranteeing that no calls
+ // can overlap.
+ assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+ // Note that we are selecting this call!
+ LastCALLSEQ_END = SDValue(CallEnd, 0);
+ IsLegalizingCall = true;
+
+ // Legalize the call, starting from the CALLSEQ_END.
+ LegalizeOp(LastCALLSEQ_END);
+ assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ return Result;
+ }
+ case ISD::CALLSEQ_END:
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
+ // causes this node to be legalized as well and handles libcalls correctly.
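+ // (Legalizing that CALLSEQ_START walks forward to its CALLSEQ_END, so this
+ // node ends up in LegalizedNodes and is simply fetched from the map below.)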
+ if (LastCALLSEQ_END.getNode() != Node) { + LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + // Otherwise, the call start has been legalized and everything is going + // according to plan. Just legalize ourselves normally here. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + // Do not try to legalize the target-specific arguments (#1+), except for + // an optional flag input. + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){ + if (Tmp1 != Node->getOperand(0)) { + SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + } else { + Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); + if (Tmp1 != Node->getOperand(0) || + Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { + SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Ops.back() = Tmp2; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + } + assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); + // This finishes up call legalization. + IsLegalizingCall = false; + + // If the CALLSEQ_END node has a flag, remember that we legalized it. + AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); + if (Node->getNumValues() == 2) + AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); + return Result.getValue(Op.getResNo()); + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Node); + Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + MVT VT = Node->getValueType(0); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Tmp3 = Result.getValue(0); + Tmp4 = Result.getValue(1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, + TLI); + Tmp3 = Result.getOperand(0); + Tmp4 = Result.getOperand(1); + Tmp3 = LegalizeOp(Tmp3); + Tmp4 = LegalizeOp(Tmp4); + } + } + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Tmp3, DAG); + if (Tmp1.getNode()) { + Tmp3 = LegalizeOp(Tmp1); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); + } + break; + case TargetLowering::Promote: { + // Only promote a load of vector type to another. + assert(VT.isVector() && "Cannot promote this load!"); + // Change base type to a different vector type. + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + + Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); + break; + } + } + // Since loads produce two values, make sure to remember that we + // legalized both of them. 
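+ // (For a non-extending load, SDValue(Node, 0) is the loaded value and
+ // SDValue(Node, 1) the output chain; caching both keeps later lookups of
+ // either result consistent.)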
+ AddLegalizedOperand(SDValue(Node, 0), Tmp3); + AddLegalizedOperand(SDValue(Node, 1), Tmp4); + return Op.getResNo() ? Tmp4 : Tmp3; + } else { + MVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + MVT NVT = MVT::getIntegerVT(NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getSrcValue(), SVOffset, + NVT, isVolatile, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(SrcVT.isExtended() && !SrcVT.isVector() && + "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + MVT RoundVT = MVT::getIntegerVT(RoundWidth); + MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, + Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset, RoundVT, isVolatile, + Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset + IncrementSize, + ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. 
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset, RoundVT, isVolatile, + Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, + Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset + IncrementSize, + ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else { + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + + if (isCustom) { + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, + TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); + } + } + } + break; + case TargetLowering::Expand: + // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND + if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) { + SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + Result = DAG.getNode(ISD::FP_EXTEND, dl, + Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. + Tmp2 = LegalizeOp(Load.getValue(1)); + break; + } + assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. 
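+ // E.g. on a target without a native SEXTLOAD from i8:
+ //   (sextload i8 Ptr) -> (sign_extend_inreg (extload i8 Ptr), i8)
+ // The ZEXTLOAD case uses an explicit zero-extend-in-reg instead.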
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), SrcVT, + LD->isVolatile(), LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; + } + } + case ISD::STORE: { + StoreSDNode *ST = cast<StoreSDNode>(Node); + Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + if (!ST->isTruncatingStore()) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + getTypeAction(MVT::i32) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF(). + bitcastToAPInt().zextOrTrunc(32), + MVT::i32); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } else if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (getTypeAction(MVT::i64) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), MVT::i64); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. + const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(4)); + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + isVolatile, MinAlign(Alignment, 4U)); + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + break; + } + } + } + + { + Tmp3 = LegalizeOp(ST->getValue()); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, + ST->getOffset()); + + MVT VT = Tmp3.getValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. 
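+ // (E.g., assuming i32 has ABI alignment 4 on the target: an i32 store with
+ // alignment 2 fails this check and is split by ExpandUnalignedStore.)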
+ if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (ST->getAlignment() < ABIAlignment) + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, + TLI); + } + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) Result = Tmp1; + break; + case TargetLowering::Promote: + assert(VT.isVector() && "Unknown legal promote case!"); + Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl, + TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getSrcValue(), SVOffset, isVolatile, + Alignment); + break; + } + break; + } + } else { + Tmp3 = LegalizeOp(ST->getValue()); + + MVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits()); + Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, NVT, isVolatile, Alignment); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. + assert(StVT.isExtended() && !StVT.isVector() && + "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + MVT RoundVT = MVT::getIntegerVT(RoundWidth); + MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, RoundVT, + isVolatile, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), + SVOffset + IncrementSize, ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), + SVOffset, RoundVT, isVolatile, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset + IncrementSize, ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + } + + // The order of the stores doesn't matter. 
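+ // (The TokenFactor below merely joins the two store chains; e.g. the i16
+ // and i8 pieces of the i24 truncstore above may complete in either order.)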
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ } else {
+ if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+ Tmp2 != ST->getBasePtr())
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses()) {
+ unsigned ABIAlignment = TLI.getTargetData()->
+ getABITypeAlignment(ST->getMemoryVT().getTypeForMVT());
+ if (ST->getAlignment() < ABIAlignment)
+ Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG,
+ TLI);
+ }
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Result, DAG);
+ break;
+ case TargetLowering::Expand:
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+ Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ }
+ }
+ }
+ break;
+ }
+ }
+ assert(Result.getValueType() == Op.getValueType() &&
+ "Bad legalization!");
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op)
+ Result = LegalizeOp(Result);
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+ else
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ assert((Tmp2.getValueType() == MVT::f32 ||
+ Tmp2.getValueType() == MVT::f64) &&
+ "Ugly special-cased code!");
+ // Get the sign bit of the RHS.
+ SDValue SignBit;
+ MVT IVT = Tmp2.getValueType() == MVT::f64 ?
MVT::i64 : MVT::i32; + if (isTypeLegal(IVT)) { + SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); + } else { + assert(isTypeLegal(TLI.getPointerTy()) && + (TLI.getPointerTy() == MVT::i32 || + TLI.getPointerTy() == MVT::i64) && + "Legal type for load?!"); + SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType()); + SDValue StorePtr = StackPtr, LoadPtr = StackPtr; + SDValue Ch = + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0); + if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian()) + LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), + LoadPtr, DAG.getIntPtrConstant(4)); + SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(), + Ch, LoadPtr, NULL, 0, MVT::i32); + } + SignBit = + DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), + SignBit, DAG.getConstant(0, SignBit.getValueType()), + ISD::SETLT); + // Get the absolute value of the result. + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); + // Select between the nabs and abs value based on the sign bit of + // the input. + return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); +} + +SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) { + DebugLoc dl = Node->getDebugLoc(); + DwarfWriter *DW = DAG.getDwarfWriter(); + bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC, + MVT::Other); + bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other); + + const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node); + GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit()); + if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) { + DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit())); + + unsigned Line = DSP->getLine(); + unsigned Col = DSP->getColumn(); + + if (OptLevel == CodeGenOpt::None) { + // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it + // won't hurt anything. + if (useDEBUG_LOC) { + return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0), + DAG.getConstant(Line, MVT::i32), + DAG.getConstant(Col, MVT::i32), + DAG.getSrcValue(CU.getGV())); + } else { + unsigned ID = DW->RecordSourceLine(Line, Col, CU); + return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID); + } + } + } + return Node->getOperand(0); +} + +void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, + SmallVectorImpl<SDValue> &Results) { + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + DebugLoc dl = Node->getDebugLoc(); + MVT VT = Node->getValueType(0); + SDValue Tmp1 = SDValue(Node, 0); + SDValue Tmp2 = SDValue(Node, 1); + SDValue Tmp3 = Node->getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. 
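+ // Sketch of the expansion built below (illustrative): for an allocation of
+ // Size bytes with alignment Align,
+ //   ch   = callseq_start ch
+ //   SP   = copyfromreg ch, <sp-reg>
+ //   SP   = and SP, -Align           ; only if Align > target stack align
+ //   Tmp1 = sub SP, Size             ; the returned pointer
+ //   ch   = copytoreg ch, <sp-reg>, Tmp1
+ //   ch   = callseq_end ch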
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign =
+ TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (Align > StackAlign)
+ SP = DAG.getNode(ISD::AND, dl, VT, SP,
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine assumes LHS and RHS
+/// have already been legalized by LegalizeSetCCOperands. It expands a SETCC
+/// with an illegal condition code into an AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC,
+ DebugLoc dl) {
+ MVT OpVT = LHS.getValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: assert(0 && "Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default: assert(0 && "Don't know how to expand this condition!"); abort();
+ case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
+ case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
+ // FIXME: Implement more expansions.
+ }
+
+ SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ break;
+ }
+ }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+ MVT SlotVT,
+ MVT DestVT,
+ DebugLoc dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+                                                          getTypeForMVT());
+  SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+  int SPFI = StackPtrFI->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+  unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  unsigned DestAlign =
+    TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT());
+
+  // Emit a store to the stack slot.  Use a truncstore if the input value is
+  // larger than the slot size.
+  SDValue Store;
+
+  if (SrcSize > SlotSize)
+    Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                              SV, 0, SlotVT, false, SrcAlign);
+  else {
+    assert(SrcSize == SlotSize && "Invalid store");
+    Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                         SV, 0, false, SrcAlign);
+  }
+
+  // Result is a load from the stack slot.
+  if (SlotSize == DestSize)
+    return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+
+  assert(SlotSize < DestSize && "Unknown extension!");
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+                        false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  // Create a vector sized/aligned stack slot, store the value to element #0,
+  // then load the whole vector back out.
+  SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+  int SPFI = StackPtrFI->getIndex();
+
+  SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+                                 StackPtr,
+                                 PseudoSourceValue::getFixedStack(SPFI), 0,
+                                 Node->getValueType(0).getVectorElementType());
+  return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+                     PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+  unsigned NumElems = Node->getNumOperands();
+  SDValue SplatValue = Node->getOperand(0);
+  DebugLoc dl = Node->getDebugLoc();
+  MVT VT = Node->getValueType(0);
+  MVT OpVT = SplatValue.getValueType();
+  MVT EltVT = VT.getVectorElementType();
+
+  // If the only non-undef value is the low element, turn this into a
+  // SCALAR_TO_VECTOR node.  If this is { X, X, X, X }, determine X.
+  bool isOnlyLowElement = true;
+
+  // FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
+  // and use a bitmask instead of a list of elements.
+  // FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
+  std::map<SDValue, std::vector<unsigned> > Values;
+  Values[SplatValue].push_back(0);
+  bool isConstant = true;
+  if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+      SplatValue.getOpcode() != ISD::UNDEF)
+    isConstant = false;
+
+  for (unsigned i = 1; i < NumElems; ++i) {
+    SDValue V = Node->getOperand(i);
+    Values[V].push_back(i);
+    if (V.getOpcode() != ISD::UNDEF)
+      isOnlyLowElement = false;
+    if (SplatValue != V)
+      SplatValue = SDValue(0, 0);
+
+    // If this isn't a constant element or an undef, we can't use a constant
+    // pool load.
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+        V.getOpcode() != ISD::UNDEF)
+      isConstant = false;
+  }
+
+  if (isOnlyLowElement) {
+    // If the low element is an undef too, then this whole thing is an undef.
+ if (Node->getOperand(0).getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + // Otherwise, turn this into a scalar_to_vector node. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0)); + } + + // If all elements are constants, create a load from the constant pool. + if (isConstant) { + std::vector<Constant*> CV; + for (unsigned i = 0, e = NumElems; i != e; ++i) { + if (ConstantFPSDNode *V = + dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); + } else if (ConstantSDNode *V = + dyn_cast<ConstantSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + } else { + assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); + const Type *OpNTy = OpVT.getTypeForMVT(); + CV.push_back(UndefValue::get(OpNTy)); + } + } + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment); + } + + if (SplatValue.getNode()) { // Splat of one value? + // Build the shuffle constant vector: <0, 0, 0, 0> + SmallVector<int, 8> ZeroVec(NumElems, 0); + + // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it. + if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. + SDValue LowValVec = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT), + &ZeroVec[0]); + } + } + + // If there are only two unique elements, we may be able to turn this into a + // vector shuffle. + if (Values.size() == 2) { + // Get the two values in deterministic order. + SDValue Val1 = Node->getOperand(1); + SDValue Val2; + std::map<SDValue, std::vector<unsigned> >::iterator MI = Values.begin(); + if (MI->first != Val1) + Val2 = MI->first; + else + Val2 = (++MI)->first; + + // If Val1 is an undef, make sure it ends up as Val2, to ensure that our + // vector shuffle has the undef vector on the RHS. + if (Val1.getOpcode() == ISD::UNDEF) + std::swap(Val1, Val2); + + // Build the shuffle constant vector: e.g. <0, 4, 0, 4> + SmallVector<int, 8> ShuffleMask(NumElems, -1); + + // Set elements of the shuffle mask for Val1. + std::vector<unsigned> &Val1Elts = Values[Val1]; + for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i) + ShuffleMask[Val1Elts[i]] = 0; + + // Set elements of the shuffle mask for Val2. + std::vector<unsigned> &Val2Elts = Values[Val2]; + for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i) + if (Val2.getOpcode() != ISD::UNDEF) + ShuffleMask[Val2Elts[i]] = NumElems; + + // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it. + if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) && + TLI.isShuffleMaskLegal(ShuffleMask, VT)) { + Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1); + Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2); + return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]); + } + } + + // Otherwise, we can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + // Create the stack frame object. 
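// [Editor's note: worked example for the two-unique-value path above; a
// sketch, not taken from the patch.]  For BUILD_VECTOR a, b, a, b with
// NumElems == 4:
//
//   Val1 = operand 1 = b, at elements {1, 3}  -> those mask slots get 0
//   Val2 = a, at elements {0, 2}              -> those mask slots get 4
//
//   result = VECTOR_SHUFFLE (SCALAR_TO_VECTOR b), (SCALAR_TO_VECTOR a),
//                           <4, 0, 4, 0>
//
// Vectors that defeat both shuffle paths take the stack-slot route below.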
+  SDValue FIPtr = DAG.CreateStackTemporary(VT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store of each element to the stack slot.
+  SmallVector<SDValue, 8> Stores;
+  unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
+  // Store (in the right endianness) the elements to memory.
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    unsigned Offset = TypeByteSize*i;
+
+    SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+    Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+                                  Idx, SV, Offset));
+  }
+
+  SDValue StoreChain;
+  if (!Stores.empty())    // Not all undef elements?
+    StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                             &Stores[0], Stores.size());
+  else
+    StoreChain = DAG.getEntryNode();
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall and return the
+// value produced by the call.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+                                            bool isSigned) {
+  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+  // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically add the previous call to the
+  // dependence.
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    MVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForMVT();
+    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = Node->getValueType(0).getTypeForMVT();
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                    CallingConv::C, false, Callee, Args, DAG,
+                    Node->getDebugLoc());
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
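// [Editor's note: illustrative, not part of this patch.]  For instance, an
// f64 FREM on a target with no native remainder reaches this point via
// ExpandFPLibCall with RTLIB::REM_F64, i.e. the C runtime's fmod, so the
// node is effectively rewritten to:
//
//   extern "C" double fmod(double x, double y);
//   double Result = fmod(LHS, RHS);    // replaces the FREM node's value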
+  LegalizeOp(CallInfo.second);
+  return CallInfo.first;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+                                              RTLIB::Libcall Call_F32,
+                                              RTLIB::Libcall Call_F64,
+                                              RTLIB::Libcall Call_F80,
+                                              RTLIB::Libcall Call_PPCF128) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT()) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::f32: LC = Call_F32; break;
+  case MVT::f64: LC = Call_F64; break;
+  case MVT::f80: LC = Call_F80; break;
+  case MVT::ppcf128: LC = Call_PPCF128; break;
+  }
+  return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+                                               RTLIB::Libcall Call_I16,
+                                               RTLIB::Libcall Call_I32,
+                                               RTLIB::Libcall Call_I64,
+                                               RTLIB::Libcall Call_I128) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT()) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::i16: LC = Call_I16; break;
+  case MVT::i32: LC = Call_I32; break;
+  case MVT::i64: LC = Call_I64; break;
+  case MVT::i128: LC = Call_I128; break;
+  }
+  return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing an
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it.  At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+                                                   SDValue Op0,
+                                                   MVT DestVT,
+                                                   DebugLoc dl) {
+  if (Op0.getValueType() == MVT::i32) {
+    // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+    // Get the stack frame index of an 8 byte buffer.
+    SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+    // word offset constant for Hi/Lo address computation
+    SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+    // set up Hi and Lo (into buffer) address based on endian
+    SDValue Hi = StackSlot;
+    SDValue Lo = DAG.getNode(ISD::ADD, dl,
+                             TLI.getPointerTy(), StackSlot, WordOff);
+    if (TLI.isLittleEndian())
+      std::swap(Hi, Lo);
+
+    // if signed, map to unsigned space
+    SDValue Op0Mapped;
+    if (isSigned) {
+      // constant used to invert sign bit (signed to unsigned mapping)
+      SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+      Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+    } else {
+      Op0Mapped = Op0;
+    }
+    // store the lo of the constructed double - based on integer input
+    SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+                                  Op0Mapped, Lo, NULL, 0);
+    // initial hi portion of constructed double
+    SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+    // store the hi of the constructed double - biased exponent
+    SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0);
+    // load the constructed double
+    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0);
+    // FP constant to bias correct the final result
+    SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) : + BitsToDouble(0x4330000000000000ULL), + MVT::f64); + // subtract the bias + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + // final result + SDValue Result; + // handle final rounding + if (DestVT == MVT::f64) { + // do nothing + Result = Sub; + } else if (DestVT.bitsLT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, + DAG.getIntPtrConstant(0)); + } else if (DestVT.bitsGT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); + } + return Result; + } + assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + + SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETLT); + SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); + SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SignSet, Four, Zero); + + // If the sign bit of the integer is set, the large number will be treated + // as a negative number. To counteract this, the dynamic code adds an + // offset depending on the data type. + uint64_t FF; + switch (Op0.getValueType().getSimpleVT()) { + default: assert(0 && "Unsupported integer type!"); + case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) + case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) + case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) + case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) + } + if (TLI.isLittleEndian()) FF <<= 32; + Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF); + + SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + Alignment = std::min(Alignment, 4u); + SDValue FudgeInReg; + if (DestVT == MVT::f32) + FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment); + else { + FudgeInReg = + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + MVT::f32, false, Alignment)); + } + + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); +} + +/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a +/// *INT_TO_FP operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP +/// operation that takes a larger input. +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, + MVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate *INT_TO_FP operation to use. + MVT NewInTy = LegalOp.getValueType(); + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); + assert(NewInTy.isInteger() && "Ran out of possibilities!"); + + // If the target supports SINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { + OpToUse = ISD::SINT_TO_FP; + break; + } + if (isSigned) continue; + + // If the target supports UINT_TO_FP of this type, use it. 
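// [Editor's note: recap of the i32 path of ExpandLegalINT_TO_FP above; a
// self-contained C++ sketch assuming IEEE-754 doubles, not part of this
// patch.]
//
//   #include <cstdint>
//   #include <cstring>
//   double u32_to_f64(uint32_t x) {
//     uint64_t bits = 0x4330000000000000ULL | x; // 2^52 with x in the low word
//     double d;
//     std::memcpy(&d, &bits, sizeof d);          // the store/store/load above
//     return d - 4503599627370496.0;             // subtract the 2^52 bias
//   }
//   double s32_to_f64(int32_t x) {
//     // XOR flips the sign bit: maps [-2^31, 2^31) onto [0, 2^32).
//     uint64_t bits = 0x4330000000000000ULL | ((uint32_t)x ^ 0x80000000u);
//     double d;
//     std::memcpy(&d, &bits, sizeof d);
//     return d - (4503599627370496.0 + 2147483648.0); // 2^52 + 2^31 bias
//   }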
+    if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+      OpToUse = ISD::UINT_TO_FP;
+      break;
+    }
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.  Extend the input to the
+  // desired type (sign- or zero-extending as appropriate), then run the
+  // operation on it.
+  return DAG.getNode(OpToUse, dl, DestVT,
+                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                                 dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+                                                    MVT DestVT,
+                                                    bool isSigned,
+                                                    DebugLoc dl) {
+  // First step, figure out the appropriate FP_TO_*INT operation to use.
+  MVT NewOutTy = DestVT;
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1);
+    assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+      OpToUse = ISD::FP_TO_SINT;
+      break;
+    }
+
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+      OpToUse = ISD::FP_TO_UINT;
+      break;
+    }
+
+    // Otherwise, try a larger type.
+  }
+
+
+  // Okay, we found the operation and type to use.
+  SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+  // Truncate the result of the extended FP_TO_*INT operation to the desired
+  // size.
+  return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified value.
+/// +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { + MVT VT = Op.getValueType(); + MVT SHVT = TLI.getShiftAmountTy(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT()) { + default: assert(0 && "Unhandled Expand type in BSWAP!"); abort(); + case MVT::i16: + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT)); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT)); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); + Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); + } +} + +/// ExpandBitCount - Expand the specified bitcount instruction into operations. +/// +SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, + DebugLoc dl) { + switch (Opc) { + default: assert(0 && "Cannot expand this yet!"); + case ISD::CTPOP: { + static const uint64_t mask[6] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, + 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, + 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL + }; + MVT VT = Op.getValueType(); + MVT ShVT = TLI.getShiftAmountTy(); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) + unsigned EltSize = VT.isVector() ? 
+ VT.getVectorElementType().getSizeInBits() : len; + SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT); + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Tmp2), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3), + Tmp2)); + } + return Op; + } + case ISD::CTLZ: { + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + MVT VT = Op.getValueType(); + MVT ShVT = TLI.getShiftAmountTy(); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); + } + Op = DAG.getNOT(dl, Op, VT); + return DAG.getNode(ISD::CTPOP, dl, VT, Op); + } + case ISD::CTTZ: { + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // see also http://www.hackersdelight.org/HDcode/ntz.cc + MVT VT = Op.getValueType(); + SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getConstant(1, VT))); + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. + if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(VT.getSizeInBits(), VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); + } + } +} + +void SelectionDAGLegalize::ExpandNode(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + switch (Node->getOpcode()) { + case ISD::CTPOP: + case ISD::CTLZ: + case ISD::CTTZ: + Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); + Results.push_back(Tmp1); + break; + case ISD::BSWAP: + Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + break; + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FRAME_TO_ARGS_OFFSET: + Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + break; + case ISD::FLT_ROUNDS_: + Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + break; + case ISD::EH_RETURN: + case ISD::DECLARE: + case ISD::DBG_LABEL: + case ISD::EH_LABEL: + case ISD::PREFETCH: + case ISD::MEMBARRIER: + case ISD::VAEND: + Results.push_back(Node->getOperand(0)); + break; + case ISD::DBG_STOPPOINT: + Results.push_back(ExpandDBG_STOPPOINT(Node)); + break; + case ISD::DYNAMIC_STACKALLOC: + ExpandDYNAMIC_STACKALLOC(Node, Results); + break; + case ISD::MERGE_VALUES: + for (unsigned i = 0; i < Node->getNumValues(); i++) + Results.push_back(Node->getOperand(i)); + break; + case ISD::UNDEF: { + MVT VT = Node->getValueType(0); + if (VT.isInteger()) + Results.push_back(DAG.getConstant(0, VT)); + else if (VT.isFloatingPoint()) + Results.push_back(DAG.getConstantFP(0, VT)); + else + assert(0 && "Unknown value type!"); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy, + false, false, false, false, CallingConv::C, false, + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + Args, DAG, dl); + 
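// [Editor's note on ExpandBitCount above: illustrative, not part of this
// patch.]  The CTPOP loop is the standard parallel bit count; for i32 in
// plain C++:
//
//   uint32_t ctpop32(uint32_t x) {
//     static const uint32_t mask[5] = { 0x55555555, 0x33333333, 0x0F0F0F0F,
//                                       0x00FF00FF, 0x0000FFFF };
//     for (unsigned i = 0; (1u << i) <= 16; ++i)   // len/2 == 16
//       x = (x & mask[i]) + ((x >> (1u << i)) & mask[i]);
//     return x;
//   }
//
// CTLZ first ORs x with its own right shifts, smearing the leading one bit
// across everything below it, then returns ctpop(~x); CTTZ returns
// ctpop(~x & (x - 1)), since that operand is exactly the trailing-zero mask.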
+    Results.push_back(CallResult.second);
+    break;
+  }
+  case ISD::FP_ROUND:
+  case ISD::BIT_CONVERT:
+    Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::FP_EXTEND:
+    Tmp1 = EmitStackConvert(Node->getOperand(0),
+                            Node->getOperand(0).getValueType(),
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::SIGN_EXTEND_INREG: {
+    // NOTE: we could fall back on load/store here too for targets without
+    // SAR.  However, it is doubtful that any exist.
+    MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    unsigned BitsDiff = Node->getValueType(0).getSizeInBits() -
+                        ExtraVT.getSizeInBits();
+    SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+    Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+                       Node->getOperand(0), ShiftCst);
+    Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::FP_ROUND_INREG: {
+    // The only way we can lower this is to turn it into a TRUNCSTORE,
+    // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+    // NOTE: there is a choice here between constantly creating new stack
+    // slots and always reusing the same one.  We currently always create
+    // new ones, as reuse may inhibit scheduling.
+    MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+                            Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+                                Node->getOperand(0), Node->getValueType(0), dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::FP_TO_UINT: {
+    SDValue True, False;
+    MVT VT =  Node->getOperand(0).getValueType();
+    MVT NVT = Node->getValueType(0);
+    const uint64_t zero[] = {0, 0};
+    APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+    APInt x = APInt::getSignBit(NVT.getSizeInBits());
+    (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+    Tmp1 = DAG.getConstantFP(apf, VT);
+    Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+                        Node->getOperand(0),
+                        Tmp1, ISD::SETLT);
+    True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+    False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+                        DAG.getNode(ISD::FSUB, dl, VT,
+                                    Node->getOperand(0), Tmp1));
+    False = DAG.getNode(ISD::XOR, dl, NVT, False,
+                        DAG.getConstant(x, NVT));
+    Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::VAARG: {
+    const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+    MVT VT = Node->getValueType(0);
+    Tmp1 = Node->getOperand(0);
+    Tmp2 = Node->getOperand(1);
+    SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
+    // Increment the pointer, VAList, to the next vaarg.
+    Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+                       DAG.getConstant(TLI.getTargetData()->
+                                       getTypeAllocSize(VT.getTypeForMVT()),
+                                       TLI.getPointerTy()));
+    // Store the incremented VAList to the legalized pointer.
+    Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
+    // Load the actual argument out of the pointer VAList.
+    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::VACOPY: {
+    // This defaults to loading a pointer from the input and storing it to the
+    // output, returning the chain.
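// [Editor's note on the FP_TO_UINT expansion above: illustrative sketch,
// not part of this patch.]  For f64 -> i64 the SELECT boils down to:
//
//   uint64_t f64_to_u64(double x) {
//     const double Two63 = 9223372036854775808.0;  // 2^63, the apf constant
//     if (x < Two63)                               // the SETLT condition
//       return (uint64_t)(int64_t)x;               // True: plain FP_TO_SINT
//     return (uint64_t)(int64_t)(x - Two63)        // False: rebase, convert,
//            ^ 0x8000000000000000ULL;              // then restore the top bit
//   }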
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), + Node->getOperand(2), VS, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_VECTOR_ELT: + if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + // This must be an access of the only element. Return it. + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Node->getOperand(0)); + else + Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); + Results.push_back(Tmp1); + break; + case ISD::EXTRACT_SUBVECTOR: + Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); + break; + case ISD::CONCAT_VECTORS: { + // Use extract/insert/build vector for now. We might try to be + // more clever later. + SmallVector<SDValue, 8> Ops; + unsigned NumOperands = Node->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue SubOp = Node->getOperand(i); + MVT VVT = SubOp.getNode()->getValueType(0); + MVT EltVT = VVT.getVectorElementType(); + unsigned NumSubElem = VVT.getVectorNumElements(); + for (unsigned j=0; j < NumSubElem; ++j) { + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, + DAG.getIntPtrConstant(j))); + } + } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Ops[0], Ops.size()); + Results.push_back(Tmp1); + break; + } + case ISD::SCALAR_TO_VECTOR: + Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); + break; + case ISD::INSERT_VECTOR_ELT: + Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), + Node->getOperand(1), + Node->getOperand(2), dl)); + break; + case ISD::VECTOR_SHUFFLE: { + SmallVector<int, 8> Mask; + cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + + MVT VT = Node->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumElems; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + continue; + } + unsigned Idx = Mask[i]; + if (Idx < NumElems) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(0), + DAG.getIntPtrConstant(Idx))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(1), + DAG.getIntPtrConstant(Idx - NumElems))); + } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_ELEMENT: { + MVT OpTy = Node->getOperand(0).getValueType(); + if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + // 1 -> Hi + Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), + DAG.getConstant(OpTy.getSizeInBits()/2, + TLI.getShiftAmountTy())); + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); + } else { + // 0 -> Lo + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), + Node->getOperand(0)); + } + Results.push_back(Tmp1); + break; + } + case ISD::STACKSAVE: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. 
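// [Editor's note] That is, llvm.stacksave degenerates to a plain read of
// the stack-pointer register, and llvm.stackrestore (below) to a write of
// it; if the target never names that register, the save yields undef and
// both simply forward their input chain.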
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + } else { + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::STACKRESTORE: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, + Node->getOperand(1))); + } else { + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::FCOPYSIGN: + Results.push_back(ExpandFCOPYSIGN(Node)); + break; + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, + Node->getOperand(0)); + Results.push_back(Tmp1); + break; + case ISD::FABS: { + // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). + MVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, ISD::SETUGT); + Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); + Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128)); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, 
+                                      RTLIB::POW_F64,
+                                      RTLIB::POW_F80, RTLIB::POW_PPCF128));
+    break;
+  case ISD::FDIV:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+                                      RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+    break;
+  case ISD::FREM:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+                                      RTLIB::REM_F80, RTLIB::REM_PPCF128));
+    break;
+  case ISD::ConstantFP: {
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+    // Check to see if this FP immediate is already legal.
+    bool isLegal = false;
+    for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+           E = TLI.legal_fpimm_end(); I != E; ++I) {
+      if (CFP->isExactlyValue(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+    // If this is a legal constant, turn it into a TargetConstantFP node.
+    if (isLegal)
+      Results.push_back(SDValue(Node, 0));
+    else
+      Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+    break;
+  }
+  case ISD::EHSELECTION: {
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::EXCEPTIONADDR: {
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::SUB: {
+    MVT VT = Node->getValueType(0);
+    assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+           TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+           "Don't know how to expand this subtraction!");
+    // Expand a - b as a + (~b + 1), i.e. a plus the two's complement of b.
+    Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+               DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+    Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+    Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+    break;
+  }
+  case ISD::UREM:
+  case ISD::SREM: {
+    MVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    bool isSigned = Node->getOpcode() == ISD::SREM;
+    unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    Tmp2 = Node->getOperand(0);
+    Tmp3 = Node->getOperand(1);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+    } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+      // X % Y -> X - X/Y*Y
+      Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+      Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+      Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+    } else if (isSigned) {
+      Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+                              RTLIB::SREM_I64, RTLIB::SREM_I128);
+    } else {
+      Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+                              RTLIB::UREM_I64, RTLIB::UREM_I128);
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::UDIV:
+  case ISD::SDIV: {
+    bool isSigned = Node->getOpcode() == ISD::SDIV;
+    unsigned DivRemOpc = isSigned ?
ISD::SDIVREM : ISD::UDIVREM; + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128); + Results.push_back(Tmp1); + break; + } + case ISD::MULHU: + case ISD::MULHS: { + unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : + ISD::SMUL_LOHI; + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && + "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + case ISD::MUL: { + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + // See if multiply or divide can be lowered using two-result operations. + // We just need the low half of the multiply; try both the signed + // and unsigned forms. If the target supports both SMUL_LOHI and + // UMUL_LOHI, form a preference by checking which forms of plain + // MULH it supports. + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); + unsigned OpToUse = 0; + if (HasSMUL_LOHI && !HasMULHS) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI && !HasMULHU) { + OpToUse = ISD::UMUL_LOHI; + } else if (HasSMUL_LOHI) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI) { + OpToUse = ISD::UMUL_LOHI; + } + if (OpToUse) { + Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), + Node->getOperand(1))); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128); + Results.push_back(Tmp1); + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + MVT OType = Node->getValueType(1); + + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Results.push_back(Cmp); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? 
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, + Node->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT)); + break; + } + case ISD::BUILD_PAIR: { + MVT PairTy = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits()/2, + TLI.getShiftAmountTy())); + Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); + break; + } + case ISD::SELECT: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + if (Tmp1.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); + } else { + Tmp1 = DAG.getSelectCC(dl, Tmp1, + DAG.getConstant(0, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + Results.push_back(Tmp1); + break; + case ISD::BR_JT: { + SDValue Chain = Node->getOperand(0); + SDValue Table = Node->getOperand(1); + SDValue Index = Node->getOperand(2); + + MVT PTy = TLI.getPointerTy(); + MachineFunction &MF = DAG.getMachineFunction(); + unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); + Index= DAG.getNode(ISD::MUL, dl, PTy, + Index, DAG.getConstant(EntrySize, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + + MVT MemVT = MVT::getIntegerVT(EntrySize * 8); + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + PseudoSourceValue::getJumpTable(), 0, MemVT); + Addr = LD; + if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, + TLI.getPICJumpTableRelocBase(Table, DAG)); + } + Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + Results.push_back(Tmp1); + break; + } + case ISD::BRCOND: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + if (Tmp2.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, + Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp2, + DAG.getConstant(0, Tmp2.getValueType()), + Node->getOperand(2)); + } + Results.push_back(Tmp1); + break; + case ISD::SETCC: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + + // If we expanded the SETCC into an AND/OR, return the new node + if (Tmp2.getNode() == 0) { + Results.push_back(Tmp1); + break; + } + + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. 
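// [Editor's note: illustrative, not part of this patch.]  When the
// LegalizeSetCCCondCode call above does fire, an ordered float compare
// decomposes as in this C++ sketch:
//
//   #include <cmath>
//   bool setogt(double a, double b) {
//     bool gt  = a > b;                              // the ISD::SETGT half
//     bool ord = !std::isnan(a) && !std::isnan(b);   // the ISD::SETO half
//     return gt && ord;                              // joined with ISD::AND
//   }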
+ MVT VT = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, + DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = Node->getOperand(2); // True + Tmp4 = Node->getOperand(3); // False + SDValue CC = Node->getOperand(4); + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, dl); + + assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + Results.push_back(Tmp1); + break; + } + case ISD::BR_CC: { + Tmp1 = Node->getOperand(0); // Chain + Tmp2 = Node->getOperand(2); // LHS + Tmp3 = Node->getOperand(3); // RHS + Tmp4 = Node->getOperand(1); // CC + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), + Tmp2, Tmp3, Tmp4, dl); + LastCALLSEQ_END = DAG.getEntryNode(); + + assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + Results.push_back(Tmp1); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::ExternalSymbol: + case ISD::ConstantPool: + case ISD::JumpTable: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + // FIXME: Custom lowering for these operations shouldn't return null! + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(SDValue(Node, i)); + break; + } +} +void SelectionDAGLegalize::PromoteNode(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT OVT = Node->getValueType(0); + if (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP) { + OVT = Node->getOperand(0).getValueType(); + } + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3; + switch (Node->getOpcode()) { + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Perform the larger operation. 
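// [Editor's note: worked example for the fix-ups below; ctlz32/cttz32 are
// hypothetical i32 primitives, not part of this patch.]  Promoting an i8
// count to i32:
//
//   unsigned ctlz8(uint8_t x) {
//     return ctlz32((uint32_t)x) - (32 - 8); // drop the 24 leading zeros the
//   }                                        // zero-extension introduced
//   unsigned cttz8(uint8_t x) {
//     unsigned t = cttz32((uint32_t)x);      // 32 when x == 0
//     return t == 32 ? 8 : t;                // clamp to the old bit width
//   }
//
// CTPOP needs no fix-up, since zero-extension adds no set bits.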
+    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+    if (Node->getOpcode() == ISD::CTTZ) {
+      // if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+      Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+                          Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+                          ISD::SETEQ);
+      Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
+                          DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+    } else if (Node->getOpcode() == ISD::CTLZ) {
+      // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+      Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+                          DAG.getConstant(NVT.getSizeInBits() -
+                                          OVT.getSizeInBits(), NVT));
+    }
+    Results.push_back(Tmp1);
+    break;
+  case ISD::BSWAP: {
+    unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+    Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+    Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+    Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
+                       DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:
+    Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+                                 Node->getOpcode() == ISD::FP_TO_SINT, dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+    Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+                                 Node->getOpcode() == ISD::SINT_TO_FP, dl);
+    Results.push_back(Tmp1);
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+    assert(OVT.isVector() && "Don't know how to promote scalar logic ops");
+    // Bit convert each of the values to the new type.
+    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+    // Bit convert the result back to the original type.
+    Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1));
+    break;
+  case ISD::SELECT:
+    unsigned ExtOp, TruncOp;
+    if (Node->getValueType(0).isVector()) {
+      ExtOp   = ISD::BIT_CONVERT;
+      TruncOp = ISD::BIT_CONVERT;
+    } else if (Node->getValueType(0).isInteger()) {
+      ExtOp   = ISD::ANY_EXTEND;
+      TruncOp = ISD::TRUNCATE;
+    } else {
+      ExtOp   = ISD::FP_EXTEND;
+      TruncOp = ISD::FP_ROUND;
+    }
+    Tmp1 = Node->getOperand(0);
+    // Promote each of the values to the new type.
+    Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+    Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+    // Perform the larger operation, then round down.
+    Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3);
+    if (TruncOp != ISD::FP_ROUND)
+      Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+    else
+      Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+                         DAG.getIntPtrConstant(0));
+    Results.push_back(Tmp1);
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    SmallVector<int, 8> Mask;
+    cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+
+    // Cast the two input vectors.
+    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+
+    // Convert the shuffle mask to the right # elements.
+    Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::SETCC: {
+    // First step, figure out the appropriate operation to use.
+    // Allow SETCC to not be supported for all legal data types.
+    // Mostly this targets floating-point comparisons.
+    MVT NewInTy = Node->getOperand(0).getValueType();
+    // The self-assignment silences unused-variable warnings in -Asserts builds.
+    MVT OldVT = NewInTy; OldVT = OldVT;
+
+    // Scan for the appropriate larger type to use.
+    while (1) {
+      NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1);
+
+      assert(NewInTy.isInteger() == OldVT.isInteger() &&
+             "Fell off of the edge of the integer world");
+      assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() &&
+             "Fell off of the edge of the floating point world");
+
+      // If the target supports SETCC of this type, use it.
+      if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy))
+        break;
+    }
+    if (NewInTy.isInteger())
+      assert(0 && "Cannot promote Legal Integer SETCC yet");
+    else {
+      Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Node->getOperand(0));
+      Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Node->getOperand(1));
+    }
+    Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+                                  Tmp1, Tmp2, Node->getOperand(2)));
+    break;
+  }
+  }
+}
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize(bool TypesNeedLegalizing,
+                            CodeGenOpt::Level OptLevel) {
+  SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 0000000..c3c1bea
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,1388 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float".  For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type.  For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+using namespace llvm;
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(MVT VT,
+                                   RTLIB::Libcall Call_F32,
+                                   RTLIB::Libcall Call_F64,
+                                   RTLIB::Libcall Call_F80,
+                                   RTLIB::Libcall Call_PPCF128) {
+  return
+    VT == MVT::f32 ? Call_F32 :
+    VT == MVT::f64 ? Call_F64 :
+    VT == MVT::f80 ? Call_F80 :
+    VT == MVT::ppcf128 ? Call_PPCF128 :
+    RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Result Float to Integer Conversion.
+//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to soften the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: + R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); + break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; + case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; + case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) + SetSoftenedFloat(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { + // Convert the inputs to integers, and build a new pair out of them. 
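// [Editor's note: on the SoftenFloatRes_* arithmetic helpers that follow;
// illustrative, not part of this patch.]  Softening turns an f32 FADD into
// an i32-typed runtime call, e.g. (__addsf3 is the usual RTLIB::ADD_F32
// symbol in libgcc/compiler-rt; it is declared with integer types here only
// to mirror the softened view, the real prototype uses float):
//
//   extern "C" uint32_t __addsf3(uint32_t a_bits, uint32_t b_bits);
//   uint32_t r_bits = __addsf3(a_bits, b_bits);  // f32 add on bit patterns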
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + BitConvertToInteger(N->getOperand(0)), + BitConvertToInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + NewOp.getValueType().getVectorElementType(), + NewOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned Size = NVT.getSizeInBits(); + + // Mask = ~(1 << (Size-1)) + SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1), + NVT); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + MVT LVT = LHS.getValueType(); + MVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), + DAG.getConstant(RSize - 1, + TLI.getShiftAmountTy())); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, TLI.getShiftAmountTy())); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy())); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), + DAG.getConstant(LSize - 1, + TLI.getShiftAmountTy())); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. 
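// [Editor's note: for two f32 values softened to i32 (the SizeDiff == 0
// case), the whole routine reduces to this sketch; not part of the patch.]
//
//   uint32_t copysign_soft(uint32_t lhs, uint32_t rhs) {
//     uint32_t sign = rhs & 0x80000000u;  // sign bit of the second operand
//     uint32_t mag  = lhs & 0x7FFFFFFFu;  // first operand with sign cleared
//     return mag | sign;                  // OR the value with the sign bit
//   }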
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + 
RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + GetSoftenedFloat(N->getOperand(0)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::i32 && + "Unsupported power type!"); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + 
RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewL; + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), + NVT, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getSrcValue(), L->getSrcValueOffset(), NVT, + L->isVolatile(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + // Do a non-extending load followed by FP_EXTEND. + NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD, + L->getMemoryVT(), L->getChain(), + L->getBasePtr(), L->getOffset(), + L->getSrcValue(), L->getSrcValueOffset(), + L->getMemoryVT(), + L->isVolatile(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(1)); + SDValue RHS = GetSoftenedFloat(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(2)); + SDValue RHS = GetSoftenedFloat(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewVAARG; + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+  ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+  return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+  MVT SVT = N->getOperand(0).getValueType();
+  MVT RVT = N->getValueType(0);
+  MVT NVT = MVT();
+  DebugLoc dl = N->getDebugLoc();
+
+  // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to
+  // a larger type, eg: i8 -> fp.  Even if it is legal, no libcall may exactly
+  // match.  Look for an appropriate libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+       t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+    NVT = (MVT::SimpleValueType)t;
+    // The source type needs to be big enough to hold the operand.
+    if (NVT.bitsGE(SVT))
+      LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+  }
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+  // Sign/zero extend the argument if the libcall takes a larger type.
+  SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                           NVT, N->getOperand(0));
+  return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Operand Float to Integer Conversion
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+        cerr << "\n");
+  SDValue Res = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "SoftenFloatOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to soften this operator's operand!");
+    abort();
+
+  case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+  case ISD::BR_CC:       Res = SoftenFloatOp_BR_CC(N); break;
+  case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
+  case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break;
+  case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_UINT(N); break;
+  case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
+  case ISD::STORE:       Res = SoftenFloatOp_STORE(N, OpNo); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// SoftenSetCCOperands - Soften the operands of a comparison.  This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+                                           ISD::CondCode &CCCode, DebugLoc dl) {
+  SDValue LHSInt = GetSoftenedFloat(NewLHS);
+  SDValue RHSInt = GetSoftenedFloat(NewRHS);
+  MVT VT = NewLHS.getValueType();
+
+  assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
+
+  // Expand into one or more soft-fp libcall(s).
+  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+  switch (CCCode) {
+  case ISD::SETEQ:
+  case ISD::SETOEQ:
+    LC1 = (VT == MVT::f32) ?
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + switch (CCCode) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + default: assert(false && "Do not know how to soften this setcc!"); + } + } + + MVT RetVT = MVT::i32; // FIXME: is this the correct return type? + SDValue Ops[2] = { LHSInt, RHSInt }; + NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewRHS = DAG.getConstant(0, RetVT); + CCCode = TLI.getCmpLibcallCC(LC1); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { + SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), + NewLHS, NewRHS, DAG.getCondCode(CCCode)); + NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS, + NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2))); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + NewRHS = SDValue(); + } +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + GetSoftenedFloat(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { + MVT SVT = N->getOperand(0).getValueType(); + MVT RVT = N->getValueType(0); + + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. 
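+  // Note: UpdateNodeOperands normally rewrites N in place and returns it; the
+  // SoftenFloatOperand dispatcher detects this (Res.getNode() == N) and tells
+  // the legalizer core the node was updated in place.  If CSE folds the
+  // updated node into an existing node, that node replaces N instead.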
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+  MVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+  MVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                N->getOperand(2), N->getOperand(3),
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+                                DAG.getCondCode(CCCode));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only soften the stored value!");
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Val = ST->getValue();
+  DebugLoc dl = N->getDebugLoc();
+
+  if (ST->isTruncatingStore())
+    // Do an FP_ROUND followed by a non-truncating store.
+    Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+                                          Val, DAG.getIntPtrConstant(0)));
+  else
+    Val = GetSoftenedFloat(Val);
+
+  return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+                      ST->getSrcValue(), ST->getSrcValueOffset(),
+                      ST->isVolatile(), ST->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion.  At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to expand the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; + case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; + case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; + case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; + case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. 
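+  // (The only float type expanded here is ppcf128, which is split into a pair
+  // of f64 values: the number represented is the sum Hi + Lo of the two
+  // doubles, with Hi carrying the high-order part.)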
+ if (Lo.getNode()) + SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + assert(NVT.getSizeInBits() == integerPartWidth && + "Do not know how to expand this float constant!"); + APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, + &C.getRawData()[1])), NVT); + Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, + &C.getRawData()[0])), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + DebugLoc dl = N->getDebugLoc(); + SDValue Tmp; + GetExpandedFloat(N->getOperand(0), Lo, Tmp); + Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); + // Lo = Hi==fabs(Hi) ? Lo : -Lo; + Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), + DAG.getCondCode(ISD::SETEQ)); +} + +void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32,RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + 
RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32,RTLIB::LOG10_F64, + RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = 
LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + DebugLoc dl = N->getDebugLoc(); + + MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getMemoryVT(), + LD->isVolatile(), LD->getAlignment()); + + // Remember the chain. + Chain = Hi.getValue(1); + + // The low part is zero. + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(LD, 1), Chain); +} + +void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Src = N->getOperand(0); + MVT SrcVT = Src.getValueType(); + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + DebugLoc dl = N->getDebugLoc(); + + // First do an SINT_TO_FP, whether the original was signed or unsigned. + // When promoting partial word types to i32 we must honor the signedness, + // though. + if (SrcVT.bitsLE(MVT::i32)) { + // The integer can be represented exactly in an f64. + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i32, Src); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + } else { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (SrcVT.bitsLE(MVT::i64)) { + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i64, Src); + LC = RTLIB::SINTTOFP_I64_PPCF128; + } else if (SrcVT.bitsLE(MVT::i128)) { + Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src); + LC = RTLIB::SINTTOFP_I128_PPCF128; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + Hi = MakeLibCall(LC, VT, &Src, 1, true, dl); + GetPairElements(Hi, Lo, Hi); + } + + if (isSigned) + return; + + // Unsigned - fix up the SINT_TO_FP value just calculated. + Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); + SrcVT = Src.getValueType(); + + // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128. 
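+  // The arrays below hold 2^32, 2^64 and 2^128 as ppcf128 (double-double)
+  // constants: 0x41f0000000000000, 0x43f0000000000000 and 0x47f0000000000000
+  // are the IEEE-754 double encodings of 2^32, 2^64 and 2^128 respectively,
+  // and the low double of each pair is zero since every power of two is
+  // exactly representable.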
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; + static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; + static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; + const uint64_t *Parts = 0; + + switch (SrcVT.getSimpleVT()) { + default: + assert(false && "Unsupported UINT_TO_FP!"); + case MVT::i32: + Parts = TwoE32; + break; + case MVT::i64: + Parts = TwoE64; + break; + case MVT::i128: + Parts = TwoE128; + break; + } + + Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, + DAG.getConstantFP(APFloat(APInt(128, 2, Parts)), + MVT::ppcf128)); + Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, DAG.getCondCode(ISD::SETLT)); + GetPairElements(Lo, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Float Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatOperand - This method is called when the specified operand of the +/// specified node is found to need expansion. At this point, all of the result +/// types of the node are known to be legal, but other operands of the node may +/// need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) + == TargetLowering::Custom) + Res = TLI.LowerOperation(SDValue(N, 0), DAG); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to expand this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedFloat(NewLHS, LHSLo, LHSHi); + GetExpandedFloat(NewRHS, RHSLo, RHSHi); + + MVT VT = NewLHS.getValueType(); + assert(VT == MVT::ppcf128 && "Unsupported setcc type!"); + + // FIXME: This generated code sucks. 
We want to generate + // FCMPU crN, hi1, hi2 + // BNE crN, L: + // FCMPU crN, lo1, lo2 + // The following can be improved, but not that much. + SDValue Tmp1, Tmp2, Tmp3; + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETOEQ); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, CCCode); + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETUNE); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode); + Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); + NewRHS = SDValue(); // LHS is the result, not a compare. +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(0), Lo, Hi); + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + N->getValueType(0), Hi, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { + MVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, + N->getOperand(0), DAG.getValueType(MVT::f64)); + Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { + MVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; + APFloat APF = APFloat(APInt(128, 2, TwoE31)); + SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. 
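+    // TwoE31 is 2^31 as a ppcf128 constant: 0x41e0000000000000 is the IEEE-754
+    // double encoding of 2^31 and the low double is zero.  Values below 2^31
+    // convert directly with FP_TO_SINT; values at or above it first have 2^31
+    // subtracted so the signed conversion cannot overflow, and bit 0x80000000
+    // is then added back into the integer result.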
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + DAG.getCondCode(ISD::SETGE)); + } + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + StoreSDNode *ST = cast<StoreSDNode>(N); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType()); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + SDValue Lo, Hi; + GetExpandedOp(ST->getValue(), Lo, Hi); + + return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp new file mode 100644 index 0000000..eb9342c --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -0,0 +1,2382 @@ +//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer type expansion and promotion for LegalizeTypes. +// Promotion is the act of changing a computation in an illegal type into a +// computation in a larger type. 
For example, implementing i8 arithmetic in an
+// i32 register (often needed on PowerPC).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type.  For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type.  At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+  DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n");
+  SDValue Res = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    cerr << "PromoteIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to promote this operator!");
+    abort();
+  case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;
+  case ISD::AssertZext:  Res = PromoteIntRes_AssertZext(N); break;
+  case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+  case ISD::BSWAP:       Res = PromoteIntRes_BSWAP(N); break;
+  case ISD::BUILD_PAIR:  Res = PromoteIntRes_BUILD_PAIR(N); break;
+  case ISD::Constant:    Res = PromoteIntRes_Constant(N); break;
+  case ISD::CONVERT_RNDSAT:
+                         Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+  case ISD::CTLZ:        Res = PromoteIntRes_CTLZ(N); break;
+  case ISD::CTPOP:       Res = PromoteIntRes_CTPOP(N); break;
+  case ISD::CTTZ:        Res = PromoteIntRes_CTTZ(N); break;
+  case ISD::EXTRACT_VECTOR_ELT:
+                         Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::LOAD:        Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+  case ISD::SELECT:      Res = PromoteIntRes_SELECT(N); break;
+  case ISD::SELECT_CC:   Res = PromoteIntRes_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = PromoteIntRes_SETCC(N); break;
+  case ISD::SHL:         Res = PromoteIntRes_SHL(N); break;
+  case ISD::SIGN_EXTEND_INREG:
+                         Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+  case ISD::SRA:         Res = PromoteIntRes_SRA(N); break;
+  case ISD::SRL:         Res = PromoteIntRes_SRL(N); break;
+  case ISD::TRUNCATE:    Res = PromoteIntRes_TRUNCATE(N); break;
+  case ISD::UNDEF:       Res = PromoteIntRes_UNDEF(N); break;
+  case ISD::VAARG:       Res = PromoteIntRes_VAARG(N); break;
+
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:  Res = PromoteIntRes_INT_EXTEND(N); break;
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:  Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:         Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+  case ISD::SDIV:
+  case ISD::SREM:        Res = PromoteIntRes_SDIV(N); break;
+
+  case ISD::UDIV:
+  case ISD::UREM:        Res = PromoteIntRes_UDIV(N); break;
+
+  case ISD::SADDO:
+  case ISD::SSUBO:       Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+  case ISD::UADDO:
+  case ISD::USUBO:       Res =
PromoteIntRes_UADDSUBO(N, ResNo); break; + case ISD::SMULO: + case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_CMP_SWAP: + Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + } + + // If the result is null then the sub-method took care of registering it. + if (Res.getNode()) + SetPromotedInteger(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { + // Sign-extend the new bits, and continue the assertion. + SDValue Op = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { + // Zero the new bits, and continue the assertion. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), + N->getChain(), N->getBasePtr(), + Op2, N->getSrcValue(), N->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), N->getChain(), N->getBasePtr(), + Op2, Op3, N->getSrcValue(), N->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + MVT NInVT = TLI.getTypeToTransformTo(InVT); + MVT OutVT = N->getValueType(0); + MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + if (NOutVT.bitsEq(NInVT)) + // The input promotes to the same size. Convert the promoted value. + return DAG.getNode(ISD::BIT_CONVERT, dl, + NOutVT, GetPromotedInteger(InOp)); + break; + case SoftenFloat: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case ExpandInteger: + case ExpandFloat: + break; + case ScalarizeVector: + // Convert the element to an integer and promote it by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + case SplitVector: { + // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. 
+ SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + MVT::getIntegerVT(NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); + } + case WidenVector: + if (OutVT.bitsEq(NInVT)) + // The input is widened to the same size. Convert to the widened value. + return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); + } + + // Otherwise, lower the bit-convert to a store/load from the stack. + // Create the stack frame object. Make sure it is aligned for both + // the source and destination types. + SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); + + // Result is an extending load from the stack slot. + return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, TLI.getPointerTy())); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { + // The pair element type may be legal, or may not promote to the same type as + // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + JoinIntegers(N->getOperand(0), N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { + MVT VT = N->getValueType(0); + // FIXME there is no actual debug info here + DebugLoc dl = N->getDebugLoc(); + // Zero extend things like i1, sign extend everything else. It shouldn't + // matter in theory which one we pick, but this tends to give better code? + unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT), + SDValue(N, 0)); + assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); + return Result; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && + "can only promote integers"); + MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0)); + return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + // Subtract off the extra leading bits in the bigger type. 
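+  // For example, when i8 is promoted to i32: after zero extension, CTLZ on
+  // the i32 value counts 32 - 8 = 24 extra leading zero bits that do not
+  // exist in the original i8 value, so 24 is subtracted from the count.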
+ return DAG.getNode(ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.set(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + return DAG.getNode(ISD::CTTZ, dl, NVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { + MVT OldVT = N->getValueType(0); + SDValue OldVec = N->getOperand(0); + if (getTypeAction(OldVec.getValueType()) == WidenVector) + OldVec = GetWidenedVector(N->getOperand(0)); + unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + if (OldElts == 1) { + assert(!isTypeLegal(OldVec.getValueType()) && + "Legal one-element vector of a type needing promotion!"); + // It is tempting to follow GetScalarizedVector by a call to + // GetPromotedInteger, but this would be wrong because the + // scalarized value may not yet have been processed. + return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), + GetScalarizedVector(OldVec)); + } + + // Convert to a vector half as long with an element type of twice the width, + // for example <4 x i16> -> <2 x i32>. + assert(!(OldElts & 1) && "Odd length vectors not supported!"); + MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits()); + assert(OldVT.isSimple() && NewVT.isSimple()); + + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getVectorVT(NewVT, OldElts / 2), + OldVec); + + // Extract the element at OldIdx / 2 from the new vector. + SDValue OldIdx = N->getOperand(1); + SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx, + DAG.getConstant(1, TLI.getPointerTy())); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx); + + // Select the appropriate half of the element: Lo if OldIdx was even, + // Hi if it was odd. + SDValue Lo = Elt; + SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt, + DAG.getConstant(OldVT.getSizeInBits(), + TLI.getPointerTy())); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Extend to the promoted type. + SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx); + SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo); + return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned NewOpc = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); + + // If we're promoting a UINT to a larger size, check to see if the new node + // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since + // we can use that instead. This allows us to generate better code for + // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not + // legal, such as PowerPC. 
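+  // Substituting FP_TO_SINT is safe because every value of the original
+  // unsigned result type fits in the non-negative range of the wider promoted
+  // type: e.g. an i16 FP_TO_UINT promoted to i32 only needs values 0..65535,
+  // which an i32 FP_TO_SINT produces exactly.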
+ if (N->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; + + SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + ISD::AssertZext : ISD::AssertSext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); + + // If the result and operand types are the same after promotion, simplify + // to an in-register extension. + if (NVT == Res.getValueType()) { + // The high bits are not guaranteed to be anything. Insert an extend. + if (N->getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(N->getOperand(0).getValueType())); + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); + assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); + return Res; + } + } + + // Otherwise, just extend the original operand all the way to the larger type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + ISD::LoadExtType ExtType = + ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); + DebugLoc dl = N->getDebugLoc(); + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT(), N->isVolatile(), + N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +/// Promote the overflow flag of an overflowing arithmetic node. +SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { + // Simply change the return type of the boolean result. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1)); + MVT ValueVTs[] = { N->getValueType(0), NVT }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + DAG.getVTList(ValueVTs, 2), Ops, 2); + + // Modified the sum result - switch anything that used the old sum to use + // the new one. + ReplaceValueWith(SDValue(N, 0), Res); + + return SDValue(Res.getNode(), 1); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // sign extension of its truncation to the original type. 
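+  // For example, i8 SADDO promoted to i32: 100 + 100 = 200, but sign
+  // extending the low 8 bits (0xC8) gives -56 != 200, so overflow is set.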
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: sign extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(OVT)); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(2)); + SDValue RHS = GetPromotedInteger(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { + MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + assert(isTypeLegal(SVT) && "Illegal SetCC type!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the SETCC result using the canonical SETCC type. + SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); + + // Convert to the expected type. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); + return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { + return DAG.getNode(ISD::SHL, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { + // The input may have strange things in the top bits of the registers, but + // these operations don't care. They may have weird bits going out, but + // that too is okay if they are integer operations. + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = GetPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + // The input value must be properly sign extended. 
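+  // For example, i8 -2 (0xFE) promoted to i32 must become 0xFFFFFFFE, not
+  // 0x000000FE, or the arithmetic shift would produce the wrong high bits.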
+ SDValue Res = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRA, N->getDebugLoc(), + Res.getValueType(), Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + // The input value must be properly zero extended. + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Res = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Res; + + switch (getTypeAction(N->getOperand(0).getValueType())) { + default: assert(0 && "Unknown type action!"); + case Legal: + case ExpandInteger: + Res = N->getOperand(0); + break; + case PromoteInteger: + Res = GetPromotedInteger(N->getOperand(0)); + break; + } + + // Truncate to NVT instead of VT + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // zero extension of its truncation to the original type. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: zero extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + MVT RegVT = TLI.getRegisterType(VT); + unsigned NumRegs = TLI.getNumRegisters(VT); + // The argument is passed as NumRegs registers of type RegVT. + + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2)); + Chain = Parts[i].getValue(1); + } + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::reverse(Parts.begin(), Parts.end()); + + // Assemble the parts in the promoted type. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); + for (unsigned i = 1; i < NumRegs; ++i) { + SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); + // Shift it to the right position and "or" it in. 
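+    // (With i32 registers, part i lands at bit offset i * 32 of the
+    // promoted result; Parts was already reversed above for big-endian.)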
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part, + DAG.getConstant(i * RegVT.getSizeInBits(), + TLI.getPointerTy())); + Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); + } + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + assert(ResNo == 1 && "Only boolean result promotion currently supported!"); + return PromoteIntRes_Overflow(N); +} + +//===----------------------------------------------------------------------===// +// Integer Operand Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need promotion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to promote this operator's operand!"); + abort(); + + case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; + case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; + case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; + case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; + case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntOp_CONVERT_RNDSAT(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; + case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; + case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; + case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// PromoteSetCCOperands - Promote the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. 
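+/// For example, an i8 SETULT promoted to i32 zero extends both operands;
+/// zero extension preserves the unsigned ordering of the low 8 bits.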
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, + ISD::CondCode CCCode) { + // We have to insert explicit sign or zero extends. Note that we could + // insert sign extends for ALL conditions, but zero extend is cheaper on + // many machines (an AND instead of two shifts), so prefer it. + switch (CCCode) { + default: assert(0 && "Unknown integer comparison!"); + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGE: + case ISD::SETUGT: + case ISD::SETULE: + case ISD::SETULT: + // ALL of these operations will work if we either sign or zero extend + // the operands (including the unsigned comparisons!). Zero extend is + // usually a simpler/cheaper operation, so prefer it. + NewLHS = ZExtPromotedInteger(NewLHS); + NewRHS = ZExtPromotedInteger(NewRHS); + break; + case ISD::SETGE: + case ISD::SETGT: + case ISD::SETLT: + case ISD::SETLE: + NewLHS = SExtPromotedInteger(NewLHS); + NewRHS = SExtPromotedInteger(NewRHS); + break; + } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) { + // This should only occur in unusual situations like bitcasting to an + // x86_fp80, so just turn it into a store+load + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get()); + + // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always + // legal types. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + MVT SVT = TLI.getSetCCResultType(MVT::Other); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + + // The chain (Op#0) and basic block destination (Op#2) are always legal types. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond, + N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { + // Since the result type is legal, the operands must promote to it. + MVT OVT = N->getOperand(0).getValueType(); + SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); + SDValue Hi = GetPromotedInteger(N->getOperand(1)); + assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); + DebugLoc dl = N->getDebugLoc(); + + Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, + DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type is not. This implies + // that the vector is a power-of-two in length and that the element + // type does not have a strange size (eg: it is not i1). + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + + // Promote the inserted value. The type does not need to match the + // vector element type. 
+  // Check that any extra bits introduced will be
+  // truncated away.
+  assert(N->getOperand(0).getValueType().getSizeInBits() >=
+         N->getValueType(0).getVectorElementType().getSizeInBits() &&
+         "Type of inserted value narrower than vector element type!");
+
+  SmallVector<SDValue, 16> NewOps;
+  for (unsigned i = 0; i < NumElts; ++i)
+    NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+  return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+  assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+           CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+           CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+          "can only promote integer arguments");
+  SDValue InOp = GetPromotedInteger(N->getOperand(0));
+  return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+                              N->getOperand(1), N->getOperand(2),
+                              N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+                                                         unsigned OpNo) {
+  if (OpNo == 1) {
+    // Promote the inserted value.  This is valid because the type does not
+    // have to match the vector element type.
+
+    // Check that any extra bits introduced will be truncated away.
+    assert(N->getOperand(1).getValueType().getSizeInBits() >=
+           N->getValueType(0).getVectorElementType().getSizeInBits() &&
+           "Type of inserted value narrower than vector element type!");
+    return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                  GetPromotedInteger(N->getOperand(1)),
+                                  N->getOperand(2));
+  }
+
+  assert(OpNo == 2 && "Different operand and result vector types?");
+
+  // Promote the index.
+  SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+  return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+                                N->getOperand(1), Idx);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+  SDValue NewOps[6];
+  DebugLoc dl = N->getDebugLoc();
+  NewOps[0] = N->getOperand(0);
+  for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+    SDValue Flag = GetPromotedInteger(N->getOperand(i));
+    NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+  }
+  return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps,
+                                array_lengthof(NewOps));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+  // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+  // the operand in place.
+  return DAG.UpdateNodeOperands(SDValue(N, 0),
+                                GetPromotedInteger(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Only know how to promote condition");
+
+  // Promote all the way up to the canonical SetCC type.
+  MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+  SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+  return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
+                                N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+  // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); + + // The CC (#2) is always legal. + return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), + Op, DAG.getValueType(N->getOperand(0).getValueType())); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), + SExtPromotedInteger(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. + + // Truncate the value and store the result. + return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), + SVOffset, N->getMemoryVT(), + isVolatile, Alignment); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), + ZExtPromotedInteger(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Op = GetPromotedInteger(N->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); +} + + +//===----------------------------------------------------------------------===// +// Integer Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. +void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. 
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to expand the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; + case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; + case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; + case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; + case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break; + case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break; + case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; + case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; + case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; + + case ISD::ADDC: + case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break; + + case ISD::ADDE: + case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); +} + +/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, +/// and the shift amount is a constant 'Amt'. Expand the operation. 
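+/// For example, an i64 SHL by a constant 8 with i32 parts becomes
+/// Lo = InL << 8 and Hi = (InH << 8) | (InL >> 24).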
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the incoming operand to be shifted, so that we have its parts + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + MVT NVT = InL.getValueType(); + unsigned VTBits = N->getValueType(0).getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + MVT ShTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::SHL) { + if (Amt > VTBits) { + Lo = Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getNode(ISD::SHL, dl, + NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); + } else if (Amt == NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = InL; + } else if (Amt == 1 && + TLI.isOperationLegalOrCustom(ISD::ADDC, + TLI.getTypeToExpandTo(NVT))) { + // Emit this X << 1 as X+X. + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDValue LoOps[2] = { InL, InL }; + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(NVTBits-Amt, ShTy))); + } + return; + } + + if (N->getOpcode() == ISD::SRL) { + if (Amt > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRL, dl, + NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getConstant(0, NVT); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, NVT); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } + return; + } + + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + if (Amt > VTBits) { + Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(Amt-NVTBits, ShTy)); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } +} + +/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify +/// this shift based on knowledge of the high bit of the shift amount. If we +/// can tell this, we know that it is >= 32 or < 32, without knowing the actual +/// shift amount. 
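+/// For example, if bit 5 of an i64 shift amount is known to be one when
+/// expanding to i32 parts, the amount is >= 32, so each half of the result
+/// is at most a single shift in the narrow type.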
+bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + MVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + + // If we don't know anything about the high bits, exit. + if (((KnownZero|KnownOne) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (KnownOne.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, ShTy)); + + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + +#if 0 + // FIXME: This code is broken for shifts with a zero amount! + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if ((KnownZero & HighBitMask) == HighBitMask) { + // Compute 32-amt. + SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, + DAG.getConstant(NVTBits, ShTy), + Amt); + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, NVT, + DAG.getNode(Op1, NVT, InH, Amt), + DAG.getNode(Op2, NVT, InL, Amt2)); + return true; + } +#endif + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. +bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + MVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the incoming operand to be shifted. 
+ SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + + SDValue Lo1, Hi1, Lo2, Hi2; + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: + // ShAmt < NVTBits + Lo1 = DAG.getConstant(0, NVT); // Low part is zero. + Hi1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + + // ShAmt >= NVTBits + Lo2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + Hi2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + case ISD::SRL: + // ShAmt < NVTBits + Hi1 = DAG.getConstant(0, NVT); // Hi part is zero. + Lo1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + + // ShAmt >= NVTBits + Hi2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + Lo2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + case ISD::SRA: + // ShAmt < NVTBits + Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + + // ShAmt >= NVTBits + Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + Lo2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + } + + return false; +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + MVT NVT = LHSL.getValueType(); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support + // them. TODO: Teach operation legalization how to expand unsupported + // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate + // a carry of type MVT::Flag, but there doesn't seem to be any way to + // generate a value of this type in the expanded code sequence. + bool hasCarry = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::ADDC : ISD::SUBC, + TLI.getTypeToExpandTo(NVT)); + + if (hasCarry) { + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + } else { + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is any extension of the input (which degenerates to a copy). + Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op); + Hi = DAG.getUNDEF(NVT); // The high part is undefined. 
+ } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, + DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part replicates the sign bit of Lo, make it explicit. + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NVTBits-1, TLI.getPointerTy())); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, + DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part must be zero, make it explicit. + Hi = DAG.getConstant(0, NVT); + } +} + +void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); + Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctlz (HiLo) -> Hi != 0 ? 
+  //   ctlz(Hi) : (ctlz(Lo)+32)
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  MVT NVT = Lo.getValueType();
+
+  SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+                                   DAG.getConstant(0, NVT), ISD::SETNE);
+
+  SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
+  SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+
+  Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+                   DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+                               DAG.getConstant(NVT.getSizeInBits(), NVT)));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  MVT NVT = Lo.getValueType();
+  Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+                   DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  MVT NVT = Lo.getValueType();
+
+  SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+                                   DAG.getConstant(0, NVT), ISD::SETNE);
+
+  SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
+  SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+
+  Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
+                   DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+                               DAG.getConstant(NVT.getSizeInBits(), NVT)));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  MVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+  SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  MVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+  SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  if (ISD::isNormalLoad(N)) {
+    ExpandRes_NormalLoad(N, Lo, Hi);
+    return;
+  }
+
+  assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+  MVT VT = N->getValueType(0);
+  MVT NVT = TLI.getTypeToTransformTo(VT);
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  ISD::LoadExtType ExtType = N->getExtensionType();
+  int SVOffset = N->getSrcValueOffset();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  DebugLoc dl = N->getDebugLoc();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    MVT EVT = N->getMemoryVT();
+
+    Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+                        EVT, isVolatile, Alignment);
+
+    // Remember the chain.
+    Ch = Lo.getValue(1);
+
+    if (ExtType == ISD::SEXTLOAD) {
+      // The high part is obtained by SRA'ing all but one of the bits of the
+      // lo part.
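+      // For i32 parts this is Hi = Lo >> 31 (arithmetic), replicating the
+      // sign bit of Lo into every bit of Hi.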
+ unsigned LoSize = Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. + Hi = DAG.getUNDEF(NVT); + } + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + isVolatile, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + MVT NEVT = MVT::getIntegerVT(ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + MVT EVT = N->getMemoryVT(); + unsigned EBytes = EVT.getStoreSizeInBits()/8; + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits), + isVolatile, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + MVT::getIntegerVT(ExcessBits), + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, + NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + } + } + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = NVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + return; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHS) { + // We can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); + return; + } + } + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(NVT, NVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + } + + // If nothing else, we can make a libcall. 
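+  // For example, an i64 MUL on a 32-bit target with no MULH* or *MUL_LOHI
+  // support becomes the MUL_I64 libcall (typically __muldi3).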
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (VT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (VT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (VT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // If we can emit an efficient shift operation, do so now. Check to see if + // the RHS is a constant. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi); + + // If we can determine that the high bit of the shift is zero or one, even if + // the low bits are variable, emit this shift in an optimized form. + if (ExpandShiftWithKnownAmountBit(N, Lo, Hi)) + return; + + // If this target supports shift_PARTS, use it. First, map to the _PARTS opc. + unsigned PartsOpc; + if (N->getOpcode() == ISD::SHL) { + PartsOpc = ISD::SHL_PARTS; + } else if (N->getOpcode() == ISD::SRL) { + PartsOpc = ISD::SRL_PARTS; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + PartsOpc = ISD::SRA_PARTS; + } + + // Next check to see if the target supports this SHL_PARTS operation or if it + // will custom expand it. + MVT NVT = TLI.getTypeToTransformTo(VT); + TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); + if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom) { + // Expand the subcomponents. + SDValue LHSL, LHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + + SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; + MVT VT = LHSL.getValueType(); + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); + Hi = Lo.getValue(1); + return; + } + + // Otherwise, emit a libcall. 
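+  // For example, an i64 SRA with no SRA_PARTS support becomes the SRA_I64
+  // libcall (typically __ashrdi3).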
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + bool isSigned; + if (N->getOpcode() == ISD::SHL) { + isSigned = false; /*sign irrelevant*/ + if (VT == MVT::i16) + LC = RTLIB::SHL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SHL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SHL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SHL_I128; + } else if (N->getOpcode() == ISD::SRL) { + isSigned = false; + if (VT == MVT::i16) + LC = RTLIB::SRL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRL_I128; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + isSigned = true; + if (VT == MVT::i16) + LC = RTLIB::SRA_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRA_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRA_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRA_I128; + } + + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + return; + } + + if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) + assert(0 && "Unsupported shift!"); +} + +void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is sign extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); + // The high part is obtained by SRA'ing all but one of the bits of low part. + unsigned LoSize = NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + } +} + +void DAGTypeLegalizer:: +ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + + if (EVT.bitsLE(Lo.getValueType())) { + // sext_inreg the low part if needed. + Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo, + N->getOperand(1)); + + // The high part gets the sign extension from the lo-part. This handles + // things like sextinreg V:i64 from i8. + Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, + DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. Leave the low part alone, + // sext_inreg the high part. 
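+    // With i32 parts, ExcessBits = 48 - 32 = 16: Hi holds bits 32-47, so it
+    // is sign extended in-register from its bit 15.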
+ unsigned ExcessBits = + EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, + N->getOperand(0).getValueType(), N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. 
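+    // For instance (a sketch, assuming a 32-bit target): zero extending i48
+    // to i64 first promotes the i48 operand to i64; splitting that i64 gives
+    // two i32 halves, and ExcessBits = 48 - 32 = 16 below zeroes everything
+    // above bit 15 of the high half.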
+ SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits)); + } +} + + +//===----------------------------------------------------------------------===// +// Integer Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need expansion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to expand this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; + case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(NewLHS, LHSLo, LHSHi); + GetExpandedInteger(NewRHS, RHSLo, RHSHi); + + MVT VT = NewLHS.getValueType(); + + if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { + if (RHSLo == RHSHi) { + if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { + if (RHSCST->isAllOnesValue()) { + // Equality comparison to -1. 
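+          // The pair (Lo, Hi) equals -1 exactly when both halves are all
+          // ones, which holds iff (Lo & Hi) is all ones - so a single AND
+          // compared against RHSLo (-1) replaces two full compares.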
+        NewLHS = DAG.getNode(ISD::AND, dl,
+                             LHSLo.getValueType(), LHSLo, LHSHi);
+        NewRHS = RHSLo;
+        return;
+      }
+    }
+
+    NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+    NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+    NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    return;
+  }
+
+  // If this is a comparison of the sign bit, just look at the top part.
+  // X > -1,  X < 0
+  if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+    if ((CCCode == ISD::SETLT && CST->isNullValue()) ||     // X < 0
+        (CCCode == ISD::SETGT && CST->isAllOnesValue())) {  // X > -1
+      NewLHS = LHSHi;
+      NewRHS = RHSHi;
+      return;
+    }
+
+  // FIXME: This generates suboptimal code.
+  ISD::CondCode LowCC;
+  switch (CCCode) {
+  default: assert(0 && "Unknown integer setcc!");
+  case ISD::SETLT:
+  case ISD::SETULT: LowCC = ISD::SETULT; break;
+  case ISD::SETGT:
+  case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+  case ISD::SETLE:
+  case ISD::SETULE: LowCC = ISD::SETULE; break;
+  case ISD::SETGE:
+  case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+  }
+
+  // Tmp1 = lo(op1) < lo(op2)   // Always unsigned comparison
+  // Tmp2 = hi(op1) < hi(op2)   // Signedness depends on operands
+  // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+  // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2) | (!B1 & B3)
+  TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL);
+  SDValue Tmp1, Tmp2;
+  Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+                           LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+  if (!Tmp1.getNode())
+    Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                        LHSLo, RHSLo, LowCC);
+  Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                           LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+  if (!Tmp2.getNode())
+    Tmp2 = DAG.getNode(ISD::SETCC, dl,
+                       TLI.getSetCCResultType(LHSHi.getValueType()),
+                       LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+  ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+  ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+  if ((Tmp1C && Tmp1C->isNullValue()) ||
+      (Tmp2C && Tmp2C->isNullValue() &&
+       (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+        CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+      (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+       (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+        CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // If the low part is known false, return the high part comparison.
+    // For LE / GE, if the high part is known false, ignore the low part.
+    // For LT / GT, if the high part is known true, ignore the low part.
+    NewLHS = Tmp2;
+    NewRHS = SDValue();
+    return;
+  }
+
+  NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                             LHSHi, RHSHi, ISD::SETEQ, false,
+                             DagCombineInfo, dl);
+  if (!NewLHS.getNode())
+    NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                          LHSHi, RHSHi, ISD::SETEQ);
+  NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+                       NewLHS, Tmp1, Tmp2);
+  NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
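+  // (A scalar comes back, for example, when the whole comparison collapsed
+  // into one boolean such as a sign-bit test on the high halves; branching
+  // on "bool != 0" recovers a BR_CC form.)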
+ if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { + // The value being shifted is legal, but the shift amount is too big. + // It follows that either the result of the shift is undefined, or the + // upper half of the shift amount is zero. Just use the lower half. + SDValue Lo, Hi; + GetExpandedInteger(N->getOperand(1), Lo, Hi); + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + MVT DstVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this SINT_TO_FP!"); + return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + + MVT VT = N->getOperand(1).getValueType(); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + SDValue Lo, Hi; + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + GetExpandedInteger(N->getValue(), Lo, Hi); + return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + N->getMemoryVT(), isVolatile, Alignment); + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. 
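+    // Sketch of the layout (assuming NVT is i32 and an i64 store to address
+    // P): Lo goes to [P, P+4) and Hi to [P+4, P+8), the second store taking
+    // a MinAlign-reduced alignment to account for the pointer offset.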
+ GetExpandedInteger(N->getValue(), Lo, Hi); + + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + isVolatile, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + MVT NEVT = MVT::getIntegerVT(ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } else { + // Big-endian - high bits are at low addresses. Favor aligned stores at + // the cost of some bit-fiddling. + GetExpandedInteger(N->getValue(), Lo, Hi); + + MVT EVT = N->getMemoryVT(); + unsigned EBytes = EVT.getStoreSizeInBits()/8; + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer high bits from the top of Lo to the bottom of Hi. + Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + } + + // Store both the high bits and maybe some of the low bits. + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), + SVOffset, HiVT, isVolatile, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Store the lowest ExcessBits bits in the second half. + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + MVT::getIntegerVT(ExcessBits), + isVolatile, MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } +} + +SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + // Just truncate the low part of the source. + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + MVT SrcVT = Op.getValueType(); + MVT DstVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ + // Do a signed conversion then adjust the result. + SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); + SignedConv = TLI.LowerOperation(SignedConv, DAG); + + // The result of the signed conversion needs adjusting if the 'sign bit' of + // the incoming integer was set. To handle this, we dynamically test to see + // if it is set, and, if so, add a fudge factor. + + const uint64_t F32TwoE32 = 0x4F800000ULL; + const uint64_t F32TwoE64 = 0x5F800000ULL; + const uint64_t F32TwoE128 = 0x7F800000ULL; + + APInt FF(32, 0); + if (SrcVT == MVT::i32) + FF = APInt(32, F32TwoE32); + else if (SrcVT == MVT::i64) + FF = APInt(32, F32TwoE64); + else if (SrcVT == MVT::i128) + FF = APInt(32, F32TwoE128); + else + assert(false && "Unsupported UINT_TO_FP!"); + + // Check whether the sign bit is set. 
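+    // The high half of the expanded integer carries the original sign bit,
+    // so the signed "Hi < 0" test built below is equivalent to testing the
+    // top bit of the full-width source value.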
+ SDValue Lo, Hi; + GetExpandedInteger(Op, Lo, Hi); + SDValue SignSet = DAG.getSetCC(dl, + TLI.getSetCCResultType(Hi.getValueType()), + Hi, DAG.getConstant(0, Hi.getValueType()), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. + SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)), + TLI.getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + if (TLI.isBigEndian()) std::swap(Zero, Four); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); + FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + Alignment = std::min(Alignment, 4u); + + // Load the value out, extending it from f32 to the destination float type. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + FudgePtr, NULL, 0, MVT::f32, + false, Alignment); + return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); + } + + // Otherwise, use a libcall. + RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this UINT_TO_FP!"); + return MakeLibCall(LC, DstVT, &Op, 1, true, dl); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp new file mode 100644 index 0000000..00d71e1 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -0,0 +1,1074 @@ +//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeTypes method. It transforms +// an arbitrary well-formed SelectionDAG to only consist of legal types. This +// is common code shared among the LegalizeTypes*.cpp files. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CallingConv.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +static cl::opt<bool> +EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); + +/// PerformExpensiveChecks - Do extensive, expensive, sanity checking. +void DAGTypeLegalizer::PerformExpensiveChecks() { + // If a node is not processed, then none of its values should be mapped by any + // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + + // If a node is processed, then each value with an illegal type must be mapped + // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + // Values with a legal type may be mapped by ReplacedValues, but not by any of + // the other maps. + + // Note that these invariants may not hold momentarily when processing a node: + // the node being processed may be put in a map before being marked Processed. + + // Note that it is possible to have nodes marked NewNode in the DAG. This can + // occur in two ways. Firstly, a node may be created during legalization but + // never passed to the legalization core. 
This is usually due to the implicit + // folding that occurs when using the DAG.getNode operators. Secondly, a new + // node may be passed to the legalization core, but when analyzed may morph + // into a different node, leaving the original node as a NewNode in the DAG. + // A node may morph if one of its operands changes during analysis. Whether + // it actually morphs or not depends on whether, after updating its operands, + // it is equivalent to an existing node: if so, it morphs into that existing + // node (CSE). An operand can change during analysis if the operand is a new + // node that morphs, or it is a processed value that was mapped to some other + // value (as recorded in ReplacedValues) in which case the operand is turned + // into that other value. If a node morphs then the node it morphed into will + // be used instead of it for legalization, however the original node continues + // to live on in the DAG. + // The conclusion is that though there may be nodes marked NewNode in the DAG, + // all uses of such nodes are also marked NewNode: the result is a fungus of + // NewNodes growing on top of the useful nodes, and perhaps using them, but + // not used by them. + + // If a value is mapped by ReplacedValues, then it must have no uses, except + // by nodes marked NewNode (see above). + + // The final node obtained by mapping by ReplacedValues is not marked NewNode. + // Note that ReplacedValues should be applied iteratively. + + // Note that the ReplacedValues map may also map deleted nodes. By iterating + // over the DAG we only consider non-deleted nodes. + SmallVector<SDNode*, 16> NewNodes; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + // Remember nodes marked NewNode - they are subject to extra checking below. + if (I->getNodeId() == NewNode) + NewNodes.push_back(I); + + for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { + SDValue Res(I, i); + bool Failed = false; + + unsigned Mapped = 0; + if (ReplacedValues.find(Res) != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. 
+        SDValue NewVal = ReplacedValues[Res];
+        DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+        while (I != ReplacedValues.end()) {
+          NewVal = I->second;
+          I = ReplacedValues.find(NewVal);
+        }
+        assert(NewVal.getNode()->getNodeId() != NewNode &&
+               "ReplacedValues maps to a new node!");
+      }
+      if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+        Mapped |= 2;
+      if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+        Mapped |= 4;
+      if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+        Mapped |= 8;
+      if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+        Mapped |= 16;
+      if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+        Mapped |= 32;
+      if (SplitVectors.find(Res) != SplitVectors.end())
+        Mapped |= 64;
+      if (WidenedVectors.find(Res) != WidenedVectors.end())
+        Mapped |= 128;
+
+      if (I->getNodeId() != Processed) {
+        if (Mapped != 0) {
+          cerr << "Unprocessed value in a map!";
+          Failed = true;
+        }
+      } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+        if (Mapped > 1) {
+          cerr << "Value with legal type was transformed!";
+          Failed = true;
+        }
+      } else {
+        if (Mapped == 0) {
+          cerr << "Processed value not in any map!";
+          Failed = true;
+        } else if (Mapped & (Mapped - 1)) {
+          cerr << "Value in multiple maps!";
+          Failed = true;
+        }
+      }
+
+      if (Failed) {
+        if (Mapped & 1)
+          cerr << " ReplacedValues";
+        if (Mapped & 2)
+          cerr << " PromotedIntegers";
+        if (Mapped & 4)
+          cerr << " SoftenedFloats";
+        if (Mapped & 8)
+          cerr << " ScalarizedVectors";
+        if (Mapped & 16)
+          cerr << " ExpandedIntegers";
+        if (Mapped & 32)
+          cerr << " ExpandedFloats";
+        if (Mapped & 64)
+          cerr << " SplitVectors";
+        if (Mapped & 128)
+          cerr << " WidenedVectors";
+        cerr << "\n";
+        abort();
+      }
+    }
+  }
+
+  // Check that NewNodes are only used by other NewNodes.
+  for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+    SDNode *N = NewNodes[i];
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI)
+      assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+  }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+  bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes) that holds a
+  // reference to the root node, preventing it from being deleted and
+  // tracking any changes to the root.
+  HandleSDNode Dummy(DAG.getRoot());
+  Dummy.setNodeId(Unanalyzed);
+
+  // The root of the dag may temporarily point at deleted nodes until the type
+  // legalizer is done. Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+  // (and remembering them) if they are leaves, and 'Unanalyzed' otherwise.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    if (I->getNumOperands() == 0) {
+      I->setNodeId(ReadyToProcess);
+      Worklist.push_back(I);
+    } else {
+      I->setNodeId(Unanalyzed);
+    }
+  }
+
+  // Now that we have a set of nodes to process, handle them all.
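+  // In outline (a restatement of the invariant, not extra logic): a node's
+  // id counts its unprocessed operands; finishing a node decrements the
+  // count on each user, and a count of zero (ReadyToProcess) pushes the user
+  // onto the worklist, yielding a topological leaves-to-root traversal.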
+ while (!Worklist.empty()) { +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + if (IgnoreNodeResults(N)) + goto ScanOperands; + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + MVT ResultVT = N->getValueType(i); + switch (getTypeAction(ResultVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case PromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case ExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case SoftenFloat: + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; + case ExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case ScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case SplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case WidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + MVT OpVT = N->getOperand(i).getValueType(); + switch (getTypeAction(OpVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". + case PromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case ExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case SoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case ExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case ScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case SplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case WidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. 
+ continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. However + // there is no need to remember the replacement - morphing will make sure + // it is never used non-trivially. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. Do the + // replacement without recording it in ReplacedValues. This does not + // expunge From but that is fine - it is not really a new node. + ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). + assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. + if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + bool Failed = false; + + // Check that all result types are legal. + if (!IgnoreNodeResults(I)) + for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(I->getValueType(i))) { + cerr << "Result type " << i << " illegal!\n"; + Failed = true; + } + + // Check that all operand types are legal. 
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(I->getOperand(i).getNode()) && + !isTypeLegal(I->getOperand(i).getValueType())) { + cerr << "Operand type " << i << " illegal!\n"; + Failed = true; + } + + if (I->getNodeId() != Processed) { + if (I->getNodeId() == NewNode) + cerr << "New node not analyzed?\n"; + else if (I->getNodeId() == Unanalyzed) + cerr << "Unanalyzed node not noticed?\n"; + else if (I->getNodeId() > 0) + cerr << "Operand not processed?\n"; + else if (I->getNodeId() == ReadyToProcess) + cerr << "Not added to worklist?\n"; + Failed = true; + } + + if (Failed) { + I->dump(&DAG); cerr << "\n"; + abort(); + } + } +#endif + + return Changed; +} + +/// AnalyzeNewNode - The specified node is the root of a subtree of potentially +/// new nodes. Correct any processed operands (this may change the node) and +/// calculate the NodeId. If the node itself changes to a processed node, it +/// is not remapped - the caller needs to take care of this. +/// Returns the potentially changed node. +SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { + // If this was an existing node that is already done, we're done. + if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) + return N; + + // Remove any stale map entries. + ExpungeNode(N); + + // Okay, we know that this node is new. Recursively walk all of its operands + // to see if they are new also. The depth of this walk is bounded by the size + // of the new tree that was constructed (usually 2-3 nodes), so we don't worry + // about revisiting of nodes. + // + // As we walk the operands, keep track of the number of nodes that are + // processed. If non-zero, this will become the new nodeid of this node. + // Operands may morph when they are analyzed. If so, the node will be + // updated after all operands have been analyzed. Since this is rare, + // the code tries to minimize overhead in the non-morphing case. + + SmallVector<SDValue, 8> NewOps; + unsigned NumProcessed = 0; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue OrigOp = N->getOperand(i); + SDValue Op = OrigOp; + + AnalyzeNewValue(Op); // Op may morph. + + if (Op.getNode()->getNodeId() == Processed) + ++NumProcessed; + + if (!NewOps.empty()) { + // Some previous operand changed. Add this one to the list. + NewOps.push_back(Op); + } else if (Op != OrigOp) { + // This is the first operand to change - add all operands so far. + for (unsigned j = 0; j < i; ++j) + NewOps.push_back(N->getOperand(j)); + NewOps.push_back(Op); + } + } + + // Some operands changed - update the node. + if (!NewOps.empty()) { + SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], + NewOps.size()).getNode(); + if (M != N) { + // The node morphed into a different node. Normally for this to happen + // the original node would have to be marked NewNode. However this can + // in theory momentarily not be the case while ReplaceValueWith is doing + // its stuff. Mark the original node NewNode to help sanity checking. + N->setNodeId(NewNode); + if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) + // It morphed into a previously analyzed node - nothing more to do. + return M; + + // It morphed into a different new node. Do the equivalent of passing + // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need + // to remap the operands, since they are the same as the operands we + // remapped above. + N = M; + ExpungeNode(N); + } + } + + // Calculate the NodeId. 
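+  // NumOperands - NumProcessed is the number of operands still pending: for
+  // example, a new node both of whose operands are already Processed gets
+  // id 0 (ReadyToProcess) and goes straight onto the worklist.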
+ N->setNodeId(N->getNumOperands() - NumProcessed); + if (N->getNodeId() == ReadyToProcess) + Worklist.push_back(N); + + return N; +} + +/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed. +/// If the node changes to a processed node, then remap it. +void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { + Val.setNode(AnalyzeNewNode(Val.getNode())); + if (Val.getNode()->getNodeId() == Processed) + // We were passed a processed node, or it morphed into one - remap it. + RemapValue(Val); +} + +/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it. +/// This can occur when a node is deleted then reallocated as a new node - +/// the mapping in ReplacedValues applies to the deleted node, not the new +/// one. +/// The only map that can have a deleted node as a source is ReplacedValues. +/// Other maps can have deleted nodes as targets, but since their looked-up +/// values are always immediately remapped using RemapValue, resulting in a +/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue +/// always performs correct mappings. In order to keep the mapping correct, +/// ExpungeNode should be called on any new nodes *before* adding them as +/// either source or target to ReplacedValues (which typically means calling +/// Expunge when a new node is first seen, since it may no longer be marked +/// NewNode by the time it is added to ReplacedValues). +void DAGTypeLegalizer::ExpungeNode(SDNode *N) { + if (N->getNodeId() != NewNode) + return; + + // If N is not remapped by ReplacedValues then there is nothing to do. + unsigned i, e; + for (i = 0, e = N->getNumValues(); i != e; ++i) + if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) + break; + + if (i == e) + return; + + // Remove N from all maps - this is expensive but rare. 
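+  // Only ReplacedValues may hold a deleted node as a key (see above); the
+  // walks below assert this for the other maps while remapping their mapped
+  // values, which may still refer to stale nodes.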
+ + for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), + E = PromotedIntegers.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), + E = SoftenedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), + E = ScalarizedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), + E = WidenedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), + E = ReplacedValues.end(); I != E; ++I) + RemapValue(I->second); + + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + ReplacedValues.erase(SDValue(N, i)); +} + +/// RemapValue - If the specified value was already legalized to another value, +/// replace it by that value. +void DAGTypeLegalizer::RemapValue(SDValue &N) { + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); + if (I != ReplacedValues.end()) { + // Use path compression to speed up future lookups if values get multiply + // replaced with other values. + RemapValue(I->second); + N = I->second; + assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!"); + } +} + +namespace { + /// NodeUpdateListener - This class is a DAGUpdateListener that listens for + /// updates to nodes and recomputes their ready state. + class VISIBILITY_HIDDEN NodeUpdateListener : + public SelectionDAG::DAGUpdateListener { + DAGTypeLegalizer &DTL; + SmallSetVector<SDNode*, 16> &NodesToAnalyze; + public: + explicit NodeUpdateListener(DAGTypeLegalizer &dtl, + SmallSetVector<SDNode*, 16> &nta) + : DTL(dtl), NodesToAnalyze(nta) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + // It is possible, though rare, for the deleted node N to occur as a + // target in a map, so note the replacement N -> E in ReplacedValues. + assert(E && "Node not replaced?"); + DTL.NoteDeletion(N, E); + + // In theory the deleted node could also have been scheduled for analysis. + // So remove it from the set of nodes which will be analyzed. + NodesToAnalyze.remove(N); + + // In general nothing needs to be done for E, since it didn't change but + // only gained new uses. However N -> E was just added to ReplacedValues, + // and the result of a ReplacedValues mapping is not allowed to be marked + // NewNode. So if E is marked NewNode, then it needs to be analyzed. 
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode) + NodesToAnalyze.insert(E); + } + + virtual void NodeUpdated(SDNode *N) { + // Node updates can mean pretty much anything. It is possible that an + // operand was set to something already processed (f.e.) in which case + // this node could become ready. Recompute its flags. + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + N->setNodeId(DAGTypeLegalizer::NewNode); + NodesToAnalyze.insert(N); + } + }; +} + + +/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith. Updates the +/// DAG causing any uses of From to use To instead, but without expunging From +/// or recording the replacement in ReplacedValues. Do not call directly unless +/// you really know what you are doing! +void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) { + assert(From.getNode() != To.getNode() && "Potential legalization loop!"); + + // If expansion produced new nodes, make sure they are properly marked. + AnalyzeNewValue(To); // Expunges To. + + // Anything that used the old node should now use the new one. Note that this + // can potentially cause recursive merging. + SmallSetVector<SDNode*, 16> NodesToAnalyze; + NodeUpdateListener NUL(*this, NodesToAnalyze); + DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + + // Process the list of nodes that need to be reanalyzed. + while (!NodesToAnalyze.empty()) { + SDNode *N = NodesToAnalyze.back(); + NodesToAnalyze.pop_back(); + if (N->getNodeId() != DAGTypeLegalizer::NewNode) + // The node was analyzed while reanalyzing an earlier node - it is safe to + // skip. Note that this is not a morphing node - otherwise it would still + // be marked NewNode. + continue; + + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new node + // instead. + assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + SDValue OldVal(N, i); + SDValue NewVal(M, i); + if (M->getNodeId() == Processed) + RemapValue(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + } + // The original node continues to exist in the DAG, marked NewNode. + } + } +} + +/// ReplaceValueWith - The specified value was legalized to the specified other +/// value. Update the DAG and NodeIds replacing any uses of From to use To +/// instead. +void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { + assert(From.getNode()->getNodeId() == ReadyToProcess && + "Only the node being processed may be remapped!"); + + // If expansion produced new nodes, make sure they are properly marked. + ExpungeNode(From.getNode()); + AnalyzeNewValue(To); // Expunges To. + + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + + // Do the replacement. 
+ ReplaceValueWithHelper(From, To); +} + +void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedIntegers[Op]; + assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = SoftenedFloats[Op]; + assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = ScalarizedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't split"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + assert(Entry.first.getNode() == 0 && "Node already split"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = WidenedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node already widened!"); + OpEntry = Result; +} + + +//===----------------------------------------------------------------------===// +// Utilities. +//===----------------------------------------------------------------------===// + +/// BitConvertToInteger - Convert to an integer of the same size. 
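+/// For example, f64 becomes i64, and v2f32 (64 bits in total) also becomes
+/// i64 - only the overall bit width matters here.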
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+  unsigned BitWidth = Op.getValueType().getSizeInBits();
+  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+                     MVT::getIntegerVT(BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Only applies to vectors!");
+  unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+  MVT EltNVT = MVT::getIntegerVT(EltWidth);
+  unsigned NumElts = Op.getValueType().getVectorNumElements();
+  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+                     MVT::getVectorVT(EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+                                               MVT DestVT) {
+  DebugLoc dl = Op.getDebugLoc();
+  // Create the stack frame object. Make sure it is aligned for both
+  // the source and destination types.
+  SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0);
+  // Result is a load from the stack slot.
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is true, the node has an illegal result type and VT is
+/// the type of that illegal result (ResNo). If LegalizeResult is false, the
+/// node's result types are legal but one operand is illegal, and VT is the
+/// type of that illegal operand (OperandNo).
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  if (LegalizeResult)
+    TLI.ReplaceNodeResults(N, Results, DAG);
+  else
+    TLI.LowerOperationWrapper(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom lower it after all.
+    return false;
+
+  // Make everything that once used N's values now use those in Results instead.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i)
+    ReplaceValueWith(SDValue(N, i), Results[i]);
+  return true;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
+void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) {
+  if (!InVT.isVector()) {
+    LoVT = HiVT = TLI.getTypeToTransformTo(InVT);
+  } else {
+    MVT NewEltVT = InVT.getVectorElementType();
+    unsigned NumElements = InVT.getVectorNumElements();
+    if ((NumElements & (NumElements-1)) == 0) {  // Simple power of two vector.
+      NumElements >>= 1;
+      LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements);
+    } else {                                     // Non-power-of-two vectors.
+      unsigned NewNumElts_Lo = 1 << Log2_32(NumElements);
+      unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo;
+      LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo);
+      HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi);
+    }
+  }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
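+/// For instance, if an i64 pair legalizes to i32 halves, extracting element
+/// 0 yields the low i32 and element 1 the high i32.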
+void DAGTypeLegalizer::GetPairElements(SDValue Pair, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Pair.getDebugLoc(); + MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType()); + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(1)); +} + +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT, + SDValue Index) { + DebugLoc dl = Index.getDebugLoc(); + // Make sure the index type is big enough to compute in. + if (Index.getValueType().bitsGT(TLI.getPointerTy())) + Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); + else + Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EltSize, Index.getValueType())); + return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); +} + +/// JoinIntegers - Build an integer with low bits Lo and high bits Hi. +SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { + // Arbitrarily use dlHi for result DebugLoc + DebugLoc dlHi = Hi.getDebugLoc(); + DebugLoc dlLo = Lo.getDebugLoc(); + MVT LVT = Lo.getValueType(); + MVT HVT = Hi.getValueType(); + MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits()); + + Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); + Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, + DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); +} + +/// LibCallify - Convert the node into a libcall with the same prototype. +SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, + bool isSigned) { + unsigned NumOps = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + if (NumOps == 0) { + return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl); + } else if (NumOps == 1) { + SDValue Op = N->getOperand(0); + return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl); + } else if (NumOps == 2) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl); + } + SmallVector<SDValue, 8> Ops(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + Ops[i] = N->getOperand(i); + + return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl); +} + +/// MakeLibCall - Generate a libcall taking the given operands as arguments and +/// returning a result of type RetVT. +SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForMVT(); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, CallingConv::C, false, Callee, Args, DAG, dl); + return CallInfo.first; +} + +/// PromoteTargetBoolean - Promote the given target boolean to a target boolean +/// of the given type. 
A target boolean is an integer value, not necessarily of +/// type i1, the bits of which conform to getBooleanContents. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { + DebugLoc dl = Bool.getDebugLoc(); + ISD::NodeType ExtendCode; + switch (TLI.getBooleanContents()) { + default: + assert(false && "Unknown BooleanContent!"); + case TargetLowering::UndefinedBooleanContent: + // Extend to VT by adding rubbish bits. + ExtendCode = ISD::ANY_EXTEND; + break; + case TargetLowering::ZeroOrOneBooleanContent: + // Extend to VT by adding zero bits. + ExtendCode = ISD::ZERO_EXTEND; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: { + // Extend to VT by copying the sign bit. + ExtendCode = ISD::SIGN_EXTEND; + break; + } + } + return DAG.getNode(ExtendCode, dl, VT, Bool); +} + +/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT +/// bits in Hi. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + MVT LoVT, MVT HiVT, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Op.getDebugLoc(); + assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == + Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, + DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); +} + +/// SplitInteger - Return the lower and upper halves of Op's bits in a value +/// type half the size of Op's. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + SDValue &Lo, SDValue &Hi) { + MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2); + SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Entry Point +//===----------------------------------------------------------------------===// + +/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that +/// only uses types natively supported by the target. Returns "true" if it made +/// any changes. +/// +/// Note that this is an involved process that may invalidate pointers into +/// the graph. +bool SelectionDAG::LegalizeTypes() { + return DAGTypeLegalizer(*this).run(); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h new file mode 100644 index 0000000..75c8924 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -0,0 +1,736 @@ +//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DAGTypeLegalizer class. This is a private interface +// shared between the code that implements the SelectionDAG::LegalizeTypes +// method. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAG_LEGALIZETYPES_H
+#define SELECTIONDAG_LEGALIZETYPES_H
+
+#define DEBUG_TYPE "legalize-types"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
+/// on it until only value types the target machine can handle are left. This
+/// involves promoting small sizes to large sizes or splitting up large values
+/// into small values.
+///
+class VISIBILITY_HIDDEN DAGTypeLegalizer {
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+public:
+  // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
+  // about the state of the node. The enum values below list the possible
+  // states.
+  enum NodeIdFlags {
+    /// ReadyToProcess - All operands have been processed, so this node is ready
+    /// to be handled.
+    ReadyToProcess = 0,
+
+    /// NewNode - This is a new node, not seen before, that was created in the
+    /// process of legalizing some other node.
+    NewNode = -1,
+
+    /// Unanalyzed - This node's ID needs to be set to the number of its
+    /// unprocessed operands.
+    Unanalyzed = -2,
+
+    /// Processed - This is a node that has already been processed.
+    Processed = -3
+
+    // 1+ - This is a node which has this many unprocessed operands.
+  };
+private:
+  enum LegalizeAction {
+    Legal,           // The target natively supports this type.
+    PromoteInteger,  // Replace this integer type with a larger one.
+    ExpandInteger,   // Split this integer type into two of half the size.
+    SoftenFloat,     // Convert this float type to a same size integer type.
+    ExpandFloat,     // Split this float type into two of half the size.
+    ScalarizeVector, // Replace this one-element vector with its element type.
+    SplitVector,     // This vector type should be split into smaller vectors.
+    WidenVector      // This vector type should be widened into a larger vector.
+  };
+
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// simple value type, where the two bits correspond to the LegalizeAction
+  /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// getTypeAction - Return how we should legalize values of this type.
+  LegalizeAction getTypeAction(MVT VT) const {
+    switch (ValueTypeActions.getTypeAction(VT)) {
+    default:
+      assert(false && "Unknown legalize action!");
+    case TargetLowering::Legal:
+      return Legal;
+    case TargetLowering::Promote:
+      // Promote can mean
+      //   1) For integers, use a larger integer type (e.g. i8 -> i32).
+      //   2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
+      if (!VT.isVector())
+        return PromoteInteger;
+      else
+        return WidenVector;
+    case TargetLowering::Expand:
+      // Expand can mean
+      //   1) split scalar in half, 2) convert a float to an integer,
+      //   3) scalarize a single-element vector, 4) split a vector in two.
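+      // [Editorial note, not part of the original patch: on a typical
+      // 32-bit target these cases fire as, for example,
+      //   i64   -> ExpandInteger   (1: split the scalar in half)
+      //   f64   -> SoftenFloat     (2: same-size integer, on soft-float targets)
+      //   v1i64 -> ScalarizeVector (3: single-element vector)
+      //   v2i64 -> SplitVector     (4: split the vector in two)
+      // matching the branches below.]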
+ if (!VT.isVector()) { + if (VT.isInteger()) + return ExpandInteger; + else if (VT.getSizeInBits() == + TLI.getTypeToTransformTo(VT).getSizeInBits()) + return SoftenFloat; + else + return ExpandFloat; + } else if (VT.getVectorNumElements() == 1) { + return ScalarizeVector; + } else { + return SplitVector; + } + } + } + + /// isTypeLegal - Return true if this type is legal on this target. + bool isTypeLegal(MVT VT) const { + return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + } + + /// IgnoreNodeResults - Pretend all of this node's results are legal. + bool IgnoreNodeResults(SDNode *N) const { + return N->getOpcode() == ISD::TargetConstant; + } + + /// PromotedIntegers - For integer nodes that are below legal width, this map + /// indicates what promoted value to use. + DenseMap<SDValue, SDValue> PromotedIntegers; + + /// ExpandedIntegers - For integer nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers; + + /// SoftenedFloats - For floating point nodes converted to integers of + /// the same size, this map indicates the converted value to use. + DenseMap<SDValue, SDValue> SoftenedFloats; + + /// ExpandedFloats - For float nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats; + + /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the + /// scalar value of type 'ty' to use. + DenseMap<SDValue, SDValue> ScalarizedVectors; + + /// SplitVectors - For nodes that need to be split this map indicates + /// which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors; + + /// WidenedVectors - For vector nodes that need to be widened, indicates + /// the widened value to use. + DenseMap<SDValue, SDValue> WidenedVectors; + + /// ReplacedValues - For values that have been replaced with another, + /// indicates the replacement value to use. + DenseMap<SDValue, SDValue> ReplacedValues; + + /// Worklist - This defines a worklist of nodes to process. In order to be + /// pushed onto this worklist, all operands of a node must have already been + /// processed. + SmallVector<SDNode*, 128> Worklist; + +public: + explicit DAGTypeLegalizer(SelectionDAG &dag) + : TLI(dag.getTargetLoweringInfo()), DAG(dag), + ValueTypeActions(TLI.getValueTypeActions()) { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + } + + /// run - This is the main entry point for the type legalizer. This does a + /// top-down traversal of the dag, legalizing types as it goes. Returns + /// "true" if it made any changes. + bool run(); + + void NoteDeletion(SDNode *Old, SDNode *New) { + ExpungeNode(Old); + ExpungeNode(New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + } + +private: + SDNode *AnalyzeNewNode(SDNode *N); + void AnalyzeNewValue(SDValue &Val); + void ExpungeNode(SDNode *N); + void PerformExpensiveChecks(); + void RemapValue(SDValue &N); + + // Common routines. 
+ SDValue BitConvertToInteger(SDValue Op); + SDValue BitConvertVectorToIntegerVector(SDValue Op); + SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT); + bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult); + SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index); + SDValue JoinIntegers(SDValue Lo, SDValue Hi); + SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); + SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + const SDValue *Ops, unsigned NumOps, bool isSigned, + DebugLoc dl); + SDValue PromoteTargetBoolean(SDValue Bool, MVT VT); + void ReplaceValueWith(SDValue From, SDValue To); + void ReplaceValueWithHelper(SDValue From, SDValue To); + void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT, + SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Integer Promotion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetPromotedInteger - Given a processed operand Op which was promoted to a + /// larger integer type, this returns the promoted value. The low bits of the + /// promoted value corresponding to the original type are exactly equal to Op. + /// The extra bits contain rubbish, so the promoted value may need to be zero- + /// or sign-extended from the original type before it is usable (the helpers + /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). + /// For example, if Op is an i16 and was promoted to an i32, then this method + /// returns an i32, the lower 16 bits of which coincide with Op, and the upper + /// 16 bits of which contain rubbish. + SDValue GetPromotedInteger(SDValue Op) { + SDValue &PromotedOp = PromotedIntegers[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedInteger(SDValue Op, SDValue Result); + + /// SExtPromotedInteger - Get a promoted operand and sign extend it to the + /// final size. + SDValue SExtPromotedInteger(SDValue Op) { + MVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the + /// final size. + SDValue ZExtPromotedInteger(SDValue Op) { + MVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT); + } + + // Integer Result Promotion. 
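+  // [Editorial sketch, not part of the original patch: a typical result
+  // promotion, e.g. of an i16 ADD on a target whose smallest legal integer
+  // type is i32, simply operates on the promoted operands:
+  //   SDValue LHS = GetPromotedInteger(N->getOperand(0)); // i32, high bits rubbish
+  //   SDValue RHS = GetPromotedInteger(N->getOperand(1)); // i32, high bits rubbish
+  //   return DAG.getNode(N->getOpcode(), dl, LHS.getValueType(), LHS, RHS);
+  // The low 16 bits of the result are exact, so the rubbish high bits are
+  // harmless until a user needs them, at which point SExtPromotedInteger or
+  // ZExtPromotedInteger cleans them up; this is the shape of
+  // PromoteIntRes_SimpleIntBinOp below.]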
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SDIV(SDNode *N); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UDIV(SDNode *N); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + + // Integer Operand Promotion. + bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MEMBARRIER(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedInteger - Given a processed operand Op which was expanded into + /// two integers of half the size, this returns the two halves. The low bits + /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. 
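+  ///
+  /// [Editorial illustration, not part of the original patch: expanding an
+  /// i64 ADD on a 32-bit target then works on the halves, propagating the
+  /// carry, roughly:
+  ///   SDValue LHSL, LHSH, RHSL, RHSH;
+  ///   GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  ///   GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+  ///   Lo = DAG.getNode(ISD::ADDC, dl, VTList, LHSL, RHSL);
+  ///   Hi = DAG.getNode(ISD::ADDE, dl, VTList, LHSH, RHSH, Lo.getValue(1));
+  /// where VTList pairs the half type with MVT::Flag to model the carry bit;
+  /// compare ExpandIntRes_ADDSUB below.]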
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. + void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. + bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); + SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op. 
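+  ///
+  /// [Editorial illustration, not part of the original patch: softening an
+  /// f32 FADD turns it into a libcall on the integer bits, roughly:
+  ///   SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+  ///                      GetSoftenedFloat(N->getOperand(1)) };
+  ///   return MakeLibCall(RTLIB::ADD_F32, MVT::i32, Ops, 2, false, dl);
+  /// i.e. a call to an __addsf3-style runtime routine; compare
+  /// SoftenFloatRes_FADD below.]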
+ SDValue GetSoftenedFloat(SDValue Op) { + SDValue &SoftenedOp = SoftenedFloats[Op]; + RemapValue(SoftenedOp); + assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Result Float to Integer Conversion. + void SoftenFloatResult(SDNode *N, unsigned OpNo); + SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Operand Float to Integer Conversion. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedFloat - Given a processed operand Op which was expanded into + /// two floating point values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. + void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. 
+ void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetScalarizedVector - Given a processed one-element vector Op which was + /// scalarized to its element type, this returns the element. For example, + /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + SDValue &ScalarizedOp = ScalarizedVectors[Op]; + RemapValue(ScalarizedOp); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
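+  // [Editorial sketch, not part of the original patch: scalarizing a
+  // <1 x f32> FADD reduces it to plain f32 arithmetic, roughly:
+  //   SDValue LHS = GetScalarizedVector(N->getOperand(0)); // f32
+  //   SDValue RHS = GetScalarizedVector(N->getOperand(1)); // f32
+  //   return DAG.getNode(N->getOpcode(), dl, LHS.getValueType(), LHS, RHS);
+  // which is the shape of ScalarizeVecRes_BinOp below.]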
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_ShiftOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + + SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_VSETCC(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSplitVector - Given a processed vector Op which was split into smaller + /// vectors, this method returns the smaller vectors. The first elements of + /// Op coincide with the elements of Lo; the remaining elements of Op coincide + /// with the elements of Hi: Op is what you would get by concatenating Lo and + /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then + /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. + void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. 
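+  // [Editorial sketch, not part of the original patch: for contrast with the
+  // operand-splitting methods below, result splitting of a binary operation
+  // simply applies it to both halves, roughly:
+  //   SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+  //   GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  //   GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+  //   Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+  //   Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+  // as in SplitVecRes_BinOp above; the methods below instead handle nodes
+  // whose result type is legal but which have an illegal split operand.]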
+  bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+  SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetWidenedVector - Given a processed vector Op which was widened into a
+  /// larger vector, this method returns the larger vector. The elements of
+  /// the returned vector consist of the elements of Op followed by elements
+  /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+  /// v4i32, then this method returns a v4i32 for which the first two elements
+  /// are the same as those of Op, while the last two elements contain rubbish.
+  SDValue GetWidenedVector(SDValue Op) {
+    SDValue &WidenedOp = WidenedVectors[Op];
+    RemapValue(WidenedOp);
+    assert(WidenedOp.getNode() && "Operand wasn't widened?");
+    return WidenedOp;
+  }
+  void SetWidenedVector(SDValue Op, SDValue Result);
+
+  // Widen Vector Result Promotion.
+  void WidenVectorResult(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+  SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+  SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+  SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+  SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+  SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_LOAD(SDNode* N);
+  SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+  SDValue WidenVecRes_SELECT(SDNode* N);
+  SDValue WidenVecRes_SELECT_CC(SDNode* N);
+  SDValue WidenVecRes_UNDEF(SDNode *N);
+  SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+  SDValue WidenVecRes_VSETCC(SDNode* N);
+
+  SDValue WidenVecRes_Binary(SDNode *N);
+  SDValue WidenVecRes_Convert(SDNode *N);
+  SDValue WidenVecRes_Shift(SDNode *N);
+  SDValue WidenVecRes_Unary(SDNode *N);
+
+  // Widen Vector Operand.
+  bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+  SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+  SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue WidenVecOp_STORE(SDNode* N);
+
+  SDValue WidenVecOp_Convert(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of
+  /// loads to load a vector with a resulting wider type. It takes
+  ///   LdChain:    list of chains for the loads we have generated.
+  ///   Chain:      incoming chain for the load.
+  ///   BasePtr:    base pointer to load from.
+  ///   SV:         memory disambiguation source value.
+  ///   SVOffset:   memory disambiguation offset.
+  ///   Alignment:  alignment of the memory.
+  ///   isVolatile: volatile load.
+  ///   LdWidth:    width of memory that we want to load.
+  ///   ResType:    the wider result type for the resulting vector.
+  ///   dl:         DebugLoc to be applied to new nodes.
+  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
+                              SDValue BasePtr, const Value *SV,
+                              int SVOffset, unsigned Alignment,
+                              bool isVolatile, unsigned LdWidth,
+                              MVT ResType, DebugLoc dl);
+
+  /// GenWidenVectorStores - Helper function to generate a set of
+  /// stores to store a widened vector into non-widened memory. It takes
+  ///   StChain:    list of chains for the stores we have generated.
+  ///   Chain:      incoming chain for the store.
+  ///   BasePtr:    base pointer to store to.
+  ///   SV:         memory disambiguation source value.
+  ///   SVOffset:   memory disambiguation offset.
+  ///   Alignment:  alignment of the memory.
+  ///   isVolatile: volatile store.
+  ///   ValOp:      value to store.
+  ///   StWidth:    width of memory that we want to store.
+  ///   dl:         DebugLoc to be applied to new nodes.
+  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
+                            SDValue BasePtr, const Value *SV,
+                            int SVOffset, unsigned Alignment,
+                            bool isVolatile, SDValue ValOp,
+                            unsigned StWidth, DebugLoc dl);
+
+  /// ModifyToType - Modifies a vector input (widens or narrows) to a vector
+  /// of type WidenVT. The input vector must have the same element type as
+  /// WidenVT.
+  SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+
+
+  //===--------------------------------------------------------------------===//
+  // Generic Splitting: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use that the illegal type is split into
+  // two not necessarily identical types. As such they can be used for
+  // splitting vectors and expanding integers and floats.
+
+  void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isVector())
+      GetSplitVector(Op, Lo, Hi);
+    else if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+  /// which is split (or expanded) into two not necessarily identical pieces.
+  void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT);
+
+  /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+  /// high parts of the given value.
+  void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+  // Generic Result Splitting.
+  void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT_CC   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_UNDEF       (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  //===--------------------------------------------------------------------===//
+  // Generic Expansion: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use that the illegal type is split into
+  // two identical types of half the size, and that the Lo/Hi part is stored
+  // first in memory on little/big-endian machines, followed by the Hi/Lo
+  // part. As such they can be used for expanding integers and floats.
+
+  void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  // Generic Result Expansion.
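+  // [Editorial illustration, not part of the original patch: the simplest of
+  // these is ExpandRes_BUILD_PAIR, where BUILD_PAIR(Lo, Hi) expands directly
+  // to its own operands:
+  //   Lo = N->getOperand(0);
+  //   Hi = N->getOperand(1);
+  // the other routines reduce to this shape via bit-converts, extracts or
+  // stack loads.]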
+ void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); + + // Generic Operand Expansion. + SDValue ExpandOp_BIT_CONVERT (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. + +#endif diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 0000000..e8ff3fc --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,453 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. + +void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT OutVT = N->getValueType(0); + MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + break; + case SoftenFloat: + // Convert the integer operand instead. + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case ExpandInteger: + case ExpandFloat: + // Convert the expanded pieces of the input. + GetExpandedOp(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case SplitVector: + // Convert the split parts of the input if it was split in two. 
+ GetSplitVector(InOp, Lo, Hi); + if (Lo.getValueType() == Hi.getValueType()) { + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + } + break; + case ScalarizeVector: + // Convert the element instead. + SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case WidenVector: { + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); + InOp = GetWidenedVector(InOp); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + InVT.getVectorNumElements()/2); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + } + } + + // Lower the bit-convert to a store/load from the stack. + assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); + + // Create the stack frame object. Make sure it is aligned for both + // the source and expanded destination types. + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT()); + SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); + + // Load the first half from the stack slot. + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); + + // Increment the pointer to the other half. + unsigned IncrementSize = NOutVT.getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the second half from the stack slot. + Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, + MinAlign(Alignment, IncrementSize)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Return the operands. + Lo = N->getOperand(0); + Hi = N->getOperand(1); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + GetExpandedOp(N->getOperand(0), Lo, Hi); + SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? + Hi : Lo; + + assert(Part.getValueType() == N->getValueType(0) && + "Type twice as big as expanded type not itself expanded!"); + + GetPairElements(Part, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue OldVec = N->getOperand(0); + unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + // Convert to a vector of the expanded element type, for example + // <3 x i64> -> <6 x i32>. + MVT OldVT = N->getValueType(0); + MVT NewVT = TLI.getTypeToTransformTo(OldVT); + + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getVectorVT(NewVT, 2*OldElts), + OldVec); + + // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. 
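+  // [Editorial note, not part of the original patch: e.g. extracting i64
+  // element 1 of a <3 x i64> becomes extracting i32 elements 2 and 3 of the
+  // <6 x i32>; the doubling is computed as Idx + Idx below so that no
+  // multiply node is needed.]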
+ SDValue Idx = N->getOperand(1); + + // Make sure the type of Idx is big enough to hold the new values. + if (Idx.getValueType().bitsLT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); + DebugLoc dl = N->getDebugLoc(); + + LoadSDNode *LD = cast<LoadSDNode>(N); + MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, + isVolatile, Alignment); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits() / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset+IncrementSize, + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + +void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + + +//===--------------------------------------------------------------------===// +// Generic Operand Expansion. +//===--------------------------------------------------------------------===// + +SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0).isVector()) { + // An illegal expanding type is being converted to a legal vector type. + // Make a two element vector out of the expanded parts and convert that + // instead, but only if the new vector type is legal (otherwise there + // is no point, and it might create expansion loops). For example, on + // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. 
+ MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2); + + if (isTypeLegal(NVT)) { + SDValue Parts[2]; + GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); + return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec); + } + } + + // Otherwise, store to a temporary and load out again as the new type. + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type needs expansion. + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + MVT OldVT = N->getOperand(0).getValueType(); + MVT NewVT = TLI.getTypeToTransformTo(OldVT); + DebugLoc dl = N->getDebugLoc(); + + assert(OldVT == VecVT.getVectorElementType() && + "BUILD_VECTOR operand type doesn't match vector element type!"); + + // Build a vector of twice the length out of the expanded elements. + // For example <3 x i64> -> <6 x i32>. + std::vector<SDValue> NewElts; + NewElts.reserve(NumElts*2); + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(i), Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + NewElts.push_back(Lo); + NewElts.push_back(Hi); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::getVectorVT(NewVT, NewElts.size()), + &NewElts[0], NewElts.size()); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(0), Lo, Hi); + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo; +} + +SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { + // The vector type is legal but the element type needs expansion. + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = N->getOperand(1); + MVT OldEVT = Val.getValueType(); + MVT NewEVT = TLI.getTypeToTransformTo(OldEVT); + + assert(OldEVT == VecVT.getVectorElementType() && + "Inserted element type doesn't match vector element type!"); + + // Bitconvert to a vector of twice the length with elements of the expanded + // type, insert the expanded vector elements, and then convert back. + MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2); + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + NewVecVT, N->getOperand(0)); + + SDValue Lo, Hi; + GetExpandedOp(Val, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + SDValue Idx = N->getOperand(2); + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); + Idx = DAG.getNode(ISD::ADD, dl, + Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); + + // Convert the new vector to the old vector type. 
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && + "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + Ops[0] = N->getOperand(0); + SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); + for (unsigned i = 1; i < NumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { + assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + DebugLoc dl = N->getDebugLoc(); + + StoreSDNode *St = cast<StoreSDNode>(N); + MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType()); + SDValue Chain = St->getChain(); + SDValue Ptr = St->getBasePtr(); + int SVOffset = St->getSrcValueOffset(); + unsigned Alignment = St->getAlignment(); + bool isVolatile = St->isVolatile(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + unsigned IncrementSize = NVT.getSizeInBits() / 8; + + SDValue Lo, Hi; + GetExpandedOp(St->getValue(), Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, + isVolatile, Alignment); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), + SVOffset + IncrementSize, + isVolatile, MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===--------------------------------------------------------------------===// +// Generic Result Splitting. +//===--------------------------------------------------------------------===// + +// Be careful to make no assumptions about which of Lo/Hi is stored first in +// memory (for vectors it is always Lo first followed by Hi in the following +// bytes; for integers and floats it is Lo first if and only if the machine is +// little-endian). + +void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // A MERGE_VALUES node can produce any number of values. We know that the + // first illegal one needs to be expanded into Lo/Hi. + unsigned i; + + // The string of legal results gets turned into input operands, which have + // the same type. + for (i = 0; isTypeLegal(N->getValueType(i)); ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); + + // The first illegal result must be the one that needs to be expanded. + GetSplitOp(N->getOperand(i), Lo, Hi); + + // Legalize the rest of the results into the input operands whether they are + // legal or not. 
+ unsigned e = N->getNumValues(); + for (++i; i != e; ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); +} + +void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL); + Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH); +} + +void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(2), LL, LH); + GetSplitOp(N->getOperand(3), RL, RH); + + Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0), + N->getOperand(1), LL, RL, N->getOperand(4)); + Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0), + N->getOperand(1), LH, RH, N->getOperand(4)); +} + +void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getUNDEF(LoVT); + Hi = DAG.getUNDEF(HiVT); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp new file mode 100644 index 0000000..df9af21 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -0,0 +1,335 @@ +//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeVectors method. +// +// The vector legalizer looks for vector operations which might need to be +// scalarized and legalizes them. This is a separate step from Legalize because +// scalarizing can introduce illegal types. For example, suppose we have an +// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition +// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the +// operation, which introduces nodes with the illegal type i64 which must be +// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; +// the operation must be unrolled, which introduces nodes with the illegal +// type i8 which must be promoted. +// +// This does not legalize vector manipulations like ISD::BUILD_VECTOR, +// or operations that happen to take a vector which are custom-lowered like +// ISD::CALL; the legalization for such operations never produces nodes +// with illegal types, so it's okay to put off legalizing them until +// SelectionDAG::Legalize runs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { +class VectorLegalizer { + SelectionDAG& DAG; + TargetLowering& TLI; + bool Changed; // Keep track of whether anything changed + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. 
+ DenseMap<SDValue, SDValue> LegalizedNodes; + + // Adds a node to the translation cache + void AddLegalizedOperand(SDValue From, SDValue To) { + LegalizedNodes.insert(std::make_pair(From, To)); + // If someone requests legalization of the new node, return itself. + if (From != To) + LegalizedNodes.insert(std::make_pair(To, To)); + } + + // Legalizes the given node + SDValue LegalizeOp(SDValue Op); + // Assuming the node is legal, "legalize" the results + SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); + // Implements unrolling a generic vector operation, i.e. turning it into + // scalar operations. + SDValue UnrollVectorOp(SDValue Op); + // Implements unrolling a VSETCC. + SDValue UnrollVSETCC(SDValue Op); + // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB + // isn't legal. + SDValue ExpandFNEG(SDValue Op); + // Implements vector promotion; this is essentially just bitcasting the + // operands to a different type and bitcasting the result back to the + // original type. + SDValue PromoteVectorOp(SDValue Op); + + public: + bool Run(); + VectorLegalizer(SelectionDAG& dag) : + DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} +}; + +bool VectorLegalizer::Run() { + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); + + return Changed; +} + +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { + // Generic legalization: just pass the operand through. + for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); + return Result.getValue(Op.getResNo()); +} + +SDValue VectorLegalizer::LegalizeOp(SDValue Op) { + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. 
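+  // [Editorial note, not part of the original patch: reentry happens because
+  // replacement code produced by expansion or unrolling is itself fed back
+  // through LegalizeOp, so a node can be reached again along a different
+  // path; the cache lookup below guarantees each node is legalized once.]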
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDNode* Node = Op.getNode(); + + // Legalize the operands + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + + SDValue Result = + DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size()); + + bool HasVectorValue = false; + for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); + J != E; + ++J) + HasVectorValue |= J->isVector(); + if (!HasVectorValue) + return TranslateLegalizeResults(Op, Result); + + switch (Op.getOpcode()) { + default: + return TranslateLegalizeResults(Op, Result); + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::SELECT: + case ISD::SELECT_CC: + case ISD::VSETCC: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FNEG: + case ISD::FABS: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FPOWI: + case ISD::FPOW: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FFLOOR: + break; + } + + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + case TargetLowering::Promote: + // "Promote" the operation by bitcasting + Result = PromoteVectorOp(Op); + Changed = true; + break; + case TargetLowering::Legal: break; + case TargetLowering::Custom: { + SDValue Tmp1 = TLI.LowerOperation(Op, DAG); + if (Tmp1.getNode()) { + Result = Tmp1; + break; + } + // FALL THROUGH + } + case TargetLowering::Expand: + if (Node->getOpcode() == ISD::FNEG) + Result = ExpandFNEG(Op); + else if (Node->getOpcode() == ISD::VSETCC) + Result = UnrollVSETCC(Op); + else + Result = UnrollVectorOp(Op); + break; + } + + // Make sure that the generated code is itself legal. + if (Result != Op) { + Result = LegalizeOp(Result); + Changed = true; + } + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { + // Vector "promotion" is basically just bitcasting and doing the operation + // in a different type. For example, x86 promotes ISD::AND on v2i32 to + // v1i64. 
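+  // [Editorial note, not part of the original patch: the bitcast trick is
+  // only value-preserving for operations that act independently on each bit,
+  // such as AND/OR/XOR (AND on v2i32 and AND on the bitcast v1i64 produce
+  // the same 64 bits), which is why targets request Promote for these.]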
+ MVT VT = Op.getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); + + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); +} + +SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { + if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { + SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Zero, Op.getOperand(0)); + } + return UnrollVectorOp(Op); +} + +SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { + MVT VT = Op.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType(); + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + MVT TmpEltVT = LHS.getValueType().getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 8> Ops(NumElems); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getIntPtrConstant(i)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT), + LHSElem, RHSElem, CC); + Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); +} + +/// UnrollVectorOp - We know that the given vector has a legal type, however +/// the operation it performs is not legal, and the target has requested that +/// the operation be expanded. "Unroll" the vector, splitting out the scalars +/// and operating on each element individually. +SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { + MVT VT = Op.getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + unsigned NE = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + for (unsigned i = 0; i != NE; ++i) { + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + SDValue Operand = Op.getOperand(j); + MVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + MVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperandEltVT, + Operand, + DAG.getConstant(i, MVT::i32)); + } else { + // A scalar operand; just use it as is. 
+ Operands[j] = Operand; + } + } + + switch (Op.getOpcode()) { + default: + Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0], + DAG.getShiftAmountOperand(Operands[1]))); + break; + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size()); +} + +} + +bool SelectionDAG::LegalizeVectors() { + return VectorLegalizer(*this).Run(); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp new file mode 100644 index 0000000..68967cc --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -0,0 +1,2151 @@ +//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file performs vector type splitting and scalarization for LegalizeTypes. +// Scalarization is the act of changing a computation in an illegal one-element +// vector type to be a computation in its scalar element type. For example, +// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed +// as a base case when scalarizing vector arithmetic like <4 x f32>, which +// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 +// types. +// Splitting is the act of changing a computation in an invalid vector type to +// be a computation in multiple vectors of a smaller type. For example, +// implementing <128 x f32> operations in terms of two <64 x f32> operations. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to scalarize the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: R = N->getOperand(0); break; + case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCOS: + case ISD::FNEG: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break; + + case ISD::ADD: + case ISD::AND: + case ISD::FADD: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::SUB: + case ISD::UDIV: + case ISD::UREM: + case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break; + } + + // If R is null, the sub-method took care of registering the result. 
+ if (R.getNode()) + SetScalarizedVector(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, ShiftAmt); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { + MVT NewVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + NewVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { + MVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), + Op0, DAG.getValueType(NewVT), + DAG.getValueType(Op0.getValueType()), + N->getOperand(3), + N->getOperand(4), + cast<CvtRndSatSDNode>(N)->getCvtCode()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + N->getValueType(0).getVectorElementType(), + N->getOperand(0), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { + // The value to insert may have a wider type than the vector element type, + // so be sure to truncate it to the element type if necessary. + SDValue Op = N->getOperand(1); + MVT EltVT = N->getValueType(0).getVectorElementType(); + if (Op.getValueType() != EltVT) + // FIXME: Can this happen for floating point types? + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); + return Op; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { + assert(N->isUnindexed() && "Indexed vector load?"); + + SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(), + N->getExtensionType(), + N->getValueType(0).getVectorElementType(), + N->getChain(), N->getBasePtr(), + DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { + // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. + MVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. 
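+  // E.g. (v1i8 (SCALAR_TO_VECTOR (i32 x))) scalarizes to (i8 (TRUNCATE x)).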
+ MVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(), + N->getOperand(0), N->getOperand(1), + LHS, GetScalarizedVector(N->getOperand(3)), + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { + // Figure out if the scalar is the LHS or RHS and return it. + SDValue Arg = N->getOperand(2).getOperand(0); + if (Arg.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); + unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); + return GetScalarizedVector(N->getOperand(Op)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + MVT NVT = N->getValueType(0).getVectorElementType(); + MVT SVT = TLI.getSetCCResultType(LHS.getValueType()); + DebugLoc dl = N->getDebugLoc(); + + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2)); + + // VSETCC always returns a sign-extended value, while SETCC may not. The + // SETCC result type may not match the vector element type. Correct these. + if (NVT.bitsLE(SVT)) { + // The SETCC result type is bigger than the vector element type. + // Ensure the SETCC result is sign-extended. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res, + DAG.getValueType(MVT::i1)); + // Truncate to the final type. + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); + } else { + // The SETCC result type is smaller than the vector element type. + // If the SetCC result is not sign-extended, chop it down to MVT::i1. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res); + // Sign extend to the final type. + return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res); + } +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Scalarization <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to scalarize this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: + Res = ScalarizeVecOp_BIT_CONVERT(N); break; + + case ISD::CONCAT_VECTORS: + Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; + + case ISD::EXTRACT_VECTOR_ELT: + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; + + case ISD::STORE: + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Elt); +} + +/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - +/// use a BUILD_VECTOR instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { + SmallVector<SDValue, 8> Ops(N->getNumOperands()); + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + Ops[i] = GetScalarizedVector(N->getOperand(i)); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + &Ops[0], Ops.size()); +} + +/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to +/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the +/// index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + return GetScalarizedVector(N->getOperand(0)); +} + +/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be +/// scalarized, it must be <1 x ty>. Just store the element. 
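+/// A truncating store of <1 x ty> becomes a truncating scalar store of the
+/// element, keeping the original memory value type.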
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(N->isUnindexed() && "Indexed store of one-element vector?"); + assert(OpNo == 1 && "Do not know how to scalarize this operand!"); + DebugLoc dl = N->getDebugLoc(); + + if (N->isTruncatingStore()) + return DAG.getTruncStore(N->getChain(), dl, + GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->getAlignment()); + + return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), + N->isVolatile(), N->getAlignment()); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorResult - This method is called when the specified result of the +/// specified node is found to need vector splitting. At this point, the node +/// may also have invalid operands or may have other results that need +/// legalization, we just know that (at least) one result needs vector +/// splitting. +void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to split the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; + case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; + case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; + case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break; + case ISD::VECTOR_SHUFFLE: + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break; + + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FNEG: + case ISD::FABS: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::FPOW: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::UREM: + case ISD::SREM: + case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. 
+ if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + DebugLoc dl = N->getDebugLoc(); + + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + case SoftenFloat: + case ScalarizeVector: + break; + case ExpandInteger: + case ExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. + if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + break; + case SplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. 
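+  // For instance, splitting v2f64 = BIT_CONVERT (legal i128): the input is
+  // bitcast to an i128 integer, SplitInteger yields two i64 halves (swapped
+  // on big-endian targets), and each half is bitcast to a v1f64 part.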
+ MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits()); + MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits()); + if (TLI.isBigEndian()) + std::swap(LoIntVT, HiIntVT); + + SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + unsigned LoNumElts = LoVT.getVectorNumElements(); + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); + DebugLoc dl = N->getDebugLoc(); + unsigned NumSubvectors = N->getNumOperands() / 2; + if (NumSubvectors == 1) { + Lo = N->getOperand(0); + Hi = N->getOperand(1); + return; + } + + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + + // Split the input. + SDValue VLo, VHi; + MVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: assert(0 && "Unexpected type action!"); + case Legal: { + assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), VLo, VHi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. 
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + SDValue STyOpLo = DAG.getValueType(VLo.getValueType()); + SDValue STyOpHi = DAG.getValueType(VHi.getValueType()); + + Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + MVT IdxVT = Idx.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + // The indices are not guaranteed to be a multiple of the new vector + // size unless the original vector type was split in two. + assert(LoVT == HiVT && "Non power-of-two vectors not supported!"); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, + DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, Elt, Idx); + else + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getIntPtrConstant(IdxVal - LoNumElts)); + return; + } + + // Spill the vector to the stack. + MVT VecVT = Vec.getValueType(); + MVT EltVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT()); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the Hi part from the stack slot. 
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, + MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + MVT LoVT, HiVT; + DebugLoc dl = LD->getDebugLoc(); + GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + const Value *SV = LD->getSrcValue(); + int SVOffset = LD->getSrcValueOffset(); + MVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + MVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, + SV, SVOffset, LoMemVT, isVolatile, Alignment); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; + Alignment = MinAlign(Alignment, IncrementSize); + Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, + SV, SVOffset, HiMemVT, isVolatile, Alignment); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + MVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: assert(0 && "Unexpected type action!"); + case Legal: { + assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), Lo, Hi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. 
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + MVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + assert(NewVT == Inputs[1].getValueType() && + "Non power-of-two vectors not supported!"); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector<int, 16> Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + MVT EltVT = NewVT.getVectorElementType(); + SmallVector<SDValue, 16> SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. 
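+        // Use an undef element for this position of the BUILD_VECTOR.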
+ SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getIntPtrConstant(Idx))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. + Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + } + + Ops.clear(); + } +} + +void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SDValue LL, LH, RL, RH; + GetSplitVector(N->getOperand(0), LL, LH); + GetSplitVector(N->getOperand(1), RL, RH); + + Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorOperand - This method is called when the specified operand of the +/// specified node is found to need vector splitting. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to split this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. 
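+  // E.g. v4i32 = FP_TO_SINT v4f64 where v4f64 must be split: convert each
+  // v2f64 half to v2i32 and reassemble the halves with CONCAT_VECTORS.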
+ MVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Returns legal non-power-of-two vector type?"); + MVT InVT = Lo.getValueType(); + + MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { + // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will + // end up being split all the way down to individual components. Convert the + // split pieces into integers and reassemble. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + JoinIntegers(Lo, Hi)); +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + // We know that the extracted result type is legal. For now, assume the index + // is a constant. + MVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + if (IdxVal < LoElts) { + assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + "Extracted subvector crosses vector split!"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + } else { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, + DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + } +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + MVT VecVT = Vec.getValueType(); + + if (isa<ConstantSDNode>(Idx)) { + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); + + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + + if (IdxVal < LoElts) + return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); + else + return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + DAG.getConstant(IdxVal - LoElts, + Idx.getValueType())); + } + + // Store the vector to the stack. + MVT EltVT = VecVT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0); + + // Load back the required element. 
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0); +} + +SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed store of vector?"); + assert(OpNo == 1 && "Can only split the stored value"); + DebugLoc dl = N->getDebugLoc(); + + bool isTruncating = N->isTruncatingStore(); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + MVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getAlignment(); + bool isVol = N->isVolatile(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(1), Lo, Hi); + + MVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + + if (isTruncating) + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + LoMemVT, isVol, Alignment); + else + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + isVol, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + + if (isTruncating) + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getSrcValue(), SVOffset+IncrementSize, + HiMemVT, + isVol, MinAlign(Alignment, IncrementSize)); + else + Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, + isVol, MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Widening +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to widen the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break; + + case ISD::ADD: + case ISD::AND: + case ISD::BSWAP: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FPOWI: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: + case ISD::SUB: + case ISD::XOR: Res = WidenVecRes_Binary(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: Res = WidenVecRes_Shift(N); break; + + case 
ISD::ANY_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SIGN_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::ZERO_EXTEND:
+  case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break;
+
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCOS:
+  case ISD::FNEG:
+  case ISD::FSIN:
+  case ISD::FSQRT: Res = WidenVecRes_Unary(N); break;
+  }
+
+  // If Res is null, the sub-method took care of registering the result.
+  if (Res.getNode())
+    SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+  // Binary op widening.
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  MVT InVT = InOp.getValueType();
+  MVT InEltVT = InVT.getVectorElementType();
+  MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+
+  unsigned Opcode = N->getOpcode();
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+
+  if (getTypeAction(InVT) == WidenVector) {
+    InOp = GetWidenedVector(N->getOperand(0));
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getNode(Opcode, dl, WidenVT, InOp);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type, leading to repeatedly splitting the input and then
+    // widening it. To avoid this, we widen the input only if it results in
+    // a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(Opcode, dl, WidenVT,
+                         DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
+                                     &Ops[0], NumConcat));
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+      return DAG.getNode(Opcode, dl, WidenVT,
+                         DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
+                                     InOp, DAG.getIntPtrConstant(0)));
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
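+  // Each source element is extracted and converted individually; the extra
+  // lanes of the widened result are filled with undef.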
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  MVT EltVT = WidenVT.getVectorElementType();
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i = 0; i < MinElts; ++i)
+    Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+                                     DAG.getIntPtrConstant(i)));
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  SDValue ShOp = N->getOperand(1);
+
+  MVT ShVT = ShOp.getValueType();
+  if (getTypeAction(ShVT) == WidenVector) {
+    ShOp = GetWidenedVector(ShOp);
+    ShVT = ShOp.getValueType();
+  }
+  MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(),
+                                   WidenVT.getVectorNumElements());
+  if (ShVT != ShWidenVT)
+    ShOp = ModifyToType(ShOp, ShWidenVT);
+
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+  // Unary op widening.
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  MVT InVT = InOp.getValueType();
+  MVT VT = N->getValueType(0);
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  switch (getTypeAction(InVT)) {
+  default:
+    assert(false && "Unknown type action!");
+    break;
+  case Legal:
+    break;
+  case PromoteInteger:
+    // If the InOp is promoted to the same size, convert it. Otherwise,
+    // fall out of the switch and widen the promoted input.
+    InOp = GetPromotedInteger(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+    break;
+  case SoftenFloat:
+  case ExpandInteger:
+  case ExpandFloat:
+  case ScalarizeVector:
+  case SplitVector:
+    break;
+  case WidenVector:
+    // If the InOp is widened to the same size, convert it. Otherwise, fall
+    // out of the switch and widen the widened input.
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      // The input widens to the same size. Convert to the widened value.
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+    break;
+  }
+
+  unsigned WidenSize = WidenVT.getSizeInBits();
+  unsigned InSize = InVT.getSizeInBits();
+  if (WidenSize % InSize == 0) {
+    // Determine the new input vector type. The new input vector type will
+    // use the same element type (if it's a vector) or use the input type as
+    // a vector. It is the same size as the type to widen to.
+    MVT NewInVT;
+    unsigned NewNumElts = WidenSize / InSize;
+    if (InVT.isVector()) {
+      MVT InEltVT = InVT.getVectorElementType();
+      NewInVT = MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits());
+    } else {
+      NewInVT = MVT::getVectorVT(InVT, NewNumElts);
+    }
+
+    if (TLI.isTypeLegal(NewInVT)) {
+      // Because the result and the input are different vector types, widening
+      // the result could create a legal type but widening the input might make
+      // it an illegal type, leading to repeatedly splitting the input and then
+      // widening it. To avoid this, we widen the input only if it results in
+      // a legal type.
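+      // E.g. widening v2f32 = BIT_CONVERT v2i32 to v4f32: concatenate the
+      // v2i32 input with a v2i32 undef to form v4i32, then bitcast that
+      // vector to v4f32.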
+      SmallVector<SDValue, 16> Ops(NewNumElts);
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      Ops[0] = InOp;
+      for (unsigned i = 1; i < NewNumElts; ++i)
+        Ops[i] = UndefVal;
+
+      SDValue NewVec;
+      if (InVT.isVector())
+        NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      else
+        NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+    }
+  }
+
+  // This should occur rarely. Lower the bit-convert to a store/load
+  // from the stack. Create the stack frame object. Make sure it is aligned
+  // for both the source and destination types.
+  SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  // Pad the vector with undef values for the new elements.
+  MVT VT = N->getValueType(0);
+  MVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+  NewOps.reserve(WidenNumElts);
+  for (unsigned i = NumElts; i < WidenNumElts; ++i)
+    NewOps.push_back(DAG.getUNDEF(EltVT));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+  MVT InVT = N->getOperand(0).getValueType();
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  unsigned NumOperands = N->getNumOperands();
+
+  bool InputWidened = false; // Indicates we need to widen the input.
+  if (getTypeAction(InVT) != WidenVector) {
+    if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+      // Add undef vectors to widen to the correct length.
+      unsigned NumConcat = WidenVT.getVectorNumElements() /
+                           InVT.getVectorNumElements();
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      for (unsigned i = 0; i < NumOperands; ++i)
+        Ops[i] = N->getOperand(i);
+      for (unsigned i = NumOperands; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+    }
+  } else {
+    InputWidened = true;
+    if (WidenVT == TLI.getTypeToTransformTo(InVT)) {
+      // The inputs and the result are widened to the same type.
+      unsigned i;
+      for (i = 1; i < NumOperands; ++i)
+        if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+          break;
+
+      if (i == NumOperands)
+        // Everything but the first operand is an UNDEF so just return the
+        // widened first operand.
+        return GetWidenedVector(N->getOperand(0));
+
+      if (NumOperands == 2) {
+        // Replace the concat of two operands with a shuffle.
+        SmallVector<int, 16> MaskOps(WidenNumElts);
+        for (unsigned i = 0; i < WidenNumElts/2; ++i) {
+          MaskOps[i] = i;
+          MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
+        }
+        return DAG.getVectorShuffle(WidenVT, dl,
+                                    GetWidenedVector(N->getOperand(0)),
+                                    GetWidenedVector(N->getOperand(1)),
+                                    &MaskOps[0]);
+      }
+    }
+  }
+
+  // Fall back to using extracts and a build vector.
+  MVT EltVT = WidenVT.getVectorElementType();
+  unsigned NumInElts = InVT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  unsigned Idx = 0;
+  for (unsigned i = 0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (InputWidened)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j = 0; j < NumInElts; ++j)
+      Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                               DAG.getIntPtrConstant(j));
+  }
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; Idx < WidenNumElts; ++Idx)
+    Ops[Idx] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue InOp = N->getOperand(0);
+  SDValue RndOp = N->getOperand(3);
+  SDValue SatOp = N->getOperand(4);
+
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  MVT InVT = InOp.getValueType();
+  MVT InEltVT = InVT.getVectorElementType();
+  MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts);
+
+  SDValue DTyOp = DAG.getValueType(WidenVT);
+  SDValue STyOp = DAG.getValueType(InWidenVT);
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+  if (getTypeAction(InVT) == WidenVector) {
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type, leading to repeatedly splitting the input and then
+    // widening it. To avoid this, we widen the input only if it results in
+    // a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],
+                         NumConcat);
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+      InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+                         DAG.getIntPtrConstant(0));
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
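+  // Same strategy as WidenVecRes_Convert: convert each defined element with
+  // a scalar CONVERT_RNDSAT and pad the remaining widened lanes with undef.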
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = WidenVT.getVectorElementType(); + DTyOp = DAG.getValueType(EltVT); + STyOp = DAG.getValueType(InEltVT); + + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + MVT VT = N->getValueType(0); + MVT WidenVT = TLI.getTypeToTransformTo(VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + + MVT InVT = InOp.getValueType(); + + ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + // Check if we can just return the input vector after widening. + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + } + + // We could try widening the input to the right length but for now, extract + // the original elements, fill the rest with undefs and build a vector. + SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = VT.getVectorElementType(); + MVT IdxVT = Idx.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned i; + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + for (i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal+i, IdxVT)); + } else { + Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx); + for (i=1; i < NumElts; ++i) { + SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(i, IdxVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx); + } + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(), + InOp.getValueType(), InOp, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + MVT LdVT = LD->getMemoryVT(); + DebugLoc dl = N->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + const Value *SV = LD->getSrcValue(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SDValue Result; + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) { + // For extension 
loads, we cannot play the trick of chopping the load into legal
+    // vector pieces and bitcasting them to the right type. Instead, we unroll
+    // the load and build a vector.
+    MVT EltVT = WidenVT.getVectorElementType();
+    MVT LdEltVT = LdVT.getVectorElementType();
+    unsigned NumElts = LdVT.getVectorNumElements();
+
+    // Load each element and widen.
+    unsigned WidenNumElts = WidenVT.getVectorNumElements();
+    SmallVector<SDValue, 16> Ops(WidenNumElts);
+    unsigned Increment = LdEltVT.getSizeInBits() / 8;
+    Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+                            LdEltVT, isVolatile, Align);
+    LdChain.push_back(Ops[0].getValue(1));
+    unsigned i = 0, Offset = Increment;
+    for (i=1; i < NumElts; ++i, Offset += Increment) {
+      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                       BasePtr, DAG.getIntPtrConstant(Offset));
+      Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+                              SVOffset + Offset, LdEltVT, isVolatile, Align);
+      LdChain.push_back(Ops[i].getValue(1));
+    }
+
+    // Fill the rest with undefs.
+    SDValue UndefVal = DAG.getUNDEF(EltVT);
+    for (; i != WidenNumElts; ++i)
+      Ops[i] = UndefVal;
+
+    Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+  } else {
+    assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+    unsigned int LdWidth = LdVT.getSizeInBits();
+    Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset,
+                                 Align, isVolatile, LdWidth, WidenVT, dl);
+  }
+
+  // If we generate a single load, we can use that for the chain. Otherwise,
+  // build a factor node to remember that the multiple loads are independent
+  // and chain to that.
+  SDValue NewChain;
+  if (LdChain.size() == 1)
+    NewChain = LdChain[0];
+  else
+    NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0],
+                           LdChain.size());
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
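+  // (Result number 1 of a load is its output chain; users of the old chain
+  // must be pointed at the token factor so the component loads stay ordered
+  // with respect to them.)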
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+                     WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue Cond1 = N->getOperand(0);
+  MVT CondVT = Cond1.getValueType();
+  if (CondVT.isVector()) {
+    MVT CondEltVT = CondVT.getVectorElementType();
+    MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts);
+    if (getTypeAction(CondVT) == WidenVector)
+      Cond1 = GetWidenedVector(Cond1);
+
+    if (Cond1.getValueType() != CondWidenVT)
+      Cond1 = ModifyToType(Cond1, CondWidenVT);
+  }
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+  assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+  SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     InOp1.getValueType(), N->getOperand(0),
+                     N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Adjust mask based on new input vector length.
+  SmallVector<int, 16> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = N->getMaskElt(i);
+    if (Idx < (int)NumElts)
+      NewMask.push_back(Idx);
+    else
+      NewMask.push_back(Idx - NumElts + WidenNumElts);
+  }
+  for (unsigned i = NumElts; i != WidenNumElts; ++i)
+    NewMask.push_back(-1);
+  return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = N->getOperand(0);
+  MVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "Cannot widen a non-vector type");
+  MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts);
+  InOp1 = GetWidenedVector(InOp1);
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately. If not,
+  // we will have to unroll it at some point.
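+  // For example, a v3i32 setcc whose result widens to v4i32 becomes a v4i32
+  // VSETCC with an undefined fourth lane; that lane is harmless because
+  // consumers only read the original three elements.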
+ assert(InOp1.getValueType() == WidenInVT && + InOp2.getValueType() == WidenInVT && + "Input not widened to expected type!"); + return DAG.getNode(ISD::VSETCC, N->getDebugLoc(), + WidenVT, InOp1, InOp2, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Widen Vector Operand +//===----------------------------------------------------------------------===// +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to widen this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { + // Since the result is legal and the input is illegal, it is unlikely + // that we can fix the input to a legal type so unroll the convert + // into some scalar code and create a nasty build vector. + MVT VT = N->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + SmallVector<SDValue, 16> Ops(NumElts); + for (unsigned i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { + MVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InWidenVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Check if we can convert between two legal vector types and extract. + unsigned InWidenSize = InWidenVT.getSizeInBits(); + unsigned Size = VT.getSizeInBits(); + if (InWidenSize % Size == 0 && !VT.isVector()) { + unsigned NewNumElts = InWidenSize / Size; + MVT NewVT = MVT::getVectorVT(VT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getIntPtrConstant(0)); + } + } + + // Lower the bit-convert to a store/load from the stack. Create the stack + // frame object. Make sure it is aligned for both the source and destination + // types. 
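+  // (CreateStackTemporary sizes and aligns the slot for the larger of the two
+  // types, so the wide vector store cannot overrun the slot that the narrow
+  // result is loaded back from.)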
+  SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0);
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+  // If the input vector is not legal, it is likely that we will not find a
+  // legal vector of the same size. Replace the concatenation with a
+  // nasty build vector.
+  MVT VT = N->getValueType(0);
+  MVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(NumElts);
+
+  MVT InVT = N->getOperand(0).getValueType();
+  unsigned NumInElts = InVT.getVectorNumElements();
+
+  unsigned Idx = 0;
+  unsigned NumOperands = N->getNumOperands();
+  for (unsigned i=0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (getTypeAction(InOp.getValueType()) == WidenVector)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j=0; j < NumInElts; ++j)
+      Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                               DAG.getIntPtrConstant(j));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  MVT EltVT = InOp.getValueType().getVectorElementType();
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     EltVT, InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value, but we only want to store the original
+  // vector type.
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  const Value *SV = ST->getSrcValue();
+  int SVOffset = ST->getSrcValueOffset();
+  unsigned Align = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  SDValue ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT StVT = ST->getMemoryVT();
+  MVT ValVT = ValOp.getValueType();
+  // It must be true that the widened vector type is bigger than the type
+  // we need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.bitsLT(ValOp.getValueType()));
+
+  SmallVector<SDValue, 16> StChain;
+  if (ST->isTruncatingStore()) {
+    // For truncating stores, we cannot play the trick of chopping the value
+    // into legal vector pieces and bitcasting them to the memory type.
+    // Instead, we unroll the store.
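+    // Each element is extracted from the widened value at its original index
+    // and written back with a truncating store of the memory element type at
+    // the matching offset.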
+    MVT StEltVT = StVT.getVectorElementType();
+    MVT ValEltVT = ValVT.getVectorElementType();
+    unsigned Increment = ValEltVT.getSizeInBits() / 8;
+    unsigned NumElts = StVT.getVectorNumElements();
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(0));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+                                        SVOffset, StEltVT,
+                                        isVolatile, Align));
+    unsigned Offset = Increment;
+    for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                       BasePtr, DAG.getIntPtrConstant(Offset));
+      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                                DAG.getIntPtrConstant(i));
+      StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+                                          SVOffset + Offset, StEltVT,
+                                          isVolatile, MinAlign(Align, Offset)));
+    }
+  }
+  else {
+    assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
+    // Store the value.
+    GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
+                         Align, isVolatile, ValOp, StVT.getSizeInBits(), dl);
+  }
+  if (StChain.size() == 1)
+    return StChain[0];
+  else
+    return DAG.getNode(ISD::TokenFactor, dl,
+                       MVT::Other, &StChain[0], StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+
+// Utility function to find a legal vector type and its associated element
+// type, given a preferred element width, where the vector type must have
+// the same size as VecVT.
+//   TLI:   Target lowering used to determine legal types.
+//   Width: Preferred element width to load or store.
+//   VecVT: Vector value type whose size we must match.
+// Returns NewVecVT and NewEltVT - the vector type and its associated
+// element type.
+static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
+                                  MVT VecVT,
+                                  MVT& NewEltVT, MVT& NewVecVT) {
+  unsigned EltWidth = Width + 1;
+  if (TLI.isTypeLegal(VecVT)) {
+    // We start with the preferred width, rounded down to a power of 2, and
+    // look for a legal vector type of that element width. If none is found,
+    // we keep halving the width. Since the incoming type is legal, this
+    // process terminates: a vector of the smallest loadable type should
+    // always be legal.
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewVecVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  } else {
+    // The incoming vector type is illegal and is the result of widening
+    // a vector to a power of 2. In this case, we will use the preferred
+    // width as long as vectors of that element width still match the
+    // incoming vector's size. The legalization process will eventually
+    // make this into a legal type and remove the illegal bit converts
+    // (which would turn into stack conversions if they were allowed to
+    // persist).
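+    // For example, if v4i16 is not legal on the target and Width = 64, the
+    // loop below settles on NewEltVT = i64 and NewVecVT = v1i64 (assuming
+    // VecVT = v4i16, 64 bits, and a target where i64 is legal): the element
+    // type is legal even if the vector type is not, which is enough for the
+    // loads and stores generated from it.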
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewEltVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  }
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                                              SDValue Chain,
+                                              SDValue BasePtr,
+                                              const Value *SV,
+                                              int SVOffset,
+                                              unsigned Alignment,
+                                              bool isVolatile,
+                                              unsigned LdWidth,
+                                              MVT ResType,
+                                              DebugLoc dl) {
+  // The strategy assumes that we can efficiently load power-of-2 widths.
+  // The routine chops the load into power-of-2 pieces that can be inserted
+  // into a legal vector, and then bitcasts the result into the vector type
+  // we want. This avoids unnecessary stack conversions.
+
+  // TODO: If LdWidth is legal, the alignment matches LdWidth, and the load
+  // is non-volatile, we can use a single wider load for the value.
+
+  // Find the vector type and element type to load with.
+  MVT NewEltVT, NewVecVT;
+  unsigned NewEltVTWidth;
+  FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+  NewEltVTWidth = NewEltVT.getSizeInBits();
+
+  SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
+                             isVolatile, Alignment);
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+  LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can load the element with one instruction.
+  if (LdWidth == NewEltVTWidth) {
+    return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+  }
+
+  unsigned Idx = 1;
+  LdWidth -= NewEltVTWidth;
+  unsigned Offset = 0;
+
+  while (LdWidth > 0) {
+    unsigned Increment = NewEltVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    if (LdWidth < NewEltVTWidth) {
+      // The current element type is too large; switch to the next smaller
+      // power-of-2 width.
+      unsigned oNewEltVTWidth = NewEltVTWidth;
+      FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+      NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the insertion index and the vector based on the new load
+      // type.
+      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+    }
+
+    SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV,
+                               SVOffset+Offset, isVolatile,
+                               MinAlign(Alignment, Offset));
+    LdChain.push_back(LdOp.getValue(1));
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp,
+                        DAG.getIntPtrConstant(Idx++));
+
+    LdWidth -= NewEltVTWidth;
+  }
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+}
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                                            SDValue Chain,
+                                            SDValue BasePtr,
+                                            const Value *SV,
+                                            int SVOffset,
+                                            unsigned Alignment,
+                                            bool isVolatile,
+                                            SDValue ValOp,
+                                            unsigned StWidth,
+                                            DebugLoc dl) {
+  // Breaks the store into a series of power-of-2 width stores. For each
+  // width, we convert the vector to a vector of the element size that we
+  // want to store. This avoids requiring a stack conversion.
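+  // For example, storing 96 bits of a value widened to v4i32, on a target
+  // where v2i64 is legal: the value is bitcast to v2i64 and element 0 is
+  // stored as one i64; the vector is then bitcast back to v4i32 and element 2
+  // is stored as an i32 at offset 8, covering the remaining 32 bits.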
+
+  // Find a width of the element type that we can store with.
+  MVT WidenVT = ValOp.getValueType();
+  MVT NewEltVT, NewVecVT;
+
+  FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+  unsigned NewEltVTWidth = NewEltVT.getSizeInBits();
+
+  SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
+                            DAG.getIntPtrConstant(0));
+  SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset,
+                              isVolatile, Alignment);
+  StChain.push_back(StOp);
+
+  // Check if we are done.
+  if (StWidth == NewEltVTWidth) {
+    return;
+  }
+
+  unsigned Idx = 1;
+  StWidth -= NewEltVTWidth;
+  unsigned Offset = 0;
+
+  while (StWidth > 0) {
+    unsigned Increment = NewEltVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    if (StWidth < NewEltVTWidth) {
+      // The current element type is too large; switch to the next smaller
+      // power-of-2 width.
+      unsigned oNewEltVTWidth = NewEltVTWidth;
+      FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
+      NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the extraction index and the vector based on the new store
+      // type.
+      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+    }
+
+    EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
+                      DAG.getIntPtrConstant(Idx++));
+    StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+                                   SVOffset + Offset, isVolatile,
+                                   MinAlign(Alignment, Offset)));
+    StWidth -= NewEltVTWidth;
+  }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) {
+  // Note that InOp might have been widened so it might already have
+  // the right width or it might need to be narrowed.
+  MVT InVT = InOp.getValueType();
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widened element type must match");
+  DebugLoc dl = InOp.getDebugLoc();
+
+  // Check if InOp already has the right width.
+  if (InVT == NVT)
+    return InOp;
+
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = NVT.getVectorNumElements();
+  if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+    unsigned NumConcat = WidenNumElts / InNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue UndefVal = DAG.getUNDEF(InVT);
+    Ops[0] = InOp;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      Ops[i] = UndefVal;
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+  }
+
+  if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+                       DAG.getIntPtrConstant(0));
+
+  // Fall back to extract and build.
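+  // (With the guard above, a case like narrowing v4i32 to v2i32 reaches this
+  // fallback and the two elements are extracted individually.)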
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = NVT.getVectorElementType(); + unsigned MinNumElts = std::min(WidenNumElts, InNumElts); + unsigned Idx; + for (Idx = 0; Idx < MinNumElts; ++Idx) + Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(Idx)); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for ( ; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts); +} diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile new file mode 100644 index 0000000..185222a --- /dev/null +++ b/lib/CodeGen/SelectionDAG/Makefile @@ -0,0 +1,15 @@ +##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMSelectionDAG +PARALLEL_DIRS = +BUILD_ARCHIVE = 1 +DONT_BUILD_RELINKED = 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp new file mode 100644 index 0000000..af73b28 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -0,0 +1,635 @@ +//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a fast scheduler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical copies"); + +static RegisterScheduler + fastDAGScheduler("fast", "Fast suboptimal list scheduling", + createFastDAGScheduler); + +namespace { + /// FastPriorityQueue - A degenerate priority queue that considers + /// all nodes to have the same priority. + /// + struct VISIBILITY_HIDDEN FastPriorityQueue { + SmallVector<SUnit *, 16> Queue; + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + Queue.push_back(U); + } + + SUnit *pop() { + if (empty()) return NULL; + SUnit *V = Queue.back(); + Queue.pop_back(); + return V; + } + }; + +//===----------------------------------------------------------------------===// +/// ScheduleDAGFast - The actual "fast" list scheduler implementation. +/// +class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + FastPriorityQueue AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". 
These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector<SUnit*> LiveRegDefs; + std::vector<unsigned> LiveRegCycles; + +public: + ScheduleDAGFast(MachineFunction &mf) + : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + void AddPred(SUnit *SU, const SDep &D) { + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + void RemovePred(SUnit *SU, const SDep &D) { + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ScheduleNodeBottomUp(SUnit*, unsigned); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector<SUnit*, 2>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + void ListScheduleBottomUp(); + + /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGFast::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + // Execute the actual scheduling loop. + ListScheduleBottomUp(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + --PredSU->NumSuccsLeft; + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue.push(PredSU); + } +} + +void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. 
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getFlaggedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + MVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Flag) + return NULL; + } + + if (TryUnfold) { + SmallVector<SDNode*, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = NewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = NewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPred = *I; + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + RemovePred(DelDeps[i].first, DelDeps[i].second); + } + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + unsigned Reg = I->getReg(); + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { + if (RegAdded.insert(Reg)) + LRegs.push_back(Reg); + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); + *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { + if (RegAdded.insert(*Alias)) + LRegs.push_back(*Alias); + } + } + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { + if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { + if (RegAdded.insert(*Reg)) + LRegs.push_back(*Reg); + } + for (const unsigned *Alias = TRI->getAliasSet(*Reg); + *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) + LRegs.push_back(*Alias); + } + } + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGFast::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. 
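+  // (In a bottom-up schedule the root of the DAG executes last, so it is the
+  // unique node with no successors and can be made available immediately.)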
+ if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue.push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector<SUnit*, 4> NotReady; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue.empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue.pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue.pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try code duplication or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + if (!CurSU) { + // Try duplicating the nodes that produces these + // "expensive to copy" values to break the dependency. In case even + // that doesn't work, insert cross class copies. + SUnit *TrySU = NotReady[0]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. + SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DOUT << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"; + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DOUT << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"; + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + if (!CurSU) { + assert(false && "Unable to resolve live physical register dependencies!"); + abort(); + } + } + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + NotReady[i]->isPending = false; + // May no longer be available due to backtracking. + if (NotReady[i]->isAvailable) + AvailableQueue.push(NotReady[i]); + } + NotReady.clear(); + + if (CurSU) + ScheduleNodeBottomUp(CurSU, CurCycle); + ++CurCycle; + } + + // Reverse the order if it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + + +#ifndef NDEBUG + // Verify that all SUnits were scheduled. 
+ bool AnyNotSched = false; + unsigned DeadNodes = 0; + unsigned Noops = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { + ++DeadNodes; + continue; + } + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(this); + cerr << "has not been scheduled!\n"; + AnyNotSched = true; + } + if (SUnits[i].NumSuccsLeft != 0) { + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(this); + cerr << "has successors left!\n"; + AnyNotSched = true; + } + } + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(!AnyNotSched); + assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && + "The number of nodes scheduled doesn't match the expected number!"); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp new file mode 100644 index 0000000..c432534 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -0,0 +1,268 @@ +//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/Statistic.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + tdListDAGScheduler("list-td", "Top-down list scheduler", + createTDListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGList - The actual list scheduler implementation. This supports +/// top-down scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. 
+ /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + +public: + ScheduleDAGList(MachineFunction &mf, + SchedulingPriorityQueue *availqueue, + ScheduleHazardRecognizer *HR) + : ScheduleDAGSDNodes(mf), + AvailableQueue(availqueue), HazardRec(HR) { + } + + ~ScheduleDAGList() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void ReleaseSucc(SUnit *SU, const SDep &D); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + // Build the scheduling graph. + BuildSchedGraph(); + + AvailableQueue->initNodes(SUnits); + + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + --SuccSU->NumPredsLeft; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, *I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. 
+ if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DOUT << "*** Advancing cycle, no work to do\n"; + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DOUT << "*** Emitting noop\n"; + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createTDListDAGScheduler - This creates a top-down list scheduler with a +/// new hazard recognizer. This scheduler takes ownership of the hazard +/// recognizer and deletes it when done. 
+ScheduleDAGSDNodes * +llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGList(*IS->MF, + new LatencyPriorityQueue(), + IS->CreateTargetHazardRecognizer()); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 0000000..c97e2a8 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,1533 @@ +//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical register copies"); + +static RegisterScheduler + burrListDAGScheduler("list-burr", + "Bottom-up register reduction list scheduling", + createBURRListDAGScheduler); +static RegisterScheduler + tdrListrDAGScheduler("list-tdrr", + "Top-down register reduction list scheduling", + createTDRRListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGRRList - The actual register reduction list scheduler +/// implementation. This supports both top-down and bottom-up scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { +private: + /// isBottomUp - This is true if the scheduling problem is bottom-up, false if + /// it is top-down. + bool isBottomUp; + + /// AvailableQueue - The priority queue to use for the available SUnits. + SchedulingPriorityQueue *AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector<SUnit*> LiveRegDefs; + std::vector<unsigned> LiveRegCycles; + + /// Topo - A topological ordering for SUnits which permits fast IsReachable + /// and similar queries. 
+ ScheduleDAGTopologicalSort Topo; + +public: + ScheduleDAGRRList(MachineFunction &mf, + bool isbottomup, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), + AvailableQueue(availqueue), Topo(SUnits) { + } + + ~ScheduleDAGRRList() { + delete AvailableQueue; + } + + void Schedule(); + + /// IsReachable - Checks if SU is reachable from TargetSU. + bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { + return Topo.IsReachable(SU, TargetSU); + } + + /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will + /// create a cycle. + bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + return Topo.WillCreateCycle(SU, TargetSU); + } + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + /// Updates the topological ordering if required. + void AddPred(SUnit *SU, const SDep &D) { + Topo.AddPred(SU, D.getSUnit()); + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + /// Updates the topological ordering if required. + void RemovePred(SUnit *SU, const SDep &D) { + Topo.RemovePred(SU, D.getSUnit()); + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, const SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); + void ReleaseSuccessors(SUnit *SU); + void CapturePred(SDep *PredEdge); + void ScheduleNodeBottomUp(SUnit*, unsigned); + void ScheduleNodeTopDown(SUnit*, unsigned); + void UnscheduleNodeBottomUp(SUnit*); + void BacktrackBottomUp(SUnit*, unsigned, unsigned&); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector<SUnit*, 2>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + void ListScheduleTopDown(); + void ListScheduleBottomUp(); + + + /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. + /// Updates the topological ordering if required. + SUnit *CreateNewSUnit(SDNode *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = NewSUnit(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// CreateClone - Creates a new SUnit from an existing one. + /// Updates the topological ordering if required. + SUnit *CreateClone(SUnit *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = Clone(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// ForceUnitLatencies - Return true, since register-pressure-reducing + /// scheduling doesn't need actual latency information. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + Topo.InitDAGTopologicalSorting(); + + AvailableQueue->initNodes(SUnits); + + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 
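
IsReachable and WillCreateCycle above are served by ScheduleDAGTopologicalSort, whose central trick is a per-node topological index: every edge goes forward in that index, so reachability queries can prune hard. A minimal standalone illustration of the pruning idea (hypothetical code; the LLVM class additionally repairs the ordering incrementally as edges are added and removed):

    #include <vector>

    // Succs[n] lists n's successors; Index[n] is n's topological position,
    // so every edge goes from a smaller Index to a larger one.
    static bool isReachable(unsigned From, unsigned To,
                            const std::vector<std::vector<unsigned> > &Succs,
                            const std::vector<unsigned> &Index) {
      if (From == To)
        return true;
      if (Index[From] > Index[To])
        return false;  // Paths cannot go backwards in topological order.
      for (unsigned S : Succs[From])  // Real code also keeps a visited set.
        if (isReachable(S, To, Succs, Index))
          return true;
      return false;
    }

    // A WillCreateCycle query then reduces to reachability: adding an edge
    // A -> B closes a cycle exactly when B already reaches A.
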
+  if (isBottomUp)
+    ListScheduleBottomUp();
+  else
+    ListScheduleTopDown();
+
+  AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+//  Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+  --PredSU->NumSuccsLeft;
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft < 0) {
+    cerr << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+
+  // If all the node's successors are scheduled, this node is ready
+  // to be scheduled. Ignore the special EntrySU node.
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+    PredSU->isAvailable = true;
+    AvailableQueue->push(PredSU);
+  }
+}
+
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+  // Bottom up: release predecessors
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    ReleasePred(SU, &*I);
+    if (I->isAssignedRegDep()) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      if (!LiveRegDefs[I->getReg()]) {
+        ++NumLiveRegs;
+        LiveRegDefs[I->getReg()] = I->getSUnit();
+        LiveRegCycles[I->getReg()] = CurCycle;
+      }
+    }
+  }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor's pending count reaches zero,
+/// add it to the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(this));
+
+  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+  SU->setHeightToAtLeast(CurCycle);
+  Sequence.push_back(SU);
+
+  ReleasePredecessors(SU, CurCycle);
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        assert(LiveRegDefs[I->getReg()] == SU &&
+               "Physical register dependency violated?");
+        --NumLiveRegs;
+        LiveRegDefs[I->getReg()] = NULL;
+        LiveRegCycles[I->getReg()] = 0;
+      }
+    }
+  }
+
+  SU->isScheduled = true;
+  AvailableQueue->ScheduledNode(SU);
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+/// and its predecessors' states to reflect the change.
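
ReleasePredecessors and ScheduleNodeBottomUp above maintain an invariant around physical registers: once a reg-dependence edge is released, LiveRegDefs[Reg] pins the defining node, and nothing that clobbers Reg may issue until that def is scheduled. The bookkeeping amounts to a small acquire/release table; a toy sketch with hypothetical types:

    #include <cassert>
    #include <vector>

    // Toy equivalent of the LiveRegDefs/LiveRegCycles/NumLiveRegs triple.
    struct LiveRegTable {
      std::vector<const void *> Def;  // Owning node per physreg, or null.
      std::vector<unsigned> Cycle;    // Cycle at which the reg became live.
      unsigned NumLive = 0;

      explicit LiveRegTable(unsigned NumRegs)
          : Def(NumRegs, nullptr), Cycle(NumRegs, 0) {}

      // Called when a reg-dependence edge is released (ReleasePredecessors).
      void acquire(unsigned Reg, const void *Node, unsigned CurCycle) {
        if (!Def[Reg]) {
          ++NumLive;
          Def[Reg] = Node;
          Cycle[Reg] = CurCycle;
        }
      }

      // Called when the defining node is scheduled (ScheduleNodeBottomUp).
      void release(unsigned Reg, const void *Node) {
        assert(Def[Reg] == Node && "physical register dependency violated");
        assert(NumLive > 0 && "NumLive is already zero");
        --NumLive;
        Def[Reg] = nullptr;
        Cycle[Reg] = 0;
      }
    };
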
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { + DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; + DEBUG(SU->dump(this)); + + AvailableQueue->UnscheduledNode(SU); + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + CapturePred(&*I); + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == I->getSUnit() && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) { + LiveRegDefs[I->getReg()] = SU; + ++NumLiveRegs; + } + if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) + LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); + } + } + + SU->setHeightDirty(); + SU->isScheduled = false; + SU->isAvailable = true; + AvailableQueue->push(SU); +} + +/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in +/// BTCycle in order to schedule a specific node. +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, + unsigned &CurCycle) { + SUnit *OldSU = NULL; + while (CurCycle > BtCycle) { + OldSU = Sequence.back(); + Sequence.pop_back(); + if (SU->isSucc(OldSU)) + // Don't try to remove SU from AvailableQueue. + SU->isAvailable = false; + UnscheduleNodeBottomUp(OldSU); + --CurCycle; + } + + assert(!SU->isSucc(OldSU) && "Something is wrong!"); + + ++NumBacktracks; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getFlaggedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + MVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Flag) + return NULL; + } + + if (TryUnfold) { + SmallVector<SDNode*, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
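
BacktrackBottomUp above is, structurally, an undo loop: pop the most recent scheduling decision and invert it until the clock returns to the target cycle. Its generic shape, with a hypothetical Action record standing in for a scheduled SUnit:

    #include <vector>

    struct Action { unsigned Id; };  // One recorded scheduling step.

    static void backtrack(std::vector<Action> &Sequence, unsigned BtCycle,
                          unsigned &CurCycle, void (*Undo)(const Action &)) {
      while (CurCycle > BtCycle) {
        Action A = Sequence.back();
        Sequence.pop_back();
        Undo(A);  // Corresponds to UnscheduleNodeBottomUp.
        --CurCycle;
      }
    }
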
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + ComputeLatency(LoadSU); + } + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + ComputeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector<SDep, 4> ChainPreds; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPreds.push_back(*I); + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + // Now assign edges to the newly-created nodes. + for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { + const SDep &Pred = ChainPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = CreateClone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(NewSU); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = CreateNewSUnit(NULL); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = CreateNewSUnit(NULL); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(CopyFromSU); + AvailableQueue->addNode(CopyToSU); + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. 
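
getPhysicalRegisterVT above leans on an operand-layout convention: a machine node's explicit defs come first, followed by its implicit defs in ImplicitDefs order, so the result index of an implicitly defined register is the def count plus the register's position in that 0-terminated list. A standalone sketch of that indexing (hypothetical register ids):

    #include <cstdio>

    // ImpDefs mirrors TargetInstrDesc::ImplicitDefs: a 0-terminated list.
    static unsigned physRegResultIndex(unsigned NumExplicitDefs,
                                       const unsigned *ImpDefs, unsigned Reg) {
      unsigned Idx = NumExplicitDefs;
      for (; *ImpDefs; ++ImpDefs) {
        if (*ImpDefs == Reg)
          break;
        ++Idx;
      }
      return Idx;  // The caller queries the node's value type at this index.
    }

    int main() {
      const unsigned ImpDefs[] = {17, 42, 99, 0};  // Hypothetical reg ids.
      std::printf("%u\n", physRegResultIndex(1, ImpDefs, 42));  // Prints 2.
    }
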
+/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = (Flags & 0xffff) >> 3; + + ++i; // Skip the ID value. + if ((Flags & 7) == 2 || (Flags & 7) == 6) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue->push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector<SUnit*, 4> NotReady; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue->pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue->pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + SUnit *TrySU = NotReady[i]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. 
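
The INLINEASM walk above decodes LLVM's packed per-operand flag words: the low three bits hold the operand kind (2 for a register def, 6 for an early-clobber def in this code), and bits 3..15 hold the number of register operands that follow. A minimal decode of such a stream, assuming the same packing:

    #include <cstdio>
    #include <vector>

    int main() {
      // Fake operand stream: one flag word, then NumVals register ids.
      std::vector<unsigned> Ops = {(3u << 3) | 2u, 10, 11, 12};
      for (size_t i = 0; i < Ops.size();) {
        unsigned Flags = Ops[i++];
        unsigned Kind = Flags & 7;             // Operand class.
        unsigned NumVals = (Flags & 0xffff) >> 3;
        if (Kind == 2 || Kind == 6) {          // Def or early-clobber def.
          for (; NumVals; --NumVals)
            std::printf("defines reg %u\n", Ops[i++]);
        } else {
          i += NumVals;                        // Skip uses and other kinds.
        }
      }
    }
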
+        unsigned LiveCycle = CurCycle;
+        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+          unsigned Reg = LRegs[j];
+          unsigned LCycle = LiveRegCycles[Reg];
+          LiveCycle = std::min(LiveCycle, LCycle);
+        }
+        SUnit *OldSU = Sequence[LiveCycle];
+        if (!WillCreateCycle(TrySU, OldSU)) {
+          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
+          // Force the current node to be scheduled before the node that
+          // requires the physical reg dep.
+          if (OldSU->isAvailable) {
+            OldSU->isAvailable = false;
+            AvailableQueue->remove(OldSU);
+          }
+          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false, /*isArtificial=*/true));
+          // If one or more successors have been unscheduled, then the
+          // current node is no longer available. Schedule a successor
+          // that's now available instead.
+          if (!TrySU->isAvailable)
+            CurSU = AvailableQueue->pop();
+          else {
+            CurSU = TrySU;
+            TrySU->isPending = false;
+            NotReady.erase(NotReady.begin()+i);
+          }
+          break;
+        }
+      }
+
+      if (!CurSU) {
+        // Can't backtrack. If it's too expensive to copy the value, then try
+        // duplicating the nodes that produce these "too expensive to copy"
+        // values to break the dependency. In case even that doesn't work,
+        // insert cross class copies.
+        // If it's not too expensive, i.e. cost != -1, issue copies.
+        SUnit *TrySU = NotReady[0];
+        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+        assert(LRegs.size() == 1 && "Can't handle this yet!");
+        unsigned Reg = LRegs[0];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getPhysicalRegisterRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is null, then it must be possible
+        // to copy the value directly. Do not try to duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
+        if (!NewDef) {
+          // Issue copies; these can be expensive cross-register-class copies.
+          SmallVector<SUnit*, 2> Copies;
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          DOUT << "Adding an edge from SU #" << TrySU->NodeNum
+               << " to SU #" << Copies.front()->NodeNum << "\n";
+          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+                              /*Reg=*/0, /*isNormalMemory=*/false,
+                              /*isMustAlias=*/false,
+                              /*isArtificial=*/true));
+          NewDef = Copies.back();
+        }
+
+        DOUT << "Adding an edge from SU #" << NewDef->NodeNum
+             << " to SU #" << TrySU->NodeNum << "\n";
+        LiveRegDefs[Reg] = NewDef;
+        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+                             /*Reg=*/0, /*isNormalMemory=*/false,
+                             /*isMustAlias=*/false,
+                             /*isArtificial=*/true));
+        TrySU->isAvailable = false;
+        CurSU = NewDef;
+      }
+
+      assert(CurSU && "Unable to resolve live physical register dependencies!");
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      // May no longer be available due to backtracking.
+      if (NotReady[i]->isAvailable)
+        AvailableQueue->push(NotReady[i]);
+    }
+    NotReady.clear();
+
+    if (CurSU)
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+    ++CurCycle;
+  }
+
+  // Reverse the order if it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + --SuccSU->NumPredsLeft; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + SuccSU->isAvailable = true; + AvailableQueue->push(SuccSU); + } +} + +void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-tdrr scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, &*I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGRRList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + SUnit *CurSU = AvailableQueue->pop(); + + if (CurSU) + ScheduleNodeTopDown(CurSU, CurCycle); + ++CurCycle; + } + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Implementation +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +namespace { + template<class SF> + class RegReductionPriorityQueue; + + /// Sorting functions for the Available queue. 
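
The Sethi-Ullman numbers referenced above estimate how many registers evaluating a node's subtree requires; scheduling by them keeps register pressure down. The textbook tree form of the computation looks like this (a standalone sketch; CalcNodeSethiUllmanNumber below adapts the idea to DAGs):

    #include <algorithm>
    #include <cstdio>

    struct Expr { const Expr *L, *R; };

    // Classic Sethi-Ullman labeling: a leaf needs one register; an interior
    // node needs the larger of its children's needs if they differ, or one
    // more register if they tie.
    static unsigned sethiUllman(const Expr *E) {
      if (!E->L && !E->R)
        return 1;
      unsigned NL = E->L ? sethiUllman(E->L) : 0;
      unsigned NR = E->R ? sethiUllman(E->R) : 0;
      return NL == NR ? NL + 1 : std::max(NL, NR);
    }

    int main() {
      Expr A{nullptr, nullptr}, B = A, C = A, D = A;  // Leaves.
      Expr X{&A, &B}, Y{&C, &D}, Root{&X, &Y};
      std::printf("%u\n", sethiUllman(&Root));        // Prints 3.
    }
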
+ struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; + bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; + td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; +} // end anonymous namespace + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. +static unsigned +CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { + unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Extra = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + SUnit *PredSU = I->getSUnit(); + unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + + return SethiUllmanNumber; +} + +namespace { + template<class SF> + class VISIBILITY_HIDDEN RegReductionPriorityQueue + : public SchedulingPriorityQueue { + PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; + unsigned currentQueueId; + + protected: + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + public: + RegReductionPriorityQueue(const TargetInstrInfo *tii, + const TargetRegisterInfo *tri) : + Queue(SF(this)), currentQueueId(0), + TII(tii), TRI(tri), scheduleDAG(NULL) {} + + void initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + } + + void addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + } + + unsigned getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. 
+        return 0;
+      if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+          Opc == TargetInstrInfo::SUBREG_TO_REG ||
+          Opc == TargetInstrInfo::INSERT_SUBREG)
+        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+        // close to their uses to facilitate coalescing.
+        return 0;
+      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+        // If SU does not have a register use, i.e. it doesn't produce a value
+        // that would be consumed (e.g. store), then it terminates a chain of
+        // computation. Give it a large SethiUllman number so it will be
+        // scheduled right before its predecessors, so that it doesn't
+        // lengthen their live ranges.
+        return 0xffff;
+      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+        // If SU does not have a register def, schedule it close to its uses
+        // because it does not lengthen any live ranges.
+        return 0;
+      return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+    unsigned size() const { return Queue.size(); }
+
+    bool empty() const { return Queue.empty(); }
+
+    void push(SUnit *U) {
+      assert(!U->NodeQueueId && "Node in the queue already");
+      U->NodeQueueId = ++currentQueueId;
+      Queue.push(U);
+    }
+
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+        push(Nodes[i]);
+    }
+
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.top();
+      Queue.pop();
+      V->NodeQueueId = 0;
+      return V;
+    }
+
+    void remove(SUnit *SU) {
+      assert(!Queue.empty() && "Queue is empty!");
+      assert(SU->NodeQueueId != 0 && "Not in queue!");
+      Queue.erase_one(SU);
+      SU->NodeQueueId = 0;
+    }
+
+    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+      scheduleDAG = scheduleDag;
+    }
+
+  protected:
+    bool canClobber(const SUnit *SU, const SUnit *Op);
+    void AddPseudoTwoAddrDeps();
+    void PrescheduleNodesWithMultipleUses();
+    void CalculateSethiUllmanNumbers();
+  };
+
+  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+    BURegReductionPriorityQueue;
+
+  typedef RegReductionPriorityQueue<td_ls_rr_sort>
+    TDRegReductionPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+  unsigned MaxHeight = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain succs
+    unsigned Height = I->getSUnit()->getHeight();
+    // If there are a bunch of CopyToRegs stacked up, they should be considered
+    // to be at the same position.
+    if (I->getSUnit()->getNode() &&
+        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+      Height = closestSucc(I->getSUnit())+1;
+    if (Height > MaxHeight)
+      MaxHeight = Height;
+  }
+  return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Scratches++;
+  }
+  return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  if (LPriority != RPriority)
+    return LPriority > RPriority;
+
+  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+  // e.g.
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // and the following instructions are both ready.
+ // t2 = op c3 + // t4 = op c4 + // + // Then schedule t2 = op first. + // i.e. + // t4 = op c4 + // t2 = op c3 + // t1 = op t2, c1 + // t3 = op t4, c2 + // + // This creates more short live intervals. + unsigned LDist = closestSucc(left); + unsigned RDist = closestSucc(right); + if (LDist != RDist) + return LDist < RDist; + + // How many registers becomes live when the node is scheduled. + unsigned LScratch = calcMaxScratches(left); + unsigned RScratch = calcMaxScratches(right); + if (LScratch != RScratch) + return LScratch > RScratch; + + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +template<class SF> +bool +RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { + if (SU->isTwoAddress) { + unsigned Opc = SU->getNode()->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + unsigned NumRes = TID.getNumDefs(); + unsigned NumOps = TID.getNumOperands() - NumRes; + for (unsigned i = 0; i != NumOps; ++i) { + if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { + SDNode *DU = SU->getNode()->getOperand(i).getNode(); + if (DU->getNodeId() != -1 && + Op->OrigNode == &(*SUnits)[DU->getNodeId()]) + return true; + } + } + } + return false; +} + + +/// hasCopyToRegUse - Return true if SU has a value successor that is a +/// CopyToReg node. +static bool hasCopyToRegUse(const SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *SuccSU = I->getSUnit(); + if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) + return true; + } + return false; +} + +/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's +/// physical register defs. +static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SDNode *N = SuccSU->getNode(); + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + assert(ImpDefs && "Caller should check hasPhysRegDefs"); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getFlaggedNode()) { + if (!SUNode->isMachineOpcode()) + continue; + const unsigned *SUImpDefs = + TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); + if (!SUImpDefs) + return false; + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned Reg = ImpDefs[i - NumDefs]; + for (;*SUImpDefs; ++SUImpDefs) { + unsigned SUReg = *SUImpDefs; + if (TRI->regsOverlap(Reg, SUReg)) + return true; + } + } + } + return false; +} + +/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses +/// are not handled well by the general register pressure reduction +/// heuristics. When presented with code like this: +/// +/// N +/// / | +/// / | +/// U store +/// | +/// ... +/// +/// the heuristics tend to push the store up, but since the +/// operand of the store has another use (U), this would increase +/// the length of that other use (the U->N edge). 
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+///      N
+///      ||
+///      ||
+///     store
+///       |
+///       U
+///       |
+///      ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+  // Visit all the nodes in topological order, working top-down.
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    // For now, only look at nodes with no data successors, such as stores.
+    // These are especially important, due to the heuristics in
+    // getNodePriority for nodes with no data successors.
+    if (SU->NumSuccs != 0)
+      continue;
+    // For now, only look at nodes with exactly one data predecessor.
+    if (SU->NumPreds != 1)
+      continue;
+    // Avoid prescheduling copies to virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyToReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Locate the single data predecessor.
+    SUnit *PredSU = 0;
+    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+         EE = SU->Preds.end(); II != EE; ++II)
+      if (!II->isCtrl()) {
+        PredSU = II->getSUnit();
+        break;
+      }
+    assert(PredSU);
+
+    // Don't rewrite edges that carry physregs, because that requires
+    // additional support infrastructure.
+    if (PredSU->hasPhysRegDefs)
+      continue;
+    // Short-circuit the case where SU is PredSU's only data successor.
+    if (PredSU->NumSuccs == 1)
+      continue;
+    // Avoid prescheduling to copies from virtual registers, which don't
+    // behave like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyFromReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Perform checks on the successors of PredSU.
+    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+         EE = PredSU->Succs.end(); II != EE; ++II) {
+      SUnit *PredSuccSU = II->getSUnit();
+      if (PredSuccSU == SU) continue;
+      // If PredSU has another successor with no data successors, for
+      // now don't attempt to choose either over the other.
+      if (PredSuccSU->NumSuccs == 0)
+        goto outer_loop_continue;
+      // Don't break physical register dependencies.
+      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+          goto outer_loop_continue;
+      // Don't introduce graph cycles.
+      if (scheduleDAG->IsReachable(SU, PredSuccSU))
+        goto outer_loop_continue;
+    }
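
Reduced to plain adjacency lists, the rewiring performed below moves every successor edge of PredSU other than the SU edge over to SU, so all other users of PredSU's value are ordered after the store. A hypothetical sketch:

    #include <algorithm>
    #include <vector>

    // Move every successor edge Pred -> X (X != SU) to SU -> X, leaving
    // Pred -> SU as Pred's only remaining successor.
    static void rerouteSuccs(unsigned Pred, unsigned SU,
                             std::vector<std::vector<unsigned> > &Succs) {
      for (unsigned X : Succs[Pred])
        if (X != SU)
          Succs[SU].push_back(X);
      Succs[Pred].erase(
          std::remove_if(Succs[Pred].begin(), Succs[Pred].end(),
                         [SU](unsigned X) { return X != SU; }),
          Succs[Pred].end());
    }
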
+
+    // Ok, the transformation is safe and the heuristics suggest it is
+    // profitable. Update the graph.
+    DOUT << "Prescheduling SU # " << SU->NodeNum
+         << " next to PredSU # " << PredSU->NodeNum
+         << " to guide scheduling in the presence of multiple uses\n";
+    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+      SDep Edge = PredSU->Succs[i];
+      assert(!Edge.isAssignedRegDep());
+      SUnit *SuccSU = Edge.getSUnit();
+      if (SuccSU != SU) {
+        Edge.setSUnit(PredSU);
+        scheduleDAG->RemovePred(SuccSU, Edge);
+        scheduleDAG->AddPred(SU, Edge);
+        Edge.setSUnit(SU);
+        scheduleDAG->AddPred(SuccSU, Edge);
+        --i;
+      }
+    }
+    outer_loop_continue:;
+  }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+template<class SF>
+void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    if (!SU->isTwoAddress)
+      continue;
+
+    SDNode *Node = SU->getNode();
+    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+      continue;
+
+    unsigned Opc = Node->getMachineOpcode();
+    const TargetInstrDesc &TID = TII->get(Opc);
+    unsigned NumRes = TID.getNumDefs();
+    unsigned NumOps = TID.getNumOperands() - NumRes;
+    for (unsigned j = 0; j != NumOps; ++j) {
+      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+        continue;
+      SDNode *DU = SU->getNode()->getOperand(j).getNode();
+      if (DU->getNodeId() == -1)
+        continue;
+      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+      if (!DUSU) continue;
+      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+           E = DUSU->Succs.end(); I != E; ++I) {
+        if (I->isCtrl()) continue;
+        SUnit *SuccSU = I->getSUnit();
+        if (SuccSU == SU)
+          continue;
+        // Be conservative. Ignore if nodes aren't at roughly the same
+        // depth and height.
+        if (SuccSU->getHeight() < SU->getHeight() &&
+            (SU->getHeight() - SuccSU->getHeight()) > 1)
+          continue;
+        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+        // constrains whatever is using the copy, instead of the copy
+        // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+        while (SuccSU->Succs.size() == 1 &&
+               SuccSU->getNode()->isMachineOpcode() &&
+               SuccSU->getNode()->getMachineOpcode() ==
+                 TargetInstrInfo::COPY_TO_REGCLASS)
+          SuccSU = SuccSU->Succs.front().getSUnit();
+        // Don't constrain non-instruction nodes.
+        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+          continue;
+        // Don't constrain nodes with physical register defs if the
+        // predecessor can clobber them.
+        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
+          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+            continue;
+        }
+        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+        // these may be coalesced away. We want them close to their uses.
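
Every artificial edge added by this pass is guarded by an IsReachable query, because an ordering edge that closes a cycle would deadlock list scheduling. The guard pattern in isolation (hypothetical sketch):

    #include <vector>

    // Add an artificial ordering edge From -> To only when it cannot close
    // a cycle, i.e. when To does not already reach From.
    static bool addPseudoEdge(unsigned From, unsigned To,
                              std::vector<std::vector<unsigned> > &Succs,
                              bool (*Reachable)(unsigned, unsigned)) {
      if (Reachable(To, From))
        return false;  // Mirrors the IsReachable guard in the code above.
      Succs[From].push_back(To);
      return true;
    }
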
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); + if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || + SuccOpc == TargetInstrInfo::INSERT_SUBREG || + SuccOpc == TargetInstrInfo::SUBREG_TO_REG) + continue; + if ((!canClobber(SuccSU, DUSU) || + (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || + (!SU->isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, SU)) { + DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum + << " to SU #" << SuccSU->NodeNum << "\n"; + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } + } + } + } +} + +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. +template<class SF> +void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); +} + +/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled +/// predecessors of the successors of the SUnit SU. Stop when the provided +/// limit is exceeded. +static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, + unsigned Limit) { + unsigned Sum = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + const SUnit *SuccSU = I->getSUnit(); + for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), + EE = SuccSU->Preds.end(); II != EE; ++II) { + SUnit *PredSU = II->getSUnit(); + if (!PredSU->isScheduled) + if (++Sum > Limit) + return Sum; + } + } + return Sum; +} + + +// Top down +bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LPriority = SPQ->getNodePriority(left); + unsigned RPriority = SPQ->getNodePriority(right); + bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); + bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); + bool LIsFloater = LIsTarget && left->NumPreds == 0; + bool RIsFloater = RIsTarget && right->NumPreds == 0; + unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; + unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; + + if (left->NumSuccs == 0 && right->NumSuccs != 0) + return false; + else if (left->NumSuccs != 0 && right->NumSuccs == 0) + return true; + + if (LIsFloater) + LBonus -= 2; + if (RIsFloater) + RBonus -= 2; + if (left->NumSuccs == 1) + LBonus += 2; + if (right->NumSuccs == 1) + RBonus += 2; + + if (LPriority+LBonus != RPriority+RBonus) + return LPriority+LBonus < RPriority+RBonus; + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + if (left->NumSuccsLeft != right->NumSuccsLeft) + return left->NumSuccsLeft > right->NumSuccsLeft; + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = + new ScheduleDAGRRList(*IS->MF, true, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = + new ScheduleDAGRRList(*IS->MF, false, PQ); + PQ->setScheduleDAG(SD); + return SD; +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp new file mode 100644 index 0000000..7aa15bc --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -0,0 +1,294 @@ +//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) + : ScheduleDAG(mf) { +} + +/// Run - perform scheduling. 
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
+                             MachineBasicBlock::iterator insertPos) {
+  DAG = dag;
+  ScheduleDAG::Run(bb, insertPos);
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+  SUnit *SU = NewSUnit(Old->getNode());
+  SU->OrigNode = Old->OrigNode;
+  SU->Latency = Old->Latency;
+  SU->isTwoAddress = Old->isTwoAddress;
+  SU->isCommutable = Old->isCommutable;
+  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+  SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+  Old->isCloned = true;
+  return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+                                      const TargetRegisterInfo *TRI,
+                                      const TargetInstrInfo *TII,
+                                      unsigned &PhysReg, int &Cost) {
+  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+    return;
+
+  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+  if (TargetRegisterInfo::isVirtualRegister(Reg))
+    return;
+
+  unsigned ResNo = User->getOperand(2).getResNo();
+  if (Def->isMachineOpcode()) {
+    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+    if (ResNo >= II.getNumDefs() &&
+        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+      PhysReg = Reg;
+      const TargetRegisterClass *RC =
+        TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+      Cost = RC->getCopyCost();
+    }
+  }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+  // During scheduling, the NodeId field of SDNode is used to map SDNodes
+  // to their associated SUnits by holding SUnits table indices. A value
+  // of -1 means the SDNode does not yet have an associated SUnit.
+  unsigned NumNodes = 0;
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    NI->setNodeId(-1);
+    ++NumNodes;
+  }
+
+  // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't
+  // get invalidated.
+  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+  // This is a temporary workaround.
+  SUnits.reserve(NumNodes * 2);
+
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+
+    // If this node has already been processed, stop now.
+    if (NI->getNodeId() != -1) continue;
+
+    SUnit *NodeSUnit = NewSUnit(NI);
+
+    // See if anything is flagged to this node; if so, add it to the flagged
+    // nodes. Nodes can have at most one flag input and one flag output. Flags
+    // are required to be the last operand and result of a node.
+
+    // Scan up to find flagged preds.
+    SDNode *N = NI;
+    while (N->getNumOperands() &&
+           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+      N = N->getOperand(N->getNumOperands()-1).getNode();
+      assert(N->getNodeId() == -1 && "Node already inserted!");
+      N->setNodeId(NodeSUnit->NodeNum);
+    }
+
+    // Scan down to find any flagged succs.
+    N = NI;
+    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+      SDValue FlagVal(N, N->getNumValues()-1);
+
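
The two scans in BuildSchedUnits (up through flag operands, then down through flag results) gather a maximal run of glued nodes into one SUnit. Stripped of SelectionDAG details, the grouping is a doubly linked walk (hypothetical sketch):

    #include <vector>

    // Toy node with at most one glued predecessor and successor, mimicking
    // how MVT::Flag links adjacent nodes into one scheduling unit.
    struct GlueNode {
      GlueNode *GluedPred = nullptr;
      GlueNode *GluedSucc = nullptr;
    };

    // Collect the maximal glued run containing N, top to bottom; the real
    // code assigns all of these nodes the same SUnit number.
    static std::vector<GlueNode *> glueGroup(GlueNode *N) {
      while (N->GluedPred)
        N = N->GluedPred;            // Scan up.
      std::vector<GlueNode *> Group;
      for (; N; N = N->GluedSucc)    // Scan down.
        Group.push_back(N);
      return Group;
    }
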
+      // There are either zero or one users of the Flag result.
+      bool HasFlagUse = false;
+      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+           UI != E; ++UI)
+        if (FlagVal.isOperandOf(*UI)) {
+          HasFlagUse = true;
+          assert(N->getNodeId() == -1 && "Node already inserted!");
+          N->setNodeId(NodeSUnit->NodeNum);
+          N = *UI;
+          break;
+        }
+      if (!HasFlagUse) break;
+    }
+
+    // If there are flag operands involved, N is now the bottom-most node
+    // of the sequence of nodes that are flagged together.
+    // Update the SUnit.
+    NodeSUnit->setNode(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NodeSUnit->NodeNum);
+
+    // Assign the Latency field of NodeSUnit using target-provided information.
+    if (UnitLatencies)
+      NodeSUnit->Latency = 1;
+    else
+      ComputeLatency(NodeSUnit);
+  }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+  // Pass 2: add the preds, succs, etc.
+  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+    SUnit *SU = &SUnits[su];
+    SDNode *MainNode = SU->getNode();
+
+    if (MainNode->isMachineOpcode()) {
+      unsigned Opc = MainNode->getMachineOpcode();
+      const TargetInstrDesc &TID = TII->get(Opc);
+      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+          SU->isTwoAddress = true;
+          break;
+        }
+      }
+      if (TID.isCommutable())
+        SU->isCommutable = true;
+    }
+
+    // Find all predecessors and successors of the group.
+    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+      if (N->isMachineOpcode() &&
+          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+        SU->hasPhysRegClobbers = true;
+        unsigned NumUsed = CountResults(N);
+        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+          --NumUsed;    // Skip over unused values at the end.
+        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+          SU->hasPhysRegDefs = true;
+      }
+
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        SDNode *OpN = N->getOperand(i).getNode();
+        if (isPassiveNode(OpN)) continue;   // Not scheduled.
+        SUnit *OpSU = &SUnits[OpN->getNodeId()];
+        assert(OpSU && "Node has no SUnit!");
+        if (OpSU == SU) continue;           // In the same group.
+
+        MVT OpVT = N->getOperand(i).getValueType();
+        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+        bool isChain = OpVT == MVT::Other;
+
+        unsigned PhysReg = 0;
+        int Cost = 1;
+        // Determine if this is a physical register dependency.
+        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+        assert((PhysReg == 0 || !isChain) &&
+               "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the
+        // scheduler emits a copy from the physical register to a virtual
+        // register unless it requires a cross class copy (cost < 0). That
+        // means we are only treating "expensive to copy" register
+        // dependencies as physical register dependencies. This may change
+        // in the future though.
+        if (Cost >= 0)
+          PhysReg = 0;
+        SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+                         OpSU->Latency, PhysReg));
+      }
+    }
+  }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
+/// are given as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// flagged together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph() {
+  // Populate the SUnits array.
+  BuildSchedUnits();
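
AddSchedEdges, called next, then computes all the scheduling dependencies between nodes: each operand of every node in a group becomes either a data edge or an order (chain) edge on the owning SUnit. The classification reduces to a kind test per operand; a toy sketch with hypothetical types:

    #include <utility>
    #include <vector>

    enum class DepKind { Data, Order };
    struct ToyDep { unsigned PredId; DepKind Kind; };

    // Operands are (producer id, is-chain) pairs; chain operands become
    // Order edges and value operands become Data edges, as in AddSchedEdges.
    static void addEdges(const std::vector<std::pair<unsigned, bool> > &Operands,
                         std::vector<ToyDep> &Preds) {
      for (const auto &Op : Operands)
        Preds.push_back({Op.first, Op.second ? DepKind::Order : DepKind::Data});
    }
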
+ AddSchedEdges(); +} + +void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes flagged together into this SUnit. + SU->Latency = 0; + bool SawMachineOpcode = false; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + if (N->isMachineOpcode()) { + SawMachineOpcode = true; + SU->Latency += + InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass()); + } +} + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional flag operands (which do +/// not go into the resulting MachineInstr). +unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Flag) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// CountOperands - The inputs to target nodes have any actual inputs first, +/// followed by special operands that describe memory references, then an +/// optional chain operand, then an optional flag operand. Compute the number +/// of actual operands that will go into the resulting MachineInstr. +unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) { + unsigned N = ComputeMemOperandsEnd(Node); + while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode())) + --N; // Ignore MEMOPERAND nodes + return N; +} + +/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode +/// operand +unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + return N; +} + + +void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { + if (!SU->getNode()) { + cerr << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); + cerr << "\n"; + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + cerr << " "; + FlaggedNodes.back()->dump(DAG); + cerr << "\n"; + FlaggedNodes.pop_back(); + } +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 0000000..2a278b7 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,179 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGSDNodes class, which implements +// scheduling for an SDNode-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGSDNODES_H +#define SCHEDULEDAGSDNODES_H + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAG.h" + +namespace llvm { + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. 
+  ///
+  /// Edges between SUnits are initially based on edges in the SelectionDAG,
+  /// and additional edges can be added by the schedulers as heuristics.
+  /// SDNodes such as Constants, Registers, and a few others that are not
+  /// interesting to schedulers are not allocated SUnits.
+  ///
+  /// SDNodes with MVT::Flag operands are grouped along with the flagged
+  /// nodes into a single SUnit so that they are scheduled together.
+  ///
+  /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+  /// edges. Physical register dependence information is not carried in
+  /// the DAG and must be handled explicitly by schedulers.
+  ///
+  class ScheduleDAGSDNodes : public ScheduleDAG {
+  public:
+    SelectionDAG *DAG;                    // DAG of the current basic block
+
+    explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+    virtual ~ScheduleDAGSDNodes() {}
+
+    /// Run - Perform scheduling.
+    ///
+    void Run(SelectionDAG *dag, MachineBasicBlock *bb,
+             MachineBasicBlock::iterator insertPos);
+
+    /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+    ///
+    static bool isPassiveNode(SDNode *Node) {
+      if (isa<ConstantSDNode>(Node))       return true;
+      if (isa<ConstantFPSDNode>(Node))     return true;
+      if (isa<RegisterSDNode>(Node))       return true;
+      if (isa<GlobalAddressSDNode>(Node))  return true;
+      if (isa<BasicBlockSDNode>(Node))     return true;
+      if (isa<FrameIndexSDNode>(Node))     return true;
+      if (isa<ConstantPoolSDNode>(Node))   return true;
+      if (isa<JumpTableSDNode>(Node))      return true;
+      if (isa<ExternalSymbolSDNode>(Node)) return true;
+      if (isa<MemOperandSDNode>(Node))     return true;
+      if (Node->getOpcode() == ISD::EntryToken) return true;
+      return false;
+    }
+
+    /// NewSUnit - Creates a new SUnit and returns a pointer to it.
+    ///
+    SUnit *NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+      const SUnit *Addr = 0;
+      if (!SUnits.empty())
+        Addr = &SUnits[0];
+#endif
+      SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+      assert((Addr == 0 || Addr == &SUnits[0]) &&
+             "SUnits std::vector reallocated on the fly!");
+      SUnits.back().OrigNode = &SUnits.back();
+      return &SUnits.back();
+    }
+
+    /// Clone - Creates a clone of the specified SUnit. It does not copy the
+    /// predecessors / successors info nor the temporary scheduling states.
+    ///
+    SUnit *Clone(SUnit *N);
+
+    /// BuildSchedGraph - Build the SUnit graph from the selection dag we are
+    /// given as input. This SUnit graph is similar to the SelectionDAG, but
+    /// excludes nodes that aren't interesting to scheduling, and represents
+    /// nodes flagged together with a single SUnit.
+    virtual void BuildSchedGraph();
+
+    /// ComputeLatency - Compute node latency.
+    ///
+    virtual void ComputeLatency(SUnit *SU);
+
+    /// CountResults - The results of target nodes have register or immediate
+    /// operands first, then an optional chain, and optional flag operands
+    /// (which do not go into the machine instrs).
+    static unsigned CountResults(SDNode *Node);
+
+    /// CountOperands - The inputs to target nodes have any actual inputs first,
+    /// followed by special operands that describe memory references, then an
+    /// optional chain operand, then flag operands. Compute the number of
+    /// actual operands that will go into the resulting MachineInstr.
+    static unsigned CountOperands(SDNode *Node);
+
+    /// ComputeMemOperandsEnd - Find the index one past the last
+    /// MemOperandSDNode operand.
+    static unsigned ComputeMemOperandsEnd(SDNode *Node);
+
+    /// EmitNode - Generate machine code for a node and needed dependencies.
+    /// VRBaseMap contains, for each already emitted node, the first virtual
+    /// register number for the results of the node.
+    ///
+    void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+                  DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    virtual MachineBasicBlock *EmitSchedule();
+
+    /// Schedule - Order nodes according to selected style, filling
+    /// in the Sequence member.
+    ///
+    virtual void Schedule() = 0;
+
+    virtual void dumpNode(const SUnit *SU) const;
+
+    virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+    virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+  private:
+    /// EmitSubregNode - Generate machine code for subreg nodes.
+    ///
+    void EmitSubregNode(SDNode *Node,
+                        DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS
+    /// nodes.
+    ///
+    void EmitCopyToRegClassNode(SDNode *Node,
+                                DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// getVR - Return the virtual register corresponding to the specified
+    /// result of the specified node.
+    unsigned getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// getDstOfOnlyCopyToRegUse - If the only use of the specified result
+    /// number of node is a CopyToReg, return its destination register.
+    /// Return 0 otherwise.
+    unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const;
+
+    void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum,
+                    const TargetInstrDesc *II,
+                    DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// AddRegisterOperand - Add the specified register as an operand to the
+    /// specified machine instr. Insert register copies if the register is
+    /// not in the required register class.
+    void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+                            unsigned IIOpNum, const TargetInstrDesc *II,
+                            DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+    /// implicit physical register output.
+    void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+                         bool IsCloned, unsigned SrcReg,
+                         DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+                                const TargetInstrDesc &II, bool IsClone,
+                                bool IsCloned,
+                                DenseMap<SDValue, unsigned> &VRBaseMap);
+
+    /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+    void BuildSchedUnits();
+    void AddSchedEdges();
+  };
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
new file mode 100644
index 0000000..fb5e207
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -0,0 +1,668 @@
+//===- ScheduleDAGSDNodesEmit.cpp - Emit routines for ScheduleDAG --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAG class, which creates
+// MachineInstrs according to the computed schedule.
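+// The emitter walks the scheduled Sequence and, for each SDValue result,
+// records the first virtual register assigned to it in VRBaseMap so that
+// later uses can be wired up to the same register.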
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an +/// implicit physical register output. +void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + MVT VT = Node->getValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Flag) + continue; + Match = false; + if (User->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = + getInstrOperandRegClass(TRI, II, i+II.getNumDefs()); + if (!UseRC) + UseRC = RC; + else if (RC) { + if (UseRC->hasSuperClass(RC)) + UseRC = RC; + else + assert((UseRC == RC || RC->hasSuperClass(UseRC)) && + "Multiple uses expecting different register classes!"); + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + MVT VT = Node->getValueType(ResNo); + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI.getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. 
+ VRBase = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg, + DstRC, SrcRC); + + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. +unsigned ScheduleDAGSDNodes::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void ScheduleDAGSDNodes::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, II, i); + + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI.createVirtualRegister(RC); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned ScheduleDAGSDNodes::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // does not include operand register class info. 
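+    // For example (hypothetical target): an IMPLICIT_DEF of i32 with no
+    // CopyToReg use simply gets a fresh virtual register in whatever class
+    // TLI->getRegClassFor(MVT::i32) names on that target.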
+ if (!VReg) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + VReg = MRI.createVirtualRegister(RC); + } + BuildMI(BB, Op.getDebugLoc(), TII->get(TargetInstrInfo::IMPLICIT_DEF),VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +ScheduleDAGSDNodes::AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const TargetInstrDesc &TID = MI->getDesc(); + bool isOptDef = IIOpNum < TID.getNumOperands() && + TID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it. + if (II) { + const TargetRegisterClass *SrcRC = + MRI.getRegClass(VReg); + const TargetRegisterClass *DstRC = + getInstrOperandRegClass(TRI, *II, IIOpNum); + assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + "Don't have operand info for this instruction!"); + if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + unsigned NewVReg = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + VReg = NewVReg; + } + } + + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. 
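+/// As a rough illustration of the dispatch below: a TargetConstant operand
+/// becomes an immediate MachineOperand, a TargetGlobalAddress becomes a
+/// global-address operand, a ConstantPoolSDNode is materialized through the
+/// function's constant pool, and anything carrying a value in a register is
+/// routed through AddRegisterOperand (possibly inserting a cross-class copy).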
+void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
+                                    unsigned IIOpNum,
+                                    const TargetInstrDesc *II,
+                                    DenseMap<SDValue, unsigned> &VRBaseMap) {
+  if (Op.isMachineOpcode()) {
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+  } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateImm(C->getZExtValue()));
+  } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+    const ConstantFP *CFP = F->getConstantFPValue();
+    MI->addOperand(MachineOperand::CreateFPImm(CFP));
+  } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+  } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA->getOffset()));
+  } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+  } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+  } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateJTI(JT->getIndex()));
+  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+    int Offset = CP->getOffset();
+    unsigned Align = CP->getAlignment();
+    const Type *Type = CP->getType();
+    // MachineConstantPool wants an explicit alignment.
+    if (Align == 0) {
+      Align = TM.getTargetData()->getPrefTypeAlignment(Type);
+      if (Align == 0) {
+        // Alignment of vector types. FIXME!
+        Align = TM.getTargetData()->getTypeAllocSize(Type);
+      }
+    }
+
+    unsigned Idx;
+    if (CP->isMachineConstantPoolEntry())
+      Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+    else
+      Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align);
+    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset));
+  } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateES(ES->getSymbol()));
+  } else {
+    assert(Op.getValueType() != MVT::Other &&
+           Op.getValueType() != MVT::Flag &&
+           "Chain and flag operands should occur at end of operand list!");
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+  }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
+/// "SubIdx"'th sub-register class is the specified register class and whose
+/// type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+                         unsigned SubIdx, MVT VT) {
+  // Pick the register class of the superregister for this type.
+  for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+         E = TRC->superregclasses_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node,
+                                        DenseMap<SDValue, unsigned> &VRBaseMap) {
+  unsigned VRBase = 0;
+  unsigned Opc = Node->getMachineOpcode();
+
+  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+  // the CopyToReg'd destination register instead of creating a new vreg.
+  for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+       UI != E; ++UI) {
+    SDNode *User = *UI;
+    if (User->getOpcode() == ISD::CopyToReg &&
+        User->getOperand(2).getNode() == Node) {
+      unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+        VRBase = DestReg;
+        break;
+      }
+    }
+  }
+
+  if (Opc == TargetInstrInfo::EXTRACT_SUBREG) {
+    unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+    // Create the extract_subreg machine instruction.
+    MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(),
+                               TII->get(TargetInstrInfo::EXTRACT_SUBREG));
+
+    // Figure out the register class of the source operand.
+    unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+    const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
+    const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+    assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+    // Figure out the register class to create for the destreg.
+    // Note that if we're going to directly use an existing register,
+    // it must be precisely the required class, and not a subclass
+    // thereof.
+    if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) {
+      // Create the reg.
+      assert(SRC && "Couldn't find source register class");
+      VRBase = MRI.createVirtualRegister(SRC);
+    }
+
+    // Add def, source, and subreg index.
+    MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+    MI->addOperand(MachineOperand::CreateImm(SubIdx));
+    BB->insert(InsertPos, MI);
+  } else if (Opc == TargetInstrInfo::INSERT_SUBREG ||
+             Opc == TargetInstrInfo::SUBREG_TO_REG) {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+    SDValue N2 = Node->getOperand(2);
+    unsigned SubReg = getVR(N1, VRBaseMap);
+    unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+    const TargetRegisterClass *TRC = MRI.getRegClass(SubReg);
+    const TargetRegisterClass *SRC =
+      getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
+
+    // Figure out the register class to create for the destreg.
+    // Note that if we're going to directly use an existing register,
+    // it must be precisely the required class, and not a subclass
+    // thereof.
+    if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) {
+      // Create the reg.
+      assert(SRC && "Couldn't find source register class");
+      VRBase = MRI.createVirtualRegister(SRC);
+    }
+
+    // Create the insert_subreg or subreg_to_reg machine instruction.
+    MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), TII->get(Opc));
+    MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+    // If creating a subreg_to_reg, then the first input operand
+    // is an implicit value immediate, otherwise it's a register.
+    if (Opc == TargetInstrInfo::SUBREG_TO_REG) {
+      const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+      MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+    } else
+      AddOperand(MI, N0, 0, 0, VRBaseMap);
+    // Add the subregister being inserted.
+    AddOperand(MI, N1, 0, 0, VRBaseMap);
+    MI->addOperand(MachineOperand::CreateImm(SubIdx));
+    BB->insert(InsertPos, MI);
+  } else
+    assert(0 && "Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+  isNew = isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+ScheduleDAGSDNodes::EmitCopyToRegClassNode(SDNode *Node,
+                                           DenseMap<SDValue, unsigned> &VRBaseMap) {
+  unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+  const TargetRegisterClass *SrcRC = MRI.getRegClass(VReg);
+
+  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+  const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+
+  // Create the new VReg in the destination class and emit a copy.
+  unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+  bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg,
+                                   DstRC, SrcRC);
+  assert(Emitted &&
+         "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
+  (void) Emitted;
+
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+  isNew = isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+                                  DenseMap<SDValue, unsigned> &VRBaseMap) {
+  // If this is a machine instruction node, emit it directly.
+  if (Node->isMachineOpcode()) {
+    unsigned Opc = Node->getMachineOpcode();
+
+    // Handle subreg insert/extract specially.
+    if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+        Opc == TargetInstrInfo::INSERT_SUBREG ||
+        Opc == TargetInstrInfo::SUBREG_TO_REG) {
+      EmitSubregNode(Node, VRBaseMap);
+      return;
+    }
+
+    // Handle COPY_TO_REGCLASS specially.
+    if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) {
+      EmitCopyToRegClassNode(Node, VRBaseMap);
+      return;
+    }
+
+    if (Opc == TargetInstrInfo::IMPLICIT_DEF)
+      // We want a unique VR for each IMPLICIT_DEF use.
+      return;
+
+    const TargetInstrDesc &II = TII->get(Opc);
+    unsigned NumResults = CountResults(Node);
+    unsigned NodeOperands = CountOperands(Node);
+    unsigned MemOperandsEnd = ComputeMemOperandsEnd(Node);
+    bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+                          II.getImplicitDefs() != 0;
+#ifndef NDEBUG
+    unsigned NumMIOperands = NodeOperands + NumResults;
+    assert((II.getNumOperands() == NumMIOperands ||
+            HasPhysRegOuts || II.isVariadic()) &&
+           "#operands for dag node doesn't match .td file!");
+#endif
+
+    // Create the new machine instruction.
+    MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), II);
+
+    // Add result register values for things that are defined by this
+    // instruction.
+    if (NumResults)
+      CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+    // Emit all of the actual operands of this instruction, adding them to the
+    // instruction as appropriate.
+    for (unsigned i = 0; i != NodeOperands; ++i)
+      AddOperand(MI, Node->getOperand(i), i+II.getNumDefs(), &II, VRBaseMap);
+
+    // Emit all of the memory operands of this instruction.
+    for (unsigned i = NodeOperands; i != MemOperandsEnd; ++i)
+      AddMemOperand(MI, cast<MemOperandSDNode>(Node->getOperand(i))->MO);
+
+    if (II.usesCustomDAGSchedInsertionHook()) {
+      // Insert this instruction into the basic block using a target-specific
+      // inserter, which may return a new basic block.
+      BB = TLI->EmitInstrWithCustomInserter(MI, BB);
+      InsertPos = BB->end();
+    } else {
+      BB->insert(InsertPos, MI);
+    }
+
+    // Additional results must be physical register defs.
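+    // Illustrative example (target-specific details vary): an instruction
+    // that implicitly defines a condition register such as EFLAGS exposes
+    // that value here at a result index past getNumDefs(); it is copied
+    // into a virtual register below only if some value of it is used.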
+ if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + return; + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(DAG); +#endif + assert(0 && "This target-independent node should have been selected!"); + break; + case ISD::EntryToken: + assert(0 && "EntryToken should have been excluded from the schedule!"); + break; + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; + // Get the register classes of the src/dst. + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + SrcTRC = MRI.getRegClass(SrcReg); + else + SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); + + if (TargetRegisterInfo::isVirtualRegister(DestReg)) + DstTRC = MRI.getRegClass(DestReg); + else + DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, + Node->getOperand(1).getValueType()); + + bool Emitted = TII->copyRegToReg(*BB, InsertPos, DestReg, SrcReg, + DstTRC, SrcTRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::INLINEASM)); + + // Add the asm string as an external symbol operand. + const char *AsmStr = + cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + MI->addOperand(MachineOperand::CreateES(AsmStr)); + + // Add all of the operand registers to the instruction. + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + MI->addOperand(MachineOperand::CreateImm(Flags)); + ++i; // Skip the ID value. + + switch (Flags & 7) { + default: assert(0 && "Bad flags!"); + case 2: // Def of register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + break; + case 6: // Def of earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, + false, 0, true)); + } + break; + case 1: // Use of register. + case 3: // Immediate. + case 4: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + break; + } + } + BB->insert(InsertPos, MI); + break; + } + } +} + +/// EmitSchedule - Emit the machine code in scheduled order. 
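+/// Null entries in the Sequence become noops, SUnits with no SDNode become
+/// physical register copies, and the nodes flagged to a SUnit's main node
+/// are emitted first, topmost node first, with the main node emitted last.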
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { + DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SUnit*, unsigned> CopyVRBaseMap; + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. + EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; + N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,VRBaseMap); + FlaggedNodes.pop_back(); + } + EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); + } + + return BB; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp new file mode 100644 index 0000000..195896e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -0,0 +1,5743 @@ +//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG class. +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. 
+static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +static const fltSemantics *MVTToAPFloatSemantics(MVT VT) { + switch (VT.getSimpleVT()) { + default: assert(0 && "Unknown FP format"); + case MVT::f32: return &APFloat::IEEEsingle; + case MVT::f64: return &APFloat::IEEEdouble; + case MVT::f80: return &APFloat::x87DoubleExtended; + case MVT::f128: return &APFloat::IEEEquad; + case MVT::ppcf128: return &APFloat::PPCDoubleDouble; + } +} + +SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. +bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { + return getValueAPF().bitwiseIsEqual(V); +} + +bool ConstantFPSDNode::isValueValidForType(MVT VT, + const APFloat& Val) { + assert(VT.isFloatingPoint() && "Can only convert between FP types"); + + // PPC long double cannot be converted to any other type. + if (VT == MVT::ppcf128 || + &Val.getSemantics() == &APFloat::PPCDoubleDouble) + return false; + + // convert modifies in place, so make a copy. + APFloat Val2 = APFloat(Val); + bool losesInfo; + (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +/// isBuildVectorAllOnes - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. + SDValue NotZero = N->getOperand(i); + if (isa<ConstantSDNode>(NotZero)) { + if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + return false; + } else if (isa<ConstantFPSDNode>(NotZero)) { + if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). + bitcastToAPInt().isAllOnesValue()) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + + +/// isBuildVectorAllZeros - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are 0 or undef. +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. 
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+
+  // Do not accept build_vectors that aren't all constants or which have
+  // non-zero elements.
+  SDValue Zero = N->getOperand(i);
+  if (isa<ConstantSDNode>(Zero)) {
+    if (!cast<ConstantSDNode>(Zero)->isNullValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(Zero)) {
+    if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+      return false;
+  } else
+    return false;
+
+  // Okay, we have at least one zero value, check to see if the rest match
+  // or are undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != Zero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+    return true;
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+  if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+    return false;
+  unsigned NumElems = N->getNumOperands();
+  for (unsigned i = 1; i < NumElems; ++i) {
+    SDValue V = N->getOperand(i);
+    if (V.getOpcode() != ISD::UNDEF)
+      return false;
+  }
+  return true;
+}
+
+/// isDebugLabel - Return true if the specified node represents a debug
+/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node).
+bool ISD::isDebugLabel(const SDNode *N) {
+  if (N->getOpcode() == ISD::DBG_LABEL)
+    return true;
+  if (N->isMachineOpcode() &&
+      N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL)
+    return true;
+  return false;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+  // To perform this operation, we just need to swap the L and G bits of the
+  // operation.
+  unsigned OldL = (Operation >> 2) & 1;
+  unsigned OldG = (Operation >> 1) & 1;
+  return ISD::CondCode((Operation & ~6) |  // Keep the N, U, E bits
+                       (OldL << 1) |       // New G bit
+                       (OldG << 2));       // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+  unsigned Operation = Op;
+  if (isInteger)
+    Operation ^= 7;   // Flip L, G, E bits, but not U.
+  else
+    Operation ^= 15;  // Flip all of the condition bits.
+
+  if (Operation > ISD::SETTRUE2)
+    Operation &= ~8;  // Don't let N and U bits get set.
+
+  return ISD::CondCode(Operation);
+}
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation, 2 if it is an unsigned comparison, and 0 if the
+/// operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+  switch (Opcode) {
+  default: assert(0 && "Illegal integer setcc operation!");
+  case ISD::SETEQ:
+  case ISD::SETNE:  return 0;
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETGT:
+  case ISD::SETGE:  return 1;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUGT:
+  case ISD::SETUGE: return 2;
+  }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)).
This function +/// returns SETCC_INVALID if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed integer setcc with an unsigned integer setcc. + return ISD::SETCC_INVALID; + + unsigned Op = Op1 | Op2; // Combine all of the condition bits. + + // If the N and U bits get set then the resultant comparison DOES suddenly + // care about orderedness, and is true when ordered. + if (Op > ISD::SETTRUE2) + Op &= ~16; // Clear the U bit if the N bit is set. + + // Canonicalize illegal integer setcc's. + if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT + Op = ISD::SETNE; + + return ISD::CondCode(Op); +} + +/// getSetCCAndOperation - Return the result of a logical AND between different +/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This +/// function returns zero if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed setcc with an unsigned setcc. + return ISD::SETCC_INVALID; + + // Combine all of the condition bits. + ISD::CondCode Result = ISD::CondCode(Op1 & Op2); + + // Canonicalize illegal integer setcc's. + if (isInteger) { + switch (Result) { + default: break; + case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT + case ISD::SETOEQ: // SETEQ & SETU[LG]E + case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE + case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE + case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE + } + } + + return Result; +} + +const TargetMachine &SelectionDAG::getTarget() const { + return MF->getTarget(); +} + +//===----------------------------------------------------------------------===// +// SDNode Profile Support +//===----------------------------------------------------------------------===// + +/// AddNodeIDOpcode - Add the node opcode to the NodeID data. +/// +static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { + ID.AddInteger(OpC); +} + +/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them +/// solely with their pointer. +static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { + ID.AddPointer(VTList.VTs); +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDValue *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDUse *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, + unsigned short OpC, SDVTList VTList, + const SDValue *OpList, unsigned N) { + AddNodeIDOpcode(ID, OpC); + AddNodeIDValueTypes(ID, VTList); + AddNodeIDOperands(ID, OpList, N); +} + +/// AddNodeIDCustom - If this is an SDNode with special info, add this info to +/// the NodeID data. 
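+/// For example, two separate getConstant(42, MVT::i32) requests profile to
+/// the same FoldingSet ID here, because the uniqued ConstantInt pointer is
+/// part of the ID, and they therefore CSE to a single node.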
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { + switch (N->getOpcode()) { + default: break; // Normal nodes don't need extra info. + case ISD::ARG_FLAGS: + ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits()); + break; + case ISD::TargetConstant: + case ISD::Constant: + ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); + break; + case ISD::TargetConstantFP: + case ISD::ConstantFP: { + ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); + break; + } + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::TargetGlobalTLSAddress: + case ISD::GlobalTLSAddress: { + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + ID.AddPointer(GA->getGlobal()); + ID.AddInteger(GA->getOffset()); + break; + } + case ISD::BasicBlock: + ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); + break; + case ISD::Register: + ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); + break; + case ISD::DBG_STOPPOINT: { + const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(N); + ID.AddInteger(DSP->getLine()); + ID.AddInteger(DSP->getColumn()); + ID.AddPointer(DSP->getCompileUnit()); + break; + } + case ISD::SRCVALUE: + ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); + break; + case ISD::MEMOPERAND: { + const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO; + MO.Profile(ID); + break; + } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); + break; + case ISD::JumpTable: + case ISD::TargetJumpTable: + ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); + break; + case ISD::ConstantPool: + case ISD::TargetConstantPool: { + const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); + ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getOffset()); + if (CP->isMachineConstantPoolEntry()) + CP->getMachineCPVal()->AddSelectionDAGCSEId(ID); + else + ID.AddPointer(CP->getConstVal()); + break; + } + case ISD::CALL: { + const CallSDNode *Call = cast<CallSDNode>(N); + ID.AddInteger(Call->getCallingConv()); + ID.AddInteger(Call->isVarArg()); + break; + } + case ISD::LOAD: { + const LoadSDNode *LD = cast<LoadSDNode>(N); + ID.AddInteger(LD->getMemoryVT().getRawBits()); + ID.AddInteger(LD->getRawSubclassData()); + break; + } + case ISD::STORE: { + const StoreSDNode *ST = cast<StoreSDNode>(N); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + break; + } + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: { + const AtomicSDNode *AT = cast<AtomicSDNode>(N); + ID.AddInteger(AT->getMemoryVT().getRawBits()); + ID.AddInteger(AT->getRawSubclassData()); + break; + } + case ISD::VECTOR_SHUFFLE: { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + i != e; ++i) + ID.AddInteger(SVN->getMaskElt(i)); + break; + } + } // end switch (N->getOpcode()) +} + +/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID +/// data. +static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { + AddNodeIDOpcode(ID, N->getOpcode()); + // Add the return value info. + AddNodeIDValueTypes(ID, N->getVTList()); + // Add the operand info. 
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands()); + + // Handle SDNode leafs with special info. + AddNodeIDCustom(ID, N); +} + +/// encodeMemSDNodeFlags - Generic routine for computing a value for use in +/// the CSE map that carries alignment, volatility, indexing mode, and +/// extension/truncation information. +/// +static inline unsigned +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, + bool isVolatile, unsigned Alignment) { + assert((ConvType & 3) == ConvType && + "ConvType may not require more than 2 bits!"); + assert((AM & 7) == AM && + "AM may not require more than 3 bits!"); + return ConvType | + (AM << 2) | + (isVolatile << 5) | + ((Log2_32(Alignment) + 1) << 6); +} + +//===----------------------------------------------------------------------===// +// SelectionDAG Class +//===----------------------------------------------------------------------===// + +/// doNotCSE - Return true if CSE should not be performed for this node. +static bool doNotCSE(SDNode *N) { + if (N->getValueType(0) == MVT::Flag) + return true; // Never CSE anything that produces a flag. + + switch (N->getOpcode()) { + default: break; + case ISD::HANDLENODE: + case ISD::DBG_LABEL: + case ISD::DBG_STOPPOINT: + case ISD::EH_LABEL: + case ISD::DECLARE: + return true; // Never CSE these nodes. + } + + // Check that remaining values produced are not flags. + for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Flag) + return true; // Never CSE anything that produces a flag. + + return false; +} + +/// RemoveDeadNodes - This method deletes all unreachable nodes in the +/// SelectionDAG. +void SelectionDAG::RemoveDeadNodes() { + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted. + HandleSDNode Dummy(getRoot()); + + SmallVector<SDNode*, 128> DeadNodes; + + // Add all obviously-dead nodes to the DeadNodes worklist. + for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) + if (I->use_empty()) + DeadNodes.push_back(I); + + RemoveDeadNodes(DeadNodes); + + // If the root changed (e.g. it was a dead load, update the root). + setRoot(Dummy.getValue()); +} + +/// RemoveDeadNodes - This method deletes the unreachable nodes in the +/// given list, and any nodes that become unreachable as a result. +void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, + DAGUpdateListener *UpdateListener) { + + // Process the worklist, deleting the nodes and adding their uses to the + // worklist. + while (!DeadNodes.empty()) { + SDNode *N = DeadNodes.pop_back_val(); + + if (UpdateListener) + UpdateListener->NodeDeleted(N, 0); + + // Take the node out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Next, brutally remove the operand list. This is safe to do, as there are + // no cycles in the graph. + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Operand = Use.getNode(); + Use.set(SDValue()); + + // Now that we removed this operand, see if there are no uses of it left. + if (Operand->use_empty()) + DeadNodes.push_back(Operand); + } + + DeallocateNode(N); + } +} + +void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ + SmallVector<SDNode*, 16> DeadNodes(1, N); + RemoveDeadNodes(DeadNodes, UpdateListener); +} + +void SelectionDAG::DeleteNode(SDNode *N) { + // First take this out of the appropriate CSE map. 
+ RemoveNodeFromCSEMaps(N); + + // Finally, remove uses due to operands of this node, remove from the + // AllNodes list, and delete the node. + DeleteNodeNotInCSEMaps(N); +} + +void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { + assert(N != AllNodes.begin() && "Cannot delete the entry node!"); + assert(N->use_empty() && "Cannot delete a node that is not dead!"); + + // Drop all of the operands and decrement used node's use counts. + N->DropOperands(); + + DeallocateNode(N); +} + +void SelectionDAG::DeallocateNode(SDNode *N) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + // Set the opcode to DELETED_NODE to help catch bugs when node + // memory is reallocated. + N->NodeType = ISD::DELETED_NODE; + + NodeAllocator.Deallocate(AllNodes.remove(N)); +} + +/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that +/// correspond to it. This is useful when we're about to delete or repurpose +/// the node. We don't want future request for structurally identical nodes +/// to return N anymore. +bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { + bool Erased = false; + switch (N->getOpcode()) { + case ISD::EntryToken: + assert(0 && "EntryToken should not be in CSEMaps!"); + return false; + case ISD::HANDLENODE: return false; // noop. + case ISD::CONDCODE: + assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && + "Cond code doesn't exist!"); + Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0; + CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0; + break; + case ISD::ExternalSymbol: + Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::TargetExternalSymbol: + Erased = + TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::VALUETYPE: { + MVT VT = cast<VTSDNode>(N)->getVT(); + if (VT.isExtended()) { + Erased = ExtendedValueTypeNodes.erase(VT); + } else { + Erased = ValueTypeNodes[VT.getSimpleVT()] != 0; + ValueTypeNodes[VT.getSimpleVT()] = 0; + } + break; + } + default: + // Remove it from the CSE Map. + Erased = CSEMap.RemoveNode(N); + break; + } +#ifndef NDEBUG + // Verify that the node was actually in one of the CSE maps, unless it has a + // flag result (which cannot be CSE'd) or is one of the special cases that are + // not subject to CSE. + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && + !N->isMachineOpcode() && !doNotCSE(N)) { + N->dump(this); + cerr << "\n"; + assert(0 && "Node is not in map!"); + } +#endif + return Erased; +} + +/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE +/// maps and modified in place. Add it back to the CSE maps, unless an identical +/// node already exists, in which case transfer all its users to the existing +/// node. This transfer can potentially trigger recursive merging. +/// +void +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N, + DAGUpdateListener *UpdateListener) { + // For node types that aren't CSE'd, just act as if no identical node + // already exists. + if (!doNotCSE(N)) { + SDNode *Existing = CSEMap.GetOrInsertNode(N); + if (Existing != N) { + // If there was already an existing matching node, use ReplaceAllUsesWith + // to replace the dead one with the existing one. This can cause + // recursive merging of other unrelated nodes down the line. + ReplaceAllUsesWith(N, Existing, UpdateListener); + + // N is now dead. Inform the listener if it exists and delete it. 
+ if (UpdateListener) + UpdateListener->NodeDeleted(N, Existing); + DeleteNodeNotInCSEMaps(N); + return; + } + } + + // If the node doesn't already exist, we updated it. Inform a listener if + // it exists. + if (UpdateListener) + UpdateListener->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + const SDValue *Ops,unsigned NumOps, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
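+/// For instance, a BUILD_VECTOR whose operand count does not match the
+/// result type's vector element count trips an assertion below.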
+void SelectionDAG::VerifyNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + MVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + MVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + break; + } + } +} + +/// getMVTAlignment - Compute the default alignment value for the +/// given type. +/// +unsigned SelectionDAG::getMVTAlignment(MVT VT) const { + const Type *Ty = VT == MVT::iPTR ? + PointerType::get(Type::Int8Ty, 0) : + VT.getTypeForMVT(); + + return TLI.getTargetData()->getABITypeAlignment(Ty); +} + +// EntryNode could meaningfully have debug info if we can find it... +SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) + : TLI(tli), FLI(fli), DW(0), + EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), + getVTList(MVT::Other)), Root(getEntryNode()) { + AllNodes.push_back(&EntryNode); +} + +void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, + DwarfWriter *dw) { + MF = &mf; + MMI = mmi; + DW = dw; +} + +SelectionDAG::~SelectionDAG() { + allnodes_clear(); +} + +void SelectionDAG::allnodes_clear() { + assert(&*AllNodes.begin() == &EntryNode); + AllNodes.remove(AllNodes.begin()); + while (!AllNodes.empty()) + DeallocateNode(AllNodes.begin()); +} + +void SelectionDAG::clear() { + allnodes_clear(); + OperandAllocator.Reset(); + CSEMap.clear(); + + ExtendedValueTypeNodes.clear(); + ExternalSymbols.clear(); + TargetExternalSymbols.clear(); + std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), + static_cast<CondCodeSDNode*>(0)); + std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), + static_cast<SDNode*>(0)); + + EntryNode.UseList = 0; + AllNodes.push_back(&EntryNode); + Root = getEntryNode(); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { + if (Op.getValueType() == VT) return Op; + APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), + VT.getSizeInBits()); + return getNode(ISD::AND, DL, Op.getValueType(), Op, + getConstant(Imm, Op.getValueType())); +} + +/// getNOT - Create a bitwise NOT operation as (XOR Val, -1). +/// +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) { + MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + SDValue NegOne = + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + return getNode(ISD::XOR, DL, VT, Val, NegOne); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) { + MVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + assert((EltVT.getSizeInBits() >= 64 || + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + "getConstant with a uint64_t value that doesn't fit in the type!"); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) { + return getConstant(*ConstantInt::get(Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) { + assert(VT.isInteger() && "Cannot create FP integer constant!"); + + MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + assert(Val.getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); + + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&Val); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + if (!N) { + N = NodeAllocator.Allocate<ConstantSDNode>(); + new (N) ConstantSDNode(isT, &Val, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(), + VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { + return getConstant(Val, TLI.getPointerTy(), isTarget); +} + + +SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(V), VT, isTarget); +} + +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ + assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); + + MVT EltVT = + VT.isVector() ? VT.getVectorElementType() : VT; + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&V); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + if (!N) { + N = NodeAllocator.Allocate<ConstantFPSDNode>(); + new (N) ConstantFPSDNode(isTarget, &V, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + // FIXME DebugLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(), + VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { + MVT EltVT = + VT.isVector() ? VT.getVectorElementType() : VT; + if (EltVT==MVT::f32) + return getConstantFP(APFloat((float)Val), VT, isTarget); + else + return getConstantFP(APFloat(Val), VT, isTarget); +} + +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, + MVT VT, int64_t Offset, + bool isTargetGA) { + unsigned Opc; + + // Truncate (with sign-extension) the offset value to the pointer size. 
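+  // For example, with 32-bit pointers a 64-bit Offset of 0x00000000ffffffff
+  // becomes -1: the shift pair below keeps the low 32 bits and replicates
+  // bit 31 through the upper half.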
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + if (BitWidth < 64) + Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar) { + // If GV is an alias then use the aliasee for determining thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + } + + if (GVar && GVar->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(GV); + ID.AddInteger(Offset); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>(); + new (N) GlobalAddressSDNode(isTargetGA, GV, VT, Offset); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(FI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>(); + new (N) FrameIndexSDNode(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){ + unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(JTI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>(); + new (N) JumpTableSDNode(JTI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, + unsigned Alignment, int Offset, + bool isTarget) { + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); + new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT, + unsigned Alignment, int Offset, + bool isTarget) { + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool;
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(Offset);
+  C->AddSelectionDAGCSEId(ID);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(MBB);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
+  new (N) BasicBlockSDNode(MBB);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0);
+  ID.AddInteger(Flags.getRawBits());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>();
+  new (N) ARG_FLAGSSDNode(Flags);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(MVT VT) {
+  if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size())
+    ValueTypeNodes.resize(VT.getSimpleVT()+1);
+
+  SDNode *&N = VT.isExtended() ?
+    ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()];
+
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<VTSDNode>();
+  new (N) VTSDNode(VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) {
+  SDNode *&N = ExternalSymbols[Sym];
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+  new (N) ExternalSymbolSDNode(false, Sym, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT) {
+  SDNode *&N = TargetExternalSymbols[Sym];
+  if (N) return SDValue(N, 0);
+  N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
+  new (N) ExternalSymbolSDNode(true, Sym, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+  if ((unsigned)Cond >= CondCodeNodes.size())
+    CondCodeNodes.resize(Cond+1);
+
+  if (CondCodeNodes[Cond] == 0) {
+    CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>();
+    new (N) CondCodeSDNode(Cond);
+    CondCodeNodes[Cond] = N;
+    AllNodes.push_back(N);
+  }
+  return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// at N2 to point at N1.
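+// For example, with 4-element vectors, commuting the mask <0,5,2,7> (lanes
+// 0 and 2 taken from N1, lanes 1 and 3 taken from N2) yields <4,1,6,3>,
+// which selects the same lanes once the two operands are swapped.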
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+  std::swap(N1, N2);
+  int NElts = M.size();
+  for (int i = 0; i != NElts; ++i) {
+    if (M[i] >= NElts)
+      M[i] -= NElts;
+    else if (M[i] >= 0)
+      M[i] += NElts;
+  }
+}
+
+SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
+                                       SDValue N2, const int *Mask) {
+  assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+  assert(VT.isVector() && N1.getValueType().isVector() &&
+         "Vector Shuffle VTs must be vectors");
+  assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+         && "Vector Shuffle VTs must have the same element type");
+
+  // Canonicalize shuffle undef, undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  // Validate that all indices in Mask are within the range of the elements
+  // input to the shuffle.
+  unsigned NElts = VT.getVectorNumElements();
+  SmallVector<int, 8> MaskVec;
+  for (unsigned i = 0; i != NElts; ++i) {
+    assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+    MaskVec.push_back(Mask[i]);
+  }
+
+  // Canonicalize shuffle v, v -> v, undef
+  if (N1 == N2) {
+    N2 = getUNDEF(VT);
+    for (unsigned i = 0; i != NElts; ++i)
+      if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+  }
+
+  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
+  if (N1.getOpcode() == ISD::UNDEF)
+    commuteShuffle(N1, N2, MaskVec);
+
+  // Canonicalize a shuffle whose indices all select the LHS into
+  // shuffle lhs, undef, and one whose indices all select the RHS into
+  // shuffle rhs, undef.
+  bool AllLHS = true, AllRHS = true;
+  bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+  for (unsigned i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= (int)NElts) {
+      if (N2Undef)
+        MaskVec[i] = -1;
+      else
+        AllLHS = false;
+    } else if (MaskVec[i] >= 0) {
+      AllRHS = false;
+    }
+  }
+  if (AllLHS && AllRHS)
+    return getUNDEF(VT);
+  if (AllLHS && !N2Undef)
+    N2 = getUNDEF(VT);
+  if (AllRHS) {
+    N1 = getUNDEF(VT);
+    commuteShuffle(N1, N2, MaskVec);
+  }
+
+  // If this is an identity shuffle, or if every element is undef, return the
+  // appropriate folded node instead.
+  bool AllUndef = true;
+  bool Identity = true;
+  for (unsigned i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+    if (MaskVec[i] >= 0) AllUndef = false;
+  }
+  if (Identity)
+    return N1;
+  if (AllUndef)
+    return getUNDEF(VT);
+
+  FoldingSetNodeID ID;
+  SDValue Ops[2] = { N1, N2 };
+  AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+  for (unsigned i = 0; i != NElts; ++i)
+    ID.AddInteger(MaskVec[i]);
+
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  // Allocate the mask array for the node out of the BumpPtrAllocator, since
+  // SDNode doesn't have access to it.  This memory will be "leaked" when
+  // the node is deallocated, but recovered when the NodeAllocator is released.
+  int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+  memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+
+  ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
+  new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
+                                       SDValue Val, SDValue DTy,
+                                       SDValue STy, SDValue Rnd, SDValue Sat,
+                                       ISD::CvtCode Code) {
+  // If the src and dest types are the same and the conversion is between
+  // integer types of the same sign or two floats, no conversion is necessary.
+  if (DTy == STy &&
+      (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+    return Val;
+
+  // Build the CSE key from the operands so identical nodes are shared.
+  FoldingSetNodeID ID;
+  SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+  AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
+  new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+  ID.AddInteger(RegNo);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
+  new (N) RegisterSDNode(RegNo, VT);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root,
+                                      unsigned Line, unsigned Col,
+                                      Value *CU) {
+  SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>();
+  new (N) DbgStopPointSDNode(Root, Line, Col, CU);
+  N->setDebugLoc(DL);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
+                               SDValue Root,
+                               unsigned LabelID) {
+  FoldingSetNodeID ID;
+  SDValue Ops[] = { Root };
+  AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
+  ID.AddInteger(LabelID);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
+  new (N) LabelSDNode(Opcode, dl, Root, LabelID);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+  assert((!V || isa<PointerType>(V->getType())) &&
+         "SrcValue is not a pointer?");
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+  ID.AddPointer(V);
+
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
+  new (N) SrcValueSDNode(V);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) {
+#ifndef NDEBUG
+  const Value *v = MO.getValue();
+  assert((!v || isa<PointerType>(v->getType())) &&
+         "SrcValue is not a pointer?");
+#endif
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0);
+  MO.Profile(ID);
+
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>();
+  new (N) MemOperandSDNode(MO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+/// getShiftAmountOperand - Return the specified value cast to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
+  MVT OpTy = Op.getValueType();
+  MVT ShTy = TLI.getShiftAmountTy();
+  if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+  ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+  return getNode(Opcode, Op.getDebugLoc(), ShTy, Op);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + unsigned ByteSize = VT.getStoreSizeInBits()/8; + const Type *Ty = VT.getTypeForMVT(); + unsigned StackAlign = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); + + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +/// CreateStackTemporary - Create a stack temporary suitable for holding +/// either of the specified value types. +SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { + unsigned Bytes = std::max(VT1.getStoreSizeInBits(), + VT2.getStoreSizeInBits())/8; + const Type *Ty1 = VT1.getTypeForMVT(); + const Type *Ty2 = VT2.getTypeForMVT(); + const TargetData *TD = TLI.getTargetData(); + unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), + TD->getPrefTypeAlignment(Ty2)); + + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, + SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return getConstant(1, VT); + + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETO: + case ISD::SETUO: + case ISD::SETUEQ: + case ISD::SETUNE: + assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); + break; + } + + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) { + const APInt &C2 = N2C->getAPIntValue(); + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + switch (Cond) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETEQ: return getConstant(C1 == C2, VT); + case ISD::SETNE: return getConstant(C1 != C2, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); + case ISD::SETULE: return getConstant(C1.ule(C2), VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), VT); + case ISD::SETLT: return getConstant(C1.slt(C2), VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), VT); + case ISD::SETLE: return getConstant(C1.sle(C2), VT); + case ISD::SETGE: return getConstant(C1.sge(C2), VT); + } + } + } + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { + // No compile time operations on this type yet. 
+ if (N1C->getValueType(0) == MVT::ppcf128) + return SDValue(); + + APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, VT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, VT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, VT); + case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + } + } else { + // Ensure that the constant occurs on the RHS. + return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + } + } + + // Could not fold it. + return SDValue(); +} + +/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We +/// use this predicate to simplify operations downstream. +bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + unsigned BitWidth = Op.getValueSizeInBits(); + return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, + unsigned Depth) const { + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) const { + unsigned BitWidth = Mask.getBitWidth(); + assert(BitWidth == Op.getValueType().getSizeInBits() && + "Mask size mismatches value type size!"); + + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + if (Depth == 6 || Mask == 0) + return; // Limit search depth. 
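+
+  // Throughout the switch below, KnownZero and KnownOne are kept disjoint:
+  // a bit set in KnownZero is definitely 0, a bit set in KnownOne is
+  // definitely 1, and a bit set in neither is unknown.  For example, a value
+  // known to match the 4-bit pattern ?01? has KnownZero = 0100 and
+  // KnownOne = 0010.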
+
+  APInt KnownZero2, KnownOne2;
+
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+    KnownZero = ~KnownOne & Mask;
+    return;
+  case ISD::AND:
+    // If either the LHS or the RHS is zero, the result is zero.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+                      KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output bits are known one only if set in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    // Output bits are known zero if clear in either the LHS or RHS.
+    KnownZero |= KnownZero2;
+    return;
+  case ISD::OR:
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+                      KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output bits are known zero only if clear in both the LHS and RHS.
+    KnownZero &= KnownZero2;
+    // Output bits are known one if set in either the LHS or RHS.
+    KnownOne |= KnownOne2;
+    return;
+  case ISD::XOR: {
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output bits are known zero where the LHS and RHS bits are known equal
+    // (both zero or both one).
+    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output bits are known one where one side is known one and the other
+    // is known zero.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::MUL: {
+    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+    // More trickiness is possible, but this is sufficient for the
+    // interesting case of alignment computation.
+    KnownOne.clear();
+    unsigned TrailZ = KnownZero.countTrailingOnes() +
+                      KnownZero2.countTrailingOnes();
+    unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+                              KnownZero2.countLeadingOnes(),
+                              BitWidth) - BitWidth;
+
+    TrailZ = std::min(TrailZ, BitWidth);
+    LeadZ = std::min(LeadZ, BitWidth);
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+                APInt::getHighBitsSet(BitWidth, LeadZ);
+    KnownZero &= Mask;
+    return;
+  }
+  case ISD::UDIV: {
+    // For the purposes of computing leading zeros we can conservatively
+    // treat a udiv as a logical right shift by the largest power of 2
+    // known to be no larger than the denominator.
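+    // For example, dividing an i32 whose top 8 bits are known zero by a
+    // denominator known to have bit 4 set (i.e. at least 16) leaves at
+    // least 8 + 4 = 12 known leading zero bits in the quotient.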
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(Op.getOperand(1), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } + case ISD::SELECT: + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SELECT_CC: + ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + return; + // The boolean result conforms to getBooleanContents. Fall through. + case ISD::SETCC: + // If we know the result of a setcc has the top bits zero, use this info. + if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + case ISD::SHL: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShAmt; + KnownOne <<= ShAmt; + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); + } + return; + case ISD::SRL: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + KnownZero |= HighBits; // High bits known zero. + } + return; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. 
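+      // (A shift amount >= the bit width would be undefined.)  Otherwise the
+      // ShAmt bits shifted in at the top are all copies of the sign bit:
+      // e.g. for an i8 "sra X, 3" where bit 7 of X is known zero, the three
+      // new high bits are known zero as well.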
+ if (ShAmt >= BitWidth) + return; + + APInt InDemandedMask = (Mask << ShAmt); + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + if (HighBits.getBoolValue()) + InDemandedMask |= APInt::getSignBit(BitWidth); + + ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bits. + APInt SignBit = APInt::getSignBit(BitWidth); + SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask. + + if (KnownZero.intersects(SignBit)) { + KnownZero |= HighBits; // New bits are known zero. + } else if (KnownOne.intersects(SignBit)) { + KnownOne |= HighBits; // New bits are known one. + } + } + return; + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned EBits = EVT.getSizeInBits(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + + APInt InSignBit = APInt::getSignBit(EBits); + APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + InSignBit.zext(BitWidth); + if (NewBits.getBoolValue()) + InputDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + return; + } + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownOne.clear(); + return; + } + case ISD::LOAD: { + if (ISD::isZEXTLoad(Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + MVT VT = LD->getMemoryVT(); + unsigned MemBits = VT.getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + } + return; + } + case ISD::ZERO_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; + APInt InMask = Mask; + InMask.trunc(InBits); + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= NewBits; + return; + } + case ISD::SIGN_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InSignBit = APInt::getSignBit(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; + APInt InMask = Mask; + InMask.trunc(InBits); + + // If any of the sign extended bits are demanded, we know that the sign + // bit is demanded. Temporarily set this bit in the mask for our callee. 
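+    // For example, when sign-extending from i8 to i32, if any of bits 31..8
+    // of the result are demanded then bit 7 of the input is demanded too,
+    // since it is the bit that gets replicated upward.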
+    if (NewBits.getBoolValue())
+      InMask |= InSignBit;
+
+    KnownZero.trunc(InBits);
+    KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+    // Note if the sign bit is known to be zero or one.
+    bool SignBitKnownZero = KnownZero.isNegative();
+    bool SignBitKnownOne  = KnownOne.isNegative();
+    assert(!(SignBitKnownZero && SignBitKnownOne) &&
+           "Sign bit can't be known to be both zero and one!");
+
+    // If the sign bit wasn't actually demanded by our caller, we don't
+    // want it set in the KnownZero and KnownOne result values.  Reset the
+    // mask and reapply it to the result values.
+    InMask = Mask;
+    InMask.trunc(InBits);
+    KnownZero &= InMask;
+    KnownOne  &= InMask;
+
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+
+    // If the sign bit is known zero or one, the top bits match.
+    if (SignBitKnownZero)
+      KnownZero |= NewBits;
+    else if (SignBitKnownOne)
+      KnownOne |= NewBits;
+    return;
+  }
+  case ISD::ANY_EXTEND: {
+    MVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getSizeInBits();
+    APInt InMask = Mask;
+    InMask.trunc(InBits);
+    KnownZero.trunc(InBits);
+    KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    KnownZero.zext(BitWidth);
+    KnownOne.zext(BitWidth);
+    return;
+  }
+  case ISD::TRUNCATE: {
+    MVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getSizeInBits();
+    APInt InMask = Mask;
+    InMask.zext(InBits);
+    KnownZero.zext(InBits);
+    KnownOne.zext(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero.trunc(BitWidth);
+    KnownOne.trunc(BitWidth);
+    break;
+  }
+  case ISD::AssertZext: {
+    MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+                      KnownOne, Depth+1);
+    KnownZero |= (~InMask) & Mask;
+    return;
+  }
+  case ISD::FGETSIGN:
+    // All bits are zero except the low bit.
+    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    return;
+
+  case ISD::SUB: {
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+      // We know that the top bits of C-X are clear if X has fewer
+      // significant bits than C (i.e. no wrap-around can happen).  For
+      // example, 20-X is positive if we can prove that X is >= 0 and < 16.
+      if (CLHS->getAPIntValue().isNonNegative()) {
+        unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+        // NLZ can't be BitWidth with no sign bit
+        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+        ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+                          Depth+1);
+
+        // If all of the MaskV bits are known to be zero, then we know the
+        // output top bits are zero, because we now know that the output is
+        // from [0-C].
+        if ((KnownZero2 & MaskV) == MaskV) {
+          unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+          // Top bits known zero.
+          KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+        }
+      }
+    }
+  }
+  // fall through
+  case ISD::ADD: {
+    // The low bits of the output are known zero where they are known zero
+    // in both the LHS and the RHS.  For example, 8+(X<<3) is known to have
+    // the low 3 bits clear.
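+    // In general the count of known-zero low bits of the sum is the minimum
+    // of the counts for the two addends: e.g. (X<<4) + (Y<<2) has at least
+    // its low 2 bits clear.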
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + return; + } + case ISD::SREM: + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2() || (-RA).isPowerOf2()) { + APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + + // If the sign bit of the first operand is zero, the sign bit of + // the result is zero. If the first operand has no one bits below + // the second operand's single 1 bit, its sign will be zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero2 |= ~LowBits; + + KnownZero |= KnownZero2 & Mask; + + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + } + } + return; + case ISD::UREM: { + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, + Depth+1); + ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, + Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + return; + } + default: + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this); + } + return; + } +} + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "SRA X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + MVT VT = Op.getValueType(); + assert(VT.isInteger() && "Invalid VT!"); + unsigned VTBits = VT.getSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + if (Depth == 6) + return 1; // Limit search depth. 
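+
+  // For example, the i16 constant 0xFFF8 (-8) has 13 sign bits, and so does
+  // the i16 constant 0x0007: in both cases the top 13 bits are identical
+  // copies of the sign bit.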
+ + switch (Op.getOpcode()) { + default: break; + case ISD::AssertSext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp; + + case ISD::Constant: { + const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); + // If negative, return # leading ones. + if (Val.isNegative()) + return Val.countLeadingOnes(); + + // Return # leading zeros. + return Val.countLeadingZeros(); + } + + case ISD::SIGN_EXTEND: + Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits(); + return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; + + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + Tmp = VTBits-Tmp+1; + + Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + return std::max(Tmp, Tmp2); + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > VTBits) Tmp = VTBits; + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (C->getZExtValue() >= VTBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case ISD::SELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. Fall through. + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. + if (TLI.getBooleanContents() == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getZExtValue() & (VTBits-1); + + // Handle rotate right by N like a rotate left by 32-N. + if (Op.getOpcode() == ISD::ROTR) + RotAmt = (VTBits-RotAmt) & (VTBits-1); + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > RotAmt+1) return Tmp-RotAmt; + } + break; + case ISD::ADD: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. 
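+
+    // For example, adding two i32 values that each have at least 10 known
+    // sign bits produces a result with at least 9: the carry can disturb
+    // at most one of the replicated bits.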
+ + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)) == Mask) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + break; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)) == Mask) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + break; + case ISD::TRUNCATE: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getSizeInBits(); + return VTBits-Tmp; + } + } + + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) { + unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". 
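+  // For example, for an i8 value known to match 000001??, Mask arrives here
+  // as KnownZero = 11111000; after complementing, Mask = 00000111 and
+  // countLeadingZeros() reports the 5 identical top bits.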
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); +} + + +bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { + GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); + if (!GA) return false; + if (GA->getOffset() != 0) return false; + GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); + if (!GV) return false; + MachineModuleInfo *MMI = getMachineModuleInfo(); + return MMI && MMI->hasDebugInfo(); +} + + +/// getShuffleScalarElt - Returns the scalar element that will make up the ith +/// element of the result of the vector shuffle. +SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, + unsigned i) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + if (N->getMaskElt(i) < 0) + return getUNDEF(VT.getVectorElementType()); + unsigned Index = N->getMaskElt(i); + unsigned NumElems = VT.getVectorNumElements(); + SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); + Index %= NumElems; + + if (V.getOpcode() == ISD::BIT_CONVERT) { + V = V.getOperand(0); + MVT VVT = V.getValueType(); + if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) + return SDValue(); + } + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) + return (Index == 0) ? V.getOperand(0) + : getUNDEF(VT.getVectorElementType()); + if (V.getOpcode() == ISD::BUILD_VECTOR) + return V.getOperand(Index); + if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V)) + return getShuffleScalarElt(SVN, Index); + return SDValue(); +} + + +/// getNode - Gets or creates the specified node. +/// +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, getVTList(VT)); + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + MVT VT, SDValue Operand) { + // Constant fold unary operations with an integer constant operand. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { + const APInt &Val = C->getAPIntValue(); + unsigned BitWidth = VT.getSizeInBits(); + switch (Opcode) { + default: break; + case ISD::SIGN_EXTEND: + return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT); + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::TRUNCATE: + return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + const uint64_t zero[] = {0, 0}; + // No compile time operations on this type. 
+ if (VT==MVT::ppcf128) + break; + APFloat apf = APFloat(APInt(BitWidth, 2, zero)); + (void)apf.convertFromAPInt(Val, + Opcode==ISD::SINT_TO_FP, + APFloat::rmNearestTiesToEven); + return getConstantFP(apf, VT); + } + case ISD::BIT_CONVERT: + if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) + return getConstantFP(Val.bitsToFloat(), VT); + else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + return getConstantFP(Val.bitsToDouble(), VT); + break; + case ISD::BSWAP: + return getConstant(Val.byteSwap(), VT); + case ISD::CTPOP: + return getConstant(Val.countPopulation(), VT); + case ISD::CTLZ: + return getConstant(Val.countLeadingZeros(), VT); + case ISD::CTTZ: + return getConstant(Val.countTrailingZeros(), VT); + } + } + + // Constant fold unary operations with a floating point constant operand. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { + APFloat V = C->getValueAPF(); // make copy + if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) { + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, VT); + case ISD::FP_ROUND: + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*MVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + assert(integerPartWidth >= 64); + // FIXME need to be more flexible about rounding mode. + APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual + break; + APInt api(VT.getSizeInBits(), 2, x); + return getConstant(api, VT); + } + case ISD::BIT_CONVERT: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; + } + } + } + + unsigned OpOpcode = Operand.getNode()->getOpcode(); + switch (Opcode) { + case ISD::TokenFactor: + case ISD::MERGE_VALUES: + case ISD::CONCAT_VECTORS: + return Operand; // Factor, merge or concat of one node? No need. + case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node"); + case ISD::FP_EXTEND: + assert(VT.isFloatingPoint() && + Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); + if (Operand.getValueType() == VT) return Operand; // noop conversion. 
+ if (Operand.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SIGN_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid SIGN_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid sext node, dst < src!"); + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + break; + case ISD::ZERO_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ZERO_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid zext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) + return getNode(ISD::ZERO_EXTEND, DL, VT, + Operand.getNode()->getOperand(0)); + break; + case ISD::ANY_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ANY_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid anyext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) + // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + break; + case ISD::TRUNCATE: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid TRUNCATE!"); + if (Operand.getValueType() == VT) return Operand; // noop truncate + assert(Operand.getValueType().bitsGT(VT) + && "Invalid truncate node, src < dst!"); + if (OpOpcode == ISD::TRUNCATE) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { + // If the source is smaller than the dest, we still need an extend. + if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT)) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + else + return Operand.getNode()->getOperand(0); + } + break; + case ISD::BIT_CONVERT: + // Basic sanity checking. + assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() + && "Cannot BIT_CONVERT between types of different sizes!"); + if (VT == Operand.getValueType()) return Operand; // noop conversion. + if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SCALAR_TO_VECTOR: + assert(VT.isVector() && !Operand.getValueType().isVector() && + (VT.getVectorElementType() == Operand.getValueType() || + (VT.getVectorElementType().isInteger() && + Operand.getValueType().isInteger() && + VT.getVectorElementType().bitsLE(Operand.getValueType()))) && + "Illegal SCALAR_TO_VECTOR node!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined. 
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Operand.getOperand(1)) && + Operand.getConstantOperandVal(1) == 0 && + Operand.getOperand(0).getValueType() == VT) + return Operand.getOperand(0); + break; + case ISD::FNEG: + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + if (UnsafeFPMath && OpOpcode == ISD::FSUB) + return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), + Operand.getNode()->getOperand(0)); + if (OpOpcode == ISD::FNEG) // --X -> X + return Operand.getNode()->getOperand(0); + break; + case ISD::FABS: + if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) + return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0)); + break; + } + + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { // Don't CSE flag producing nodes + FoldingSetNodeID ID; + SDValue Ops[1] = { Operand }; + AddNodeIDNode(ID, Opcode, VTs, Ops, 1); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTs, Operand); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTs, Operand); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, + MVT VT, + ConstantSDNode *Cst1, + ConstantSDNode *Cst2) { + const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: return getConstant(C1 + C2, VT); + case ISD::SUB: return getConstant(C1 - C2, VT); + case ISD::MUL: return getConstant(C1 * C2, VT); + case ISD::UDIV: + if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); + break; + case ISD::UREM: + if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); + break; + case ISD::SDIV: + if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); + break; + case ISD::SREM: + if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); + break; + case ISD::AND: return getConstant(C1 & C2, VT); + case ISD::OR: return getConstant(C1 | C2, VT); + case ISD::XOR: return getConstant(C1 ^ C2, VT); + case ISD::SHL: return getConstant(C1 << C2, VT); + case ISD::SRL: return getConstant(C1.lshr(C2), VT); + case ISD::SRA: return getConstant(C1.ashr(C2), VT); + case ISD::ROTL: return getConstant(C1.rotl(C2), VT); + case ISD::ROTR: return getConstant(C1.rotr(C2), VT); + default: break; + } + + return SDValue(); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + switch (Opcode) { + default: break; + case ISD::TokenFactor: + assert(VT == MVT::Other && N1.getValueType() == MVT::Other && + N2.getValueType() == MVT::Other && "Invalid token factor!"); + // Fold trivial token factors. + if (N1.getOpcode() == ISD::EntryToken) return N2; + if (N2.getOpcode() == ISD::EntryToken) return N1; + if (N1 == N2) return N1; + break; + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. 
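+    // For example, (concat_vectors (build_vector a, b), (build_vector c, d))
+    // becomes (build_vector a, b, c, d).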
+ if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::AND: + assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's + // worth handling here. + if (N2C && N2C->isNullValue()) + return N2; + if (N2C && N2C->isAllOnesValue()) // X & -1 -> X + return N1; + break; + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so + // it's worth handling here. + if (N2C && N2C->isNullValue()) + return N1; + break; + case ISD::UDIV: + case ISD::UREM: + case ISD::MULHU: + case ISD::MULHS: + case ISD::MUL: + case ISD::SDIV: + case ISD::SREM: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + // fall through + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (UnsafeFPMath) { + if (Opcode == ISD::FADD) { + // 0+x --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) + if (CFP->getValueAPF().isZero()) + return N2; + // x+0 --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) + if (CFP->getValueAPF().isZero()) + return N1; + } else if (Opcode == ISD::FSUB) { + // x-0 --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) + if (CFP->getValueAPF().isZero()) + return N1; + } + } + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; + case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. + assert(N1.getValueType() == VT && + N1.getValueType().isFloatingPoint() && + N2.getValueType().isFloatingPoint() && + "Invalid FCOPYSIGN!"); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + assert(VT == N1.getValueType() && + "Shift operators return type must be the same as their first arg"); + assert(VT.isInteger() && N2.getValueType().isInteger() && + "Shifts only work on integers"); + + // Always fold shifts of i1 values so the code generator doesn't need to + // handle them. Since we know the size of the shift has to be less than the + // size of the value, the shift/rotate count is guaranteed to be zero. + if (VT == MVT::i1) + return N1; + break; + case ISD::FP_ROUND_INREG: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg round!"); + assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && + "Cannot FP_ROUND_INREG integer types"); + assert(EVT.bitsLE(VT) && "Not rounding down!"); + if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. + break; + } + case ISD::FP_ROUND: + assert(VT.isFloatingPoint() && + N1.getValueType().isFloatingPoint() && + VT.bitsLE(N1.getValueType()) && + isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!"); + if (N1.getValueType() == VT) return N1; // noop conversion. 
+ break; + case ISD::AssertSext: + case ISD::AssertZext: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (VT == EVT) return N1; // noop assertion. + break; + } + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (EVT == VT) return N1; // Not actually extending + + if (N1C) { + APInt Val = N1C->getAPIntValue(); + unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits(); + Val <<= Val.getBitWidth()-FromBits; + Val = Val.ashr(Val.getBitWidth()-FromBits); + return getConstant(Val, VT); + } + break; + } + case ISD::EXTRACT_VECTOR_ELT: + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. + if (N1.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is + // expanding copies of large vectors from registers. + if (N2C && + N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0) { + unsigned Factor = + N1.getOperand(0).getValueType().getVectorNumElements(); + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + N1.getOperand(N2C->getZExtValue() / Factor), + getConstant(N2C->getZExtValue() % Factor, + N2.getValueType())); + } + + // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is + // expanding large vector constants. + if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { + SDValue Elt = N1.getOperand(N2C->getZExtValue()); + if (Elt.getValueType() != VT) { + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + assert(VT.isInteger() && Elt.getValueType().isInteger() && + VT.bitsLE(Elt.getValueType()) && + "Bad type for BUILD_VECTOR operand"); + Elt = getNode(ISD::TRUNCATE, DL, VT, Elt); + } + return Elt; + } + + // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector + // operations are lowered to scalars. + if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { + // If the indices are the same, return the inserted element. + if (N1.getOperand(2) == N2) + return N1.getOperand(1); + // If the indices are known different, extract the element from + // the original vector. + else if (isa<ConstantSDNode>(N1.getOperand(2)) && + isa<ConstantSDNode>(N2)) + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); + } + break; + case ISD::EXTRACT_ELEMENT: + assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); + assert(!N1.getValueType().isVector() && !VT.isVector() && + (N1.getValueType().isInteger() == VT.isInteger()) && + "Wrong types for EXTRACT_ELEMENT!"); + + // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding + // 64-bit integers into 32-bit parts. Instead of building the extract of + // the BUILD_PAIR, only to have legalize rip it apart, just do it now. + if (N1.getOpcode() == ISD::BUILD_PAIR) + return N1.getOperand(N2C->getZExtValue()); + + // EXTRACT_ELEMENT of a constant int is also very common. 
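+    // Illustrative note (editor's sketch, not from the original patch):
+    // the fold below computes (C >> (ElementSize * Index)) truncated to
+    // the result width, e.g.
+    //   (i32 extract_element (i64 0xAAAABBBBCCCCDDDD), 1) -> (i32 0xAAAABBBB)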
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      unsigned ElementSize = VT.getSizeInBits();
+      unsigned Shift = ElementSize * N2C->getZExtValue();
+      APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+      return getConstant(ShiftedVal.trunc(ElementSize), VT);
+    }
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    if (N1.getValueType() == VT) // Trivial extraction.
+      return N1;
+    break;
+  }
+
+  if (N1C) {
+    if (N2C) {
+      SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+      if (SV.getNode()) return SV;
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1C, N2C);
+        std::swap(N1, N2);
+      }
+    }
+  }
+
+  // Constant fold FP operations.
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+  if (N1CFP) {
+    if (!N2CFP && isCommutativeBinOp(Opcode)) {
+      // Canonicalize constant to RHS if commutative
+      std::swap(N1CFP, N2CFP);
+      std::swap(N1, N2);
+    } else if (N2CFP && VT != MVT::ppcf128) {
+      APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+      APFloat::opStatus s;
+      switch (Opcode) {
+      case ISD::FADD:
+        s = V1.add(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FSUB:
+        s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FMUL:
+        s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FDIV:
+        s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp && s != APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FREM:
+        s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp && s != APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FCOPYSIGN:
+        V1.copySign(V2);
+        return getConstantFP(V1, VT);
+      default: break;
+      }
+    }
+  }
+
+  // Canonicalize an UNDEF to the RHS, even over a constant.
+  if (N1.getOpcode() == ISD::UNDEF) {
+    if (isCommutativeBinOp(Opcode)) {
+      std::swap(N1, N2);
+    } else {
+      switch (Opcode) {
+      case ISD::FP_ROUND_INREG:
+      case ISD::SIGN_EXTEND_INREG:
+      case ISD::SUB:
+      case ISD::FSUB:
+      case ISD::FDIV:
+      case ISD::FREM:
+      case ISD::SRA:
+        return N1;     // fold op(undef, arg2) -> undef
+      case ISD::UDIV:
+      case ISD::SDIV:
+      case ISD::UREM:
+      case ISD::SREM:
+      case ISD::SRL:
+      case ISD::SHL:
+        if (!VT.isVector())
+          return getConstant(0, VT);    // fold op(undef, arg2) -> 0
+        // For vectors, we can't easily build an all zero vector, just return
+        // the LHS.
+        return N2;
+      }
+    }
+  }
+
+  // Fold a bunch of operators when the RHS is undef.
+  if (N2.getOpcode() == ISD::UNDEF) {
+    switch (Opcode) {
+    case ISD::XOR:
+      if (N1.getOpcode() == ISD::UNDEF)
+        // Handle undef ^ undef -> 0 special case. This is a common
+        // idiom (misuse).
+        return getConstant(0, VT);
+      // fallthrough
+    case ISD::ADD:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::SUB:
+    case ISD::FADD:
+    case ISD::FSUB:
+    case ISD::FMUL:
+    case ISD::FDIV:
+    case ISD::FREM:
+    case ISD::UDIV:
+    case ISD::SDIV:
+    case ISD::UREM:
+    case ISD::SREM:
+      return N2;       // fold op(arg1, undef) -> undef
+    case ISD::MUL:
+    case ISD::AND:
+    case ISD::SRL:
+    case ISD::SHL:
+      if (!VT.isVector())
+        return getConstant(0, VT);  // fold op(arg1, undef) -> 0
+      // For vectors, we can't easily build an all zero vector, just return
+      // the LHS.
+ return N1; + case ISD::OR: + if (!VT.isVector()) + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + // For vectors, we can't easily build an all one vector, just return + // the LHS. + return N1; + case ISD::SRA: + return N1; + } + } + + // Memoize this node if possible. + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDValue Ops[] = { N1, N2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2, SDValue N3) { + // Perform various simplifications. + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + switch (Opcode) { + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. + if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR && + N3.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::SETCC: { + // Use FoldSetCC to simplify SETCC's. + SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL); + if (Simp.getNode()) return Simp; + break; + } + case ISD::SELECT: + if (N1C) { + if (N1C->getZExtValue()) + return N2; // select true, X, Y -> X + else + return N3; // select false, X, Y -> Y + } + + if (N2 == N3) return N2; // select C, X, X -> X + break; + case ISD::BRCOND: + if (N2C) { + if (N2C->getZExtValue()) // Unconditional branch + return getNode(ISD::BR, DL, MVT::Other, N1, N3); + else + return N1; // Never-taken branch + } + break; + case ISD::VECTOR_SHUFFLE: + assert(0 && "should use getVectorShuffle constructor!"); + break; + case ISD::BIT_CONVERT: + // Fold bit_convert nodes from a type to themselves. + if (N1.getValueType() == VT) + return N1; + break; + } + + // Memoize node if it doesn't produce a flag. 
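+  // Editor's note (illustrative, not from the original patch): because of
+  // this memoization, building the same (Opcode, VTs, Ops) twice yields the
+  // same SDNode, e.g. in caller code:
+  //   SDValue A = DAG.getNode(ISD::ADD, dl, MVT::i32, X, Y);
+  //   SDValue B = DAG.getNode(ISD::ADD, dl, MVT::i32, X, Y);
+  //   assert(A == B && "CSE'd to the same node");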
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Flag) {
+    SDValue Ops[] = { N1, N2, N3 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+    N = NodeAllocator.Allocate<TernarySDNode>();
+    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<TernarySDNode>();
+    new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+  }
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+                              SDValue N1, SDValue N2, SDValue N3,
+                              SDValue N4) {
+  SDValue Ops[] = { N1, N2, N3, N4 };
+  return getNode(Opcode, DL, VT, Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+                              SDValue N1, SDValue N2, SDValue N3,
+                              SDValue N4, SDValue N5) {
+  SDValue Ops[] = { N1, N2, N3, N4, N5 };
+  return getNode(Opcode, DL, VT, Ops, 5);
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG,
+                              DebugLoc dl) {
+  unsigned NumBits = VT.isVector() ?
+    VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+    APInt Val = APInt(NumBits, C->getZExtValue() & 255);
+    unsigned Shift = 8;
+    for (unsigned i = NumBits; i > 8; i >>= 1) {
+      Val = (Val << Shift) | Val;
+      Shift <<= 1;
+    }
+    if (VT.isInteger())
+      return DAG.getConstant(Val, VT);
+    return DAG.getConstantFP(APFloat(Val), VT);
+  }
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Value = DAG.getNode(ISD::OR, dl, VT,
+                        DAG.getNode(ISD::SHL, dl, VT, Value,
+                                    DAG.getConstant(Shift,
+                                                    TLI.getShiftAmountTy())),
+                        Value);
+    Shift <<= 1;
+  }
+
+  return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG,
+                                  const TargetLowering &TLI,
+                                  std::string &Str, unsigned Offset) {
+  // Handle vector with all elements zero.
+  if (Str.empty()) {
+    if (VT.isInteger())
+      return DAG.getConstant(0, VT);
+    unsigned NumElts = VT.getVectorNumElements();
+    MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                       DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts)));
+  }
+
+  assert(!VT.isVector() && "Can't handle vector type here!");
+  unsigned NumBits = VT.getSizeInBits();
+  unsigned MSB = NumBits / 8;
+  uint64_t Val = 0;
+  if (TLI.isLittleEndian())
+    Offset = Offset + MSB - 1;
+  for (unsigned i = 0; i != MSB; ++i) {
+    Val = (Val << 8) | (unsigned char)Str[Offset];
+    Offset += TLI.isLittleEndian() ? -1 : 1;
+  }
+  return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns a node representing the sum of the given
+/// base pointer and offset.
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+                                    SelectionDAG &DAG) {
+  MVT VT = Base.getValueType();
+  return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+                     VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
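+/// (Editor's illustrative note, not from the original patch: this matches
+/// Src patterns like a bare GlobalAddress, or GlobalAddress plus a constant
+/// offset, e.g. the source operand of memcpy(dst, str + 3, n) for a
+/// constant global str.)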
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+  unsigned SrcDelta = 0;
+  GlobalAddressSDNode *G = NULL;
+  if (Src.getOpcode() == ISD::GlobalAddress)
+    G = cast<GlobalAddressSDNode>(Src);
+  else if (Src.getOpcode() == ISD::ADD &&
+           Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+           Src.getOperand(1).getOpcode() == ISD::Constant) {
+    G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+    SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+  }
+  if (!G)
+    return false;
+
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+  if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+    return true;
+
+  return false;
+}
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static
+bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps,
+                              SDValue Dst, SDValue Src,
+                              unsigned Limit, uint64_t Size, unsigned &Align,
+                              std::string &Str, bool &isSrcStr,
+                              SelectionDAG &DAG,
+                              const TargetLowering &TLI) {
+  isSrcStr = isMemSrcFromString(Src, Str);
+  bool isSrcConst = isa<ConstantSDNode>(Src);
+  bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses();
+  MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr);
+  if (VT != MVT::iAny) {
+    unsigned NewAlign = (unsigned)
+      TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT());
+    // If source is a string constant, this will require an unaligned load.
+    if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
+      if (Dst.getOpcode() != ISD::FrameIndex) {
+        // Can't change destination alignment. It requires an unaligned store.
+        if (AllowUnalign)
+          VT = MVT::iAny;
+      } else {
+        int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
+        MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+        if (MFI->isFixedObjectIndex(FI)) {
+          // Can't change destination alignment. It requires an unaligned
+          // store.
+          if (AllowUnalign)
+            VT = MVT::iAny;
+        } else {
+          // Give the stack frame object a larger alignment if needed.
+          if (MFI->getObjectAlignment(FI) < NewAlign)
+            MFI->setObjectAlignment(FI, NewAlign);
+          Align = NewAlign;
+        }
+      }
+    }
+  }
+
+  if (VT == MVT::iAny) {
+    if (AllowUnalign) {
+      VT = MVT::i64;
+    } else {
+      switch (Align & 7) {
+      case 0:  VT = MVT::i64; break;
+      case 4:  VT = MVT::i32; break;
+      case 2:  VT = MVT::i16; break;
+      default: VT = MVT::i8;  break;
+      }
+    }
+
+    MVT LVT = MVT::i64;
+    while (!TLI.isTypeLegal(LVT))
+      LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1);
+    assert(LVT.isInteger());
+
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
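+      // Editor's illustrative note (not from the original patch): VT is
+      // repeatedly shrunk below until it fits the remaining size, so e.g. a
+      // 7-byte copy with VT starting at i32 produces the op sequence
+      // i32, i16, i8.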
+      if (VT.isVector()) {
+        VT = MVT::i64;
+        while (!TLI.isTypeLegal(VT))
+          VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+        VTSize = VT.getSizeInBits() / 8;
+      } else {
+        VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1);
+        VTSize >>= 1;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false;
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+                                       SDValue Chain, SDValue Dst,
+                                       SDValue Src, uint64_t Size,
+                                       unsigned Align, bool AlwaysInline,
+                                       const Value *DstSV, uint64_t DstSVOff,
+                                       const Value *SrcSV, uint64_t SrcSVOff){
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memcpy to a series of load and store ops if the size operand falls
+  // below a certain threshold.
+  std::vector<MVT> MemOps;
+  uint64_t Limit = -1ULL;
+  if (!AlwaysInline)
+    Limit = TLI.getMaxStoresPerMemcpy();
+  unsigned DstAlign = Align;  // Destination alignment can change.
+  std::string Str;
+  bool CopyFromStr;
+  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+                                Str, CopyFromStr, DAG, TLI))
+    return SDValue();
+
+  bool isZeroStr = CopyFromStr && Str.empty();
+  SmallVector<SDValue, 8> OutChains;
+  unsigned NumMemOps = MemOps.size();
+  uint64_t SrcOff = 0, DstOff = 0;
+  for (unsigned i = 0; i < NumMemOps; i++) {
+    MVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value, Store;
+
+    if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+      // It's unlikely a store of a vector immediate can be done in a single
+      // instruction. It would require a load from a constant pool first.
+      // We also handle storing a vector with all zeros.
+      // FIXME: Handle other cases where store of vector immediate is done in
+      // a single instruction.
+      Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+      Store = DAG.getStore(Chain, dl, Value,
+                           getMemBasePlusOffset(Dst, DstOff, DAG),
+                           DstSV, DstSVOff + DstOff, false, DstAlign);
+    } else {
+      Value = DAG.getLoad(VT, dl, Chain,
+                          getMemBasePlusOffset(Src, SrcOff, DAG),
+                          SrcSV, SrcSVOff + SrcOff, false, Align);
+      Store = DAG.getStore(Chain, dl, Value,
+                           getMemBasePlusOffset(Dst, DstOff, DAG),
+                           DstSV, DstSVOff + DstOff, false, DstAlign);
+    }
+    OutChains.push_back(Store);
+    SrcOff += VTSize;
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+                                        SDValue Chain, SDValue Dst,
+                                        SDValue Src, uint64_t Size,
+                                        unsigned Align, bool AlwaysInline,
+                                        const Value *DstSV, uint64_t DstSVOff,
+                                        const Value *SrcSV, uint64_t SrcSVOff){
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Expand memmove to a series of load and store ops if the size operand
+  // falls below a certain threshold.
+  std::vector<MVT> MemOps;
+  uint64_t Limit = -1ULL;
+  if (!AlwaysInline)
+    Limit = TLI.getMaxStoresPerMemmove();
+  unsigned DstAlign = Align;  // Destination alignment can change.
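+  // Editor's note (illustrative, not from the original patch): unlike the
+  // memcpy expansion above, the loops below issue all of the loads before
+  // any of the stores, so the expansion stays correct when the source and
+  // destination ranges overlap.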
+ std::string Str; + bool CopyFromStr; + if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign, + Str, CopyFromStr, DAG, TLI)) + return SDValue(); + + uint64_t SrcOff = 0, DstOff = 0; + + SmallVector<SDValue, 8> LoadValues; + SmallVector<SDValue, 8> LoadChains; + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + Value = DAG.getLoad(VT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcSVOff + SrcOff, false, Align); + LoadValues.push_back(Value); + LoadChains.push_back(Value.getValue(1)); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + OutChains.clear(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + Store = DAG.getStore(Chain, dl, LoadValues[i], + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, false, DstAlign); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memset to a series of load/store ops if the size operand + // falls below a certain threshold. + std::vector<MVT> MemOps; + std::string Str; + bool CopyFromStr; + if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(), + Size, Align, Str, CopyFromStr, DAG, TLI)) + return SDValue(); + + SmallVector<SDValue, 8> OutChains; + uint64_t DstOff = 0; + + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value = getMemsetValue(Src, VT, DAG, dl); + SDValue Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { + + // Check to see if we should lower the memcpy to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memcpy with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), + Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memcpy with target-specific + // code. If the target chooses to do this, this is the next best. 
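+  // Editor's summary (illustrative, not from the original patch): the
+  // overall strategy is a three-step ladder:
+  //   1) expand inline to loads/stores when the size is a small constant,
+  //   2) let the target emit custom code via EmitTargetCodeForMemcpy,
+  //   3) otherwise fall back to a library call to memcpy.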
+ SDValue Result = + TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + AlwaysInline, + DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, true, + DstSV, DstSVOff, SrcSV, SrcSVOff); + } + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, false, CallingConv::C, false, + getExternalSymbol("memcpy", TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { + + // Check to see if we should lower the memmove to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memmove with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), + Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, + DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, false, CallingConv::C, false, + getExternalSymbol("memmove", TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff) { + + // Check to see if we should lower the memset to stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memset with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + Align, DstSV, DstSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memset with target-specific + // code. If the target chooses to do this, this is the next best. 
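+  // Editor's note (illustrative, not from the original patch): the same
+  // ladder applies here, with one wrinkle visible in the fallback below:
+  // the value operand is truncated or zero-extended to i32, matching the
+  // int parameter of the C library's memset(void*, int, size_t).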
+  SDValue Result =
+    TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align,
+                                DstSV, DstSVOff);
+  if (Result.getNode())
+    return Result;
+
+  // Emit a library call.
+  const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Dst; Entry.Ty = IntPtrTy;
+  Args.push_back(Entry);
+  // Extend or truncate the argument to be an i32 value for the call.
+  if (Src.getValueType().bitsGT(MVT::i32))
+    Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+  else
+    Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+  Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+  Args.push_back(Entry);
+  Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+  Args.push_back(Entry);
+  // FIXME: pass in DebugLoc
+  std::pair<SDValue,SDValue> CallResult =
+    TLI.LowerCallTo(Chain, Type::VoidTy,
+                    false, false, false, false, CallingConv::C, false,
+                    getExternalSymbol("memset", TLI.getPointerTy()),
+                    Args, *this, dl);
+  return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+                                SDValue Chain,
+                                SDValue Ptr, SDValue Cmp,
+                                SDValue Swp, const Value* PtrVal,
+                                unsigned Alignment) {
+  assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+  assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+  MVT VT = Cmp.getValueType();
+
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getMVTAlignment(MemVT);
+
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  FoldingSetNodeID ID;
+  ID.AddInteger(MemVT.getRawBits());
+  SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+  AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
+                       Chain, Ptr, Cmp, Swp, PtrVal, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT,
+                                SDValue Chain,
+                                SDValue Ptr, SDValue Val,
+                                const Value* PtrVal,
+                                unsigned Alignment) {
+  assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+          Opcode == ISD::ATOMIC_LOAD_SUB ||
+          Opcode == ISD::ATOMIC_LOAD_AND ||
+          Opcode == ISD::ATOMIC_LOAD_OR ||
+          Opcode == ISD::ATOMIC_LOAD_XOR ||
+          Opcode == ISD::ATOMIC_LOAD_NAND ||
+          Opcode == ISD::ATOMIC_LOAD_MIN ||
+          Opcode == ISD::ATOMIC_LOAD_MAX ||
+          Opcode == ISD::ATOMIC_LOAD_UMIN ||
+          Opcode == ISD::ATOMIC_LOAD_UMAX ||
+          Opcode == ISD::ATOMIC_SWAP) &&
+         "Invalid Atomic Op");
+
+  MVT VT = Val.getValueType();
+
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getMVTAlignment(MemVT);
+
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  FoldingSetNodeID ID;
+  ID.AddInteger(MemVT.getRawBits());
+  SDValue Ops[] = {Chain, Ptr, Val};
+  AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+  void* IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
+  new (N) AtomicSDNode(Opcode, dl, VTs, MemVT,
+                       Chain, Ptr, Val, PtrVal, Alignment);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+/// When there is only one operand, that operand is returned directly
+/// instead of building a MERGE_VALUES node.
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, + DebugLoc dl) { + if (NumOps == 1) + return Ops[0]; + + SmallVector<MVT, 4> VTs; + VTs.reserve(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), + Ops, NumOps); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, + const MVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps, + MVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, + MemVT, srcValue, SVOff, Align, Vol, + ReadMem, WriteMem); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + MVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + // Memoize the node unless it returns a flag. + MemIntrinsicSDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, + srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, + srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + } + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, + bool IsTailCall, bool IsInreg, SDVTList VTs, + const SDValue *Operands, unsigned NumOperands) { + // Do not include isTailCall in the folding set profile. + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands); + ID.AddInteger(CallingConv); + ID.AddInteger(IsVarArgs); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + // Instead of including isTailCall in the folding set, we just + // set the flag of the existing node. + if (!IsTailCall) + cast<CallSDNode>(E)->setNotTailCall(); + return SDValue(E, 0); + } + SDNode *N = NodeAllocator.Allocate<CallSDNode>(); + new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg, + VTs, Operands, NumOperands); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, + ISD::LoadExtType ExtType, MVT VT, SDValue Chain, + SDValue Ptr, SDValue Offset, + const Value *SV, int SVOffset, MVT EVT, + bool isVolatile, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + if (VT == EVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == EVT && "Non-extending load from different memory type!"); + } else { + // Extending load. 
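+    // Editor's illustrative note (not from the original patch): e.g. an
+    // i16 value in memory loaded into an i32 register, where ExtType
+    // (SEXTLOAD, ZEXTLOAD, or EXTLOAD) picks how the high bits are filled.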
+    if (VT.isVector())
+      assert(EVT.getVectorNumElements() == VT.getVectorNumElements() &&
+             "Invalid vector extload!");
+    else
+      assert(EVT.bitsLT(VT) &&
+             "Should only be an extending load, not truncating!");
+    assert((ExtType == ISD::EXTLOAD || VT.isInteger()) &&
+           "Cannot sign/zero extend a FP/Vector load!");
+    assert(VT.isInteger() == EVT.isInteger() &&
+           "Cannot convert from FP to Int or Int -> FP!");
+  }
+
+  bool Indexed = AM != ISD::UNINDEXED;
+  assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+         "Unindexed load with an offset!");
+
+  SDVTList VTs = Indexed ?
+    getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(EVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
+  new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset,
+                     Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl,
+                              SDValue Chain, SDValue Ptr,
+                              const Value *SV, int SVOffset,
+                              bool isVolatile, unsigned Alignment) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
+                 SV, SVOffset, VT, isVolatile, Alignment);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT,
+                                 SDValue Chain, SDValue Ptr,
+                                 const Value *SV,
+                                 int SVOffset, MVT EVT,
+                                 bool isVolatile, unsigned Alignment) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
+                 SV, SVOffset, EVT, isVolatile, Alignment);
+}
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+                             SDValue Offset, ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already an indexed load!");
+  return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
+                 LD->getChain(), Base, Offset, LD->getSrcValue(),
+                 LD->getSrcValueOffset(), LD->getMemoryVT(),
+                 LD->isVolatile(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, const Value *SV, int SVOffset,
+                               bool isVolatile, unsigned Alignment) {
+  MVT VT = Val.getValueType();
+
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getMVTAlignment(VT);
+
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED,
+                                     isVolatile, Alignment));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+  new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false,
+                      VT, SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, const Value *SV,
+                                    int SVOffset, MVT SVT,
+                                    bool isVolatile, unsigned Alignment) {
+  MVT VT = Val.getValueType();
+
+  if (VT == SVT)
+    return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment);
+
+  assert(VT.bitsGT(SVT) && "Not a truncation?");
+  assert(VT.isInteger() == SVT.isInteger() &&
+         "Can't do FP-INT conversion!");
+
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getMVTAlignment(VT);
+
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(SVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED,
+                                     isVolatile, Alignment));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+  new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true,
+                      SVT, SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+                              SDValue Offset, ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already an indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ST->getMemoryVT().getRawBits());
+  ID.AddInteger(ST->getRawSubclassData());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+  SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
+  new (N) StoreSDNode(Ops, dl, VTs, AM,
+                      ST->isTruncatingStore(), ST->getMemoryVT(),
+                      ST->getSrcValue(), ST->getSrcValueOffset(),
+                      ST->getAlignment(), ST->isVolatile());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl,
+                               SDValue Chain, SDValue Ptr,
+                               SDValue SV) {
+  SDValue Ops[] = { Chain, Ptr, SV };
+  return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+                              const SDUse *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+
+  // Copy from an SDUse array into an SDValue array for use with
+  // the regular getNode logic.
+  SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+  return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
+                              const SDValue *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+
+  switch (Opcode) {
+  default: break;
+  case ISD::SELECT_CC: {
+    assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+    assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+           "LHS and RHS of condition must have same type!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "True and False arms of SelectCC must have same type!");
+    assert(Ops[2].getValueType() == VT &&
+           "select_cc node must be of same type as true and false value!");
+    break;
+  }
+  case ISD::BR_CC: {
+    assert(NumOps == 5 && "BR_CC takes 5 operands!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "LHS/RHS of comparison should match types!");
+    break;
+  }
+  }
+
+  // Memoize nodes.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+
+  if (VT != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+    void *IP = 0;
+
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = NodeAllocator.Allocate<SDNode>();
+    new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = NodeAllocator.Allocate<SDNode>();
+    new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifyNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const std::vector<MVT> &ResultTys,
+                              const SDValue *Ops, unsigned NumOps) {
+  return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+                 Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const MVT *VTs, unsigned NumVTs,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (NumVTs == 1)
+    return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+  return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (VTList.NumVTs == 1)
+    return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+  switch (Opcode) {
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+#if 0
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+  case ISD::SHL_PARTS:
+    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+    else if (N3.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = VT.getSizeInBits()*2;
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+      }
+    break;
+#endif
+  }
+
+  // Memoize the node unless it returns a flag.
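+  // Editor's note (illustrative, not from the original patch): nodes whose
+  // last result is MVT::Flag are glued to a specific neighboring node, so
+  // sharing them through the CSE map would be incorrect; they are always
+  // allocated fresh.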
+ SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + if (NumOps == 1) { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + } else { + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + } else { + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { + return getNode(Opcode, DL, VTList, 0, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops, 1); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops, 2); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops, 5); +} + +SDVTList SelectionDAG::getVTList(MVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) + return *I; + + MVT *Array = Allocator.Allocate<MVT>(2); + Array[0] = VT1; + Array[1] = VT2; + SDVTList Result = makeVTList(Array, 2); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3) + return *I; + + MVT *Array = Allocator.Allocate<MVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + SDVTList Result = makeVTList(Array, 3); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { + 
for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+       E = VTList.rend(); I != E; ++I)
+    if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+        I->VTs[2] == VT3 && I->VTs[3] == VT4)
+      return *I;
+
+  MVT *Array = Allocator.Allocate<MVT>(4);
+  Array[0] = VT1;
+  Array[1] = VT2;
+  Array[2] = VT3;
+  Array[3] = VT4;
+  SDVTList Result = makeVTList(Array, 4);
+  VTList.push_back(Result);
+  return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) {
+  switch (NumVTs) {
+  case 0: assert(0 && "Cannot have nodes without results!");
+  case 1: return getVTList(VTs[0]);
+  case 2: return getVTList(VTs[0], VTs[1]);
+  case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+  default: break;
+  }
+
+  for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+       E = VTList.rend(); I != E; ++I) {
+    if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+      continue;
+
+    bool NoMatch = false;
+    for (unsigned i = 2; i != NumVTs; ++i)
+      if (VTs[i] != I->VTs[i]) {
+        NoMatch = true;
+        break;
+      }
+    if (!NoMatch)
+      return *I;
+  }
+
+  MVT *Array = Allocator.Allocate<MVT>(NumVTs);
+  std::copy(VTs, VTs+NumVTs, Array);
+  SDVTList Result = makeVTList(Array, NumVTs);
+  VTList.push_back(Result);
+  return Result;
+}
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
+  SDNode *N = InN.getNode();
+  assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+  // Check to see if there is no change.
+  if (Op == N->getOperand(0)) return InN;
+
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+    return SDValue(Existing, InN.getResNo());
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    if (!RemoveNodeFromCSEMaps(N))
+      InsertPos = 0;
+
+  // Now we update the operands.
+  N->OperandList[0].set(Op);
+
+  // If this gets put into a CSE map, add it.
+  if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+  return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
+  SDNode *N = InN.getNode();
+  assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+  // Check to see if there is no change.
+  if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+    return InN;   // No operands changed, just return the input node.
+
+  // See if the modified node already exists.
+  void *InsertPos = 0;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+    return SDValue(Existing, InN.getResNo());
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+  if (InsertPos)
+    if (!RemoveNodeFromCSEMaps(N))
+      InsertPos = 0;
+
+  // Now we update the operands.
+  if (N->OperandList[0] != Op1)
+    N->OperandList[0].set(Op1);
+  if (N->OperandList[1] != Op2)
+    N->OperandList[1].set(Op2);
+
+  // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops, 3); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops, 4); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops, 5); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { + SDNode *N = InN.getNode(); + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // Check to see if there is no change. + bool AnyChange = false; + for (unsigned i = 0; i != NumOps; ++i) { + if (Ops[i] != N->getOperand(i)) { + AnyChange = true; + break; + } + } + + // No operands changed, just return the input node. + if (!AnyChange) return InN; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + return SDValue(Existing, InN.getResNo()); + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. 
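+/// (Editor's note, not from the original patch: note the ~MachineOpc in the
+/// SDVTList overload below; in this code base target/machine opcodes are
+/// stored bit-complemented to distinguish them from ISD opcodes, which is
+/// also why getTargetNode passes ~Opcode to getNode.)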
+/// +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, 0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, MVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT) { + SDVTList VTs = getVTList(VT); + return MorphNodeTo(N, Opc, VTs, 0, 0); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return MorphNodeTo(N, Opc, VTs, Ops, 1); +} + +SDNode 
*SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return MorphNodeTo(N, Opc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return MorphNodeTo(N, Opc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return MorphNodeTo(N, Opc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return MorphNodeTo(N, Opc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return MorphNodeTo(N, Opc, VTs, Ops, 3); +} + +/// MorphNodeTo - These *mutate* the specified node to have the specified +/// return type, opcode, and operands. +/// +/// Note that MorphNodeTo returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. Note that the DebugLoc need not be the same. +/// +/// Using MorphNodeTo is faster than creating a new node and swapping it in +/// with ReplaceAllUsesWith both because it often avoids allocating a new +/// node, and because it doesn't require CSE recalculation for any of +/// the node's users. +/// +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + // If an identical node already exists, use it. + void *IP = 0; + if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + } + + if (!RemoveNodeFromCSEMaps(N)) + IP = 0; + + // Start the morphing. + N->NodeType = Opc; + N->ValueList = VTs.VTs; + N->NumValues = VTs.NumVTs; + + // Clear the operands list, updating used nodes to remove this from their + // use list. Keep track of any operands that become dead as a result. + SmallPtrSet<SDNode*, 16> DeadNodeSet; + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Used = Use.getNode(); + Use.set(SDValue()); + if (Used->use_empty()) + DeadNodeSet.insert(Used); + } + + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. 
+ if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + if (N->isMachineOpcode()) { + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->OperandList = OperandAllocator.Allocate<SDUse>(NumOps); + N->OperandsNeedDelete = false; + } else { + N->OperandList = new SDUse[NumOps]; + N->OperandsNeedDelete = true; + } + } + + // Assign the new operands. + N->NumOperands = NumOps; + for (unsigned i = 0, e = NumOps; i != e; ++i) { + N->OperandList[i].setUser(N); + N->OperandList[i].setInitial(Ops[i]); + } + + // Delete any nodes that are still dead after adding the uses for the + // new operands. + SmallVector<SDNode *, 16> DeadNodes; + for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), + E = DeadNodeSet.end(); I != E; ++I) + if ((*I)->use_empty()) + DeadNodes.push_back(*I); + RemoveDeadNodes(DeadNodes); + + if (IP) + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + + +/// getTargetNode - These are used for target selectors to create a new node +/// with specified return type(s), target opcode, and operands. +/// +/// Note that getTargetNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) { + return getNode(~Opcode, dl, VT).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1) { + return getNode(~Opcode, dl, VT, Op1).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1, SDValue Op2) { + return getNode(~Opcode, dl, VT, Op1, Op2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1, SDValue Op2, + SDValue Op3) { + return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + const SDValue *Ops, unsigned NumOps) { + return getNode(~Opcode, dl, VT, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Op; + return getNode(~Opcode, dl, VTs, &Op, 0).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + return getNode(~Opcode, dl, VTs, &Op1, 1).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue 
Ops[] = { Op1, Op2 };
+  return getNode(~Opcode, dl, VTs, Ops, 2).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+                                    MVT VT1, MVT VT2, MVT VT3,
+                                    SDValue Op1, SDValue Op2,
+                                    SDValue Op3) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return getNode(~Opcode, dl, VTs, Ops, 3).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+                                    MVT VT1, MVT VT2, MVT VT3,
+                                    const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1,
+                                    MVT VT2, MVT VT3, MVT VT4,
+                                    const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+  return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode();
+}
+
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl,
+                                    const std::vector<MVT> &ResultTys,
+                                    const SDValue *Ops, unsigned NumOps) {
+  return getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode();
+}
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+                                      const SDValue *Ops, unsigned NumOps) {
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return E;
+  }
+  return NULL;
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
+                                      DAGUpdateListener *UpdateListener) {
+  SDNode *From = FromN.getNode();
+  assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+         "Cannot replace with this method!");
+  assert(From != To.getNode() && "Cannot replace uses of a value with itself");
+
+  // Iterate over all the existing uses of From. New uses will be added
+  // to the beginning of the use list, which we avoid visiting.
+  // This specifically avoids visiting uses of From that arise while the
+  // replacement is happening, because any such uses would be the result
+  // of CSE: If an existing node looks like From after one of its operands
+  // is replaced by To, we don't want to replace all of its uses with To
+  // too. See PR3018 for more info.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      ++UI;
+      Use.set(To);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
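+// For illustration, a minimal sketch of the canonical client pattern for the
+// single-result overload above (hypothetical code): fold (add x, 0) -> x by
+// redirecting every user of N to N's first operand.  Passing a null
+// DAGUpdateListener is the common case; N itself becomes dead and can later
+// be reclaimed with RemoveDeadNodes.
+#if 0
+static void FoldAddOfZero(SelectionDAG &DAG, SDNode *N) {
+  if (N->getOpcode() != ISD::ADD)
+    return;
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!C || C->getZExtValue() != 0)
+    return;
+  // Every user of N's value now uses x directly; N is left dead.
+  DAG.ReplaceAllUsesWith(SDValue(N, 0), N->getOperand(0), 0);
+}
+#endif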
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+                                      DAGUpdateListener *UpdateListener) {
+#ifndef NDEBUG
+  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+    assert((!From->hasAnyUseOfValue(i) ||
+            From->getValueType(i) == To->getValueType(i)) &&
+           "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+  // Handle the trivial case.
+  if (From == To)
+    return;
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      ++UI;
+      Use.setNode(To);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values.  To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+                                      const SDValue *To,
+                                      DAGUpdateListener *UpdateListener) {
+  if (From->getNumValues() == 1)  // Handle the simple case efficiently.
+    return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      const SDValue &ToOp = To[Use.getResNo()];
+      ++UI;
+      Use.set(ToOp);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. Any
+/// DAGUpdateListener is notified just as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
+                                             DAGUpdateListener *UpdateListener) {
+  // Handle the really simple, really trivial case efficiently.
+  if (From == To) return;
+
+  // Handle the simple, trivial case efficiently.
+  if (From.getNode()->getNumValues() == 1) {
+    ReplaceAllUsesWith(From, To, UpdateListener);
+    return;
+  }
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From.getNode()->use_begin(),
+                       UE = From.getNode()->use_end();
+  while (UI != UE) {
+    SDNode *User = *UI;
+    bool UserRemovedFromCSEMaps = false;
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+
+      // Skip uses of different values from the same node.
+      if (Use.getResNo() != From.getResNo()) {
+        ++UI;
+        continue;
+      }
+
+      // If this node hasn't been modified yet, it's still in the CSE maps,
+      // so remove its old self from the CSE maps.
+      if (!UserRemovedFromCSEMaps) {
+        RemoveNodeFromCSEMaps(User);
+        UserRemovedFromCSEMaps = true;
+      }
+
+      ++UI;
+      Use.set(To);
+    } while (UI != UE && *UI == User);
+
+    // We are iterating over all uses of the From node, so if this user was
+    // never removed from the CSE maps, none of its uses touched the value
+    // being replaced and there is nothing more to do for it.
+    if (!UserRemovedFromCSEMaps)
+      continue;
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
+
+namespace {
+  /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+  /// to record information about a use.
+  struct UseMemo {
+    SDNode *User;
+    unsigned Index;
+    SDUse *Use;
+  };
+
+  /// operator< - Sort Memos by User.
+  bool operator<(const UseMemo &L, const UseMemo &R) {
+    return (intptr_t)L.User < (intptr_t)R.User;
+  }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. Any DAGUpdateListener is
+/// notified just as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+                                              const SDValue *To,
+                                              unsigned Num,
+                                              DAGUpdateListener *UpdateListener) {
+  // Handle the simple, trivial case efficiently.
+  if (Num == 1)
+    return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+
+  // Record all the uses up front. This guards against new uses that are
+  // introduced while the replacement is underway.
+  SmallVector<UseMemo, 4> Uses;
+  for (unsigned i = 0; i != Num; ++i) {
+    unsigned FromResNo = From[i].getResNo();
+    SDNode *FromNode = From[i].getNode();
+    for (SDNode::use_iterator UI = FromNode->use_begin(),
+         E = FromNode->use_end(); UI != E; ++UI) {
+      SDUse &Use = UI.getUse();
+      if (Use.getResNo() == FromResNo) {
+        UseMemo Memo = { *UI, i, &Use };
+        Uses.push_back(Memo);
+      }
+    }
+  }
+
+  // Sort the uses, so that all the uses from a given User are together.
+  std::sort(Uses.begin(), Uses.end());
+
+  for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+       UseIndex != UseIndexEnd; ) {
+    // We know that this user uses some value of From.  If it is the right
+    // value, update it.
+    SDNode *User = Uses[UseIndex].User;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // The Uses array is sorted, so all the uses for a given User
+    // are next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      unsigned i = Uses[UseIndex].Index;
+      SDUse &Use = *Uses[UseIndex].Use;
+      ++UseIndex;
+
+      Use.set(To[i]);
+    } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User, UpdateListener);
+  }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id to each node in the DAG
+/// based on its topological order. Returns the number of nodes; after the
+/// call, a node's id is its index in the sorted order.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+  unsigned DAGSize = 0;
+
+  // SortedPos tracks the progress of the algorithm. Nodes before it are
+  // sorted, nodes after it are unsorted. When the algorithm completes
+  // it is at the end of the list.
+  allnodes_iterator SortedPos = allnodes_begin();
+
+  // Visit all the nodes. Move nodes with no operands to the front of
+  // the list immediately. Annotate nodes that do have operands with their
+  // operand count. Before we do this, the Node Id fields of the nodes
+  // may contain arbitrary values. After, the Node Id fields for nodes
+  // before SortedPos will contain the topological sort index, and the
+  // Node Id fields for nodes at SortedPos and after will contain the
+  // count of outstanding operands.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ) {
+    SDNode *N = I++;
+    unsigned Degree = N->getNumOperands();
+    if (Degree == 0) {
+      // A node with no operands; move it into sorted position immediately.
+      N->setNodeId(DAGSize++);
+      allnodes_iterator Q = N;
+      if (Q != SortedPos)
+        SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+      ++SortedPos;
+    } else {
+      // Temporarily use the Node Id as scratch space for the degree count.
+      N->setNodeId(Degree);
+    }
+  }
+
+  // Visit all the nodes. As we iterate, move nodes into sorted order,
+  // such that by the time the end is reached all nodes will be sorted.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) {
+    SDNode *N = I;
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDNode *P = *UI;
+      unsigned Degree = P->getNodeId();
+      --Degree;
+      if (Degree == 0) {
+        // All of P's operands are sorted, so P may be sorted now.
+        P->setNodeId(DAGSize++);
+        if (P != SortedPos)
+          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+        ++SortedPos;
+      } else {
+        // Update P's outstanding operand count.
+        P->setNodeId(Degree);
+      }
+    }
+  }
+
+  assert(SortedPos == AllNodes.end() &&
+         "Topological sort incomplete!");
+  assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+         "First node in topological sort is not the entry token!");
+  assert(AllNodes.front().getNodeId() == 0 &&
+         "First node in topological sort has non-zero id!");
+  assert(AllNodes.front().getNumOperands() == 0 &&
+         "First node in topological sort has operands!");
+  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+         "Last node in topological sort has unexpected id!");
+  assert(AllNodes.back().use_empty() &&
+         "Last node in topological sort has users!");
+  assert(DAGSize == allnodes_size() && "Node count mismatch!");
+  return DAGSize;
+}
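+// For illustration, a minimal sketch of the invariant the ordering above
+// establishes (hypothetical client code): after AssignTopologicalOrder every
+// operand's id is strictly smaller than its user's id, so one forward walk
+// over the node list visits definitions before uses.
+#if 0
+static void VisitInTopologicalOrder(SelectionDAG &DAG) {
+  DAG.AssignTopologicalOrder();
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    SDNode *N = I;
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      assert(N->getOperand(i).getNode()->getNodeId() < N->getNodeId() &&
+             "Operands must be numbered before their users");
+  }
+}
+#endif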
+
+//===----------------------------------------------------------------------===//
+//                              SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+  DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
+                                         MVT VT, int64_t o)
+  : SDNode(isa<GlobalVariable>(GA) &&
+           cast<GlobalVariable>(GA)->isThreadLocal() ?
+           // Thread Local
+           (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
+           // Non Thread Local
+           (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
+           DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o) {
+  TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt,
+                     const Value *srcValue, int SVO,
+                     unsigned alignment, bool vol)
+  : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
+  assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
+  assert(getAlignment() == alignment && "Alignment representation error!");
+  assert(isVolatile() == vol && "Volatile representation error!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+                     const SDValue *Ops,
+                     unsigned NumOps, MVT memvt, const Value *srcValue,
+                     int SVO, unsigned alignment, bool vol)
+  : SDNode(Opc, dl, VTs, Ops, NumOps),
+    MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) {
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment);
+  assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!");
+  assert(getAlignment() == alignment && "Alignment representation error!");
+  assert(isVolatile() == vol && "Volatile representation error!");
+}
+
+/// getMemOperand - Return a MachineMemOperand object describing the memory
+/// reference performed by this node.
+MachineMemOperand MemSDNode::getMemOperand() const {
+  int Flags = 0;
+  if (isa<LoadSDNode>(this))
+    Flags = MachineMemOperand::MOLoad;
+  else if (isa<StoreSDNode>(this))
+    Flags = MachineMemOperand::MOStore;
+  else if (isa<AtomicSDNode>(this)) {
+    Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+  }
+  else {
+    const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this);
+    assert(MemIntrinNode && "Unknown MemSDNode opcode!");
+    if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad;
+    if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore;
+  }
+
+  int Size = (getMemoryVT().getSizeInBits() + 7) >> 3;
+  if (isVolatile()) Flags |= MachineMemOperand::MOVolatile;
+
+  // Check if the memory reference refers to a frame index.
+  const FrameIndexSDNode *FI =
+    dyn_cast<const FrameIndexSDNode>(getBasePtr().getNode());
+  if (!getSrcValue() && FI)
+    return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()),
+                             Flags, 0, Size, getAlignment());
+  else
+    return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(),
+                             Size, getAlignment());
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+  AddNodeIDNode(ID, this);
+}
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const MVT *SDNode::getValueTypeList(MVT VT) {
+  if (VT.isExtended()) {
+    static std::set<MVT, MVT::compareRawBits> EVTs;
+    return &(*EVTs.insert(VT).first);
+  } else {
+    static MVT VTs[MVT::LAST_VALUETYPE];
+    VTs[VT.getSimpleVT()] = VT;
+    return &VTs[VT.getSimpleVT()];
+  }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  // TODO: Only iterate over uses of a given value of the node
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+    if (UI.getUse().getResNo() == Value) {
+      if (NUses == 0)
+        return false;
+      --NUses;
+    }
+  }
+
+  // Found exactly the right number of uses?
+  return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there are any uses of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+  assert(Value < getNumValues() && "Bad value!");
+
+  for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+    if (UI.getUse().getResNo() == Value)
+      return true;
+
+  return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(SDNode *N) const {
+  bool Seen = false;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDNode *User = *I;
+    if (User == this)
+      Seen = true;
+    else
+      return false;
+  }
+
+  return Seen;
+}
+
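+// For illustration, the per-result filtering idiom the routines above rely
+// on, as standalone hypothetical code: walk every use of a node and count
+// only those whose SDUse refers to the result number of interest.
+#if 0
+static unsigned CountUsesOfValue(const SDNode *N, unsigned ResNo) {
+  unsigned Count = 0;
+  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+       UI != E; ++UI)
+    if (UI.getUse().getResNo() == ResNo)
+      ++Count;
+  return Count;
+}
+#endif
+
+/// isOperandOf - Return true if this node is an operand of N.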
+///
+bool SDValue::isOperandOf(SDNode *N) const {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (*this == N->getOperand(i))
+      return true;
+  return false;
+}
+
+bool SDNode::isOperandOf(SDNode *N) const {
+  for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+    if (this == N->OperandList[i].getNode())
+      return true;
+  return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions.  In practice, this looks through token
+/// factors and non-volatile loads.  In order to remain efficient, it only
+/// looks a few nodes deep; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+                                             unsigned Depth) const {
+  if (*this == Dest) return true;
+
+  // Don't search too deeply; we just want to be able to see through
+  // TokenFactor's etc.
+  if (Depth == 0) return false;
+
+  // If this is a token factor, all inputs to the TF happen in parallel. If any
+  // of the operands of the TF reach dest, then we can do the xform.
+  if (getOpcode() == ISD::TokenFactor) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+        return true;
+    return false;
+  }
+
+  // Non-volatile loads don't have side effects; look through them.
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+    if (!Ld->isVolatile())
+      return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+  }
+  return false;
+}
+
+
+static void findPredecessor(SDNode *N, const SDNode *P, bool &found,
+                            SmallPtrSet<SDNode *, 32> &Visited) {
+  if (found || !Visited.insert(N))
+    return;
+
+  for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) {
+    SDNode *Op = N->getOperand(i).getNode();
+    if (Op == P) {
+      found = true;
+      return;
+    }
+    findPredecessor(Op, P, found, Visited);
+  }
+}
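+// For illustration, a minimal sketch of a typical query against the helper
+// above (hypothetical code; it relies on the default search depth declared
+// in the header): test whether a store's incoming chain can reach a load's
+// output chain without crossing a side-effecting operation, the kind of
+// check a combine would make before reordering the two memory operations.
+#if 0
+static bool ChainReachesLoad(StoreSDNode *St, LoadSDNode *Ld) {
+  // A load's second result is its output chain.
+  return St->getChain().reachesChainWithoutSideEffects(SDValue(Ld, 1));
+}
+#endif
+
+/// isPredecessorOf - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by recursively traversing
+/// up the operands.
+/// NOTE: this is an expensive method. Use it carefully.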
+bool SDNode::isPredecessorOf(SDNode *N) const { + SmallPtrSet<SDNode *, 32> Visited; + bool found = false; + findPredecessor(N, this, found, Visited); + return found; +} + +uint64_t SDNode::getConstantOperandVal(unsigned Num) const { + assert(Num < NumOperands && "Invalid child # of SDNode!"); + return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); +} + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->get(getMachineOpcode()).getName(); + return "<<Unknown Machine Node>>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node>>"; + } + return "<<Unknown Node>>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: + return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MEMOPERAND: return "MemOperand"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::ARG_FLAGS: return "ArgFlags"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IID = cast<ConstantSDNode>(getOperand(0))->getZExtValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned IID = cast<ConstantSDNode>(getOperand(1))->getZExtValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case 
ISD::TargetConstantFP:return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::DBG_LABEL: return "dbg_label"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::DECLARE: return "declare"; + case ISD::HANDLENODE: return "handlenode"; + case ISD::FORMAL_ARGUMENTS: return "formal_arguments"; + case ISD::CALL: return "call"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FPOWI: return "fpowi"; + case ISD::FPOW: return "fpow"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + + case ISD::SETCC: return "setcc"; + case ISD::VSETCC: return "vsetcc"; + case ISD::SELECT: return "select"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BIT_CONVERT: return "bit_convert"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: assert(0 && "Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::RET: return "ret"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTLZ: return "ctlz"; + + // Debug info + case ISD::DBG_STOPPOINT: return "dbg_stoppoint"; + case ISD::DEBUG_LOC: return "debug_loc"; + + // Trampolines + case ISD::TRAMPOLINE: return "trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: assert(0 && "Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: + return ""; + case ISD::PRE_INC: + return "<pre-inc>"; + case ISD::PRE_DEC: + return "<pre-dec>"; + case ISD::POST_INC: + return "<post-inc>"; + case ISD::POST_DEC: + return "<post-dec>"; + } +} + +std::string ISD::ArgFlagsTy::getArgFlagsString() { + std::string S = "< "; + + if 
(isZExt()) + S += "zext "; + if (isSExt()) + S += "sext "; + if (isInReg()) + S += "inreg "; + if (isSRet()) + S += "sret "; + if (isByVal()) + S += "byval "; + if (isNest()) + S += "nest "; + if (getByValAlign()) + S += "byval-align:" + utostr(getByValAlign()) + " "; + if (getOrigAlign()) + S += "orig-align:" + utostr(getOrigAlign()) + " "; + if (getByValSize()) + S += "byval-size:" + utostr(getByValSize()) + " "; + return S + ">"; +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(errs(), G); +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getMVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this); + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } + + if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + if (G && R->getReg() && + TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + OS << " " << G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + OS << " #" << R->getReg(); + } + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MemOperandSDNode *M = 
dyn_cast<MemOperandSDNode>(this)) { + if (M->MO.getValue()) + OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">"; + else + OS << "<null:" << M->MO.getOffset() << ">"; + } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(this)) { + OS << N->getArgFlags().getArgFlagsString(); + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getMVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + const Value *SrcValue = LD->getSrcValue(); + int SrcOffset = LD->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << " <anyext "; break; + case ISD::SEXTLOAD: OS << " <sext "; break; + case ISD::ZEXTLOAD: OS << " <zext "; break; + } + if (doExt) + OS << LD->getMemoryVT().getMVTString() << ">"; + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << " " << AM; + if (LD->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << LD->getAlignment(); + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + const Value *SrcValue = ST->getSrcValue(); + int SrcOffset = ST->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + + if (ST->isTruncatingStore()) + OS << " <trunc " << ST->getMemoryVT().getMVTString() << ">"; + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << " " << AM; + if (ST->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << ST->getAlignment(); + } else if (const AtomicSDNode* AT = dyn_cast<AtomicSDNode>(this)) { + const Value *SrcValue = AT->getSrcValue(); + int SrcOffset = AT->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + if (AT->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << AT->getAlignment(); + } +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + OS << " "; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + cerr << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + + cerr << "\n" << std::string(indent, ' '); + N->dump(G); +} + +void SelectionDAG::dump() const { + cerr << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + + cerr << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, 
return now. + return; + // Dump the current SDNode, but don't end the line yet. + OS << std::string(indent, ' '); + N->printr(OS, G); + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + if (i) OS << ","; + OS << " "; + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + OS << "\n"; + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(errs(), this, 0, 0, once); +} + + +// getAddressSpace - Return the address space this GlobalAddress belongs to. +unsigned GlobalAddressSDNode::getAddressSpace() const { + return getGlobal()->getType()->getAddressSpace(); +} + + +const Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, + APInt &SplatUndef, + unsigned &SplatBitSize, + bool &HasAnyUndefs, + unsigned MinSplatBits) { + MVT VT = getValueType(0); + assert(VT.isVector() && "Expected a vector type"); + unsigned sz = VT.getSizeInBits(); + if (MinSplatBits > sz) + return false; + + SplatValue = APInt(sz, 0); + SplatUndef = APInt(sz, 0); + + // Get the bits. Bits with undefined values (when the corresponding element + // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared + // in SplatValue. If any of the values are not constant, give up and return + // false. + unsigned int nOps = getNumOperands(); + assert(nOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + for (unsigned i = 0; i < nOps; ++i) { + SDValue OpVal = getOperand(i); + unsigned BitPos = i * EltBitSize; + + if (OpVal.getOpcode() == ISD::UNDEF) + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize); + else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) + SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). + zextOrTrunc(sz) << BitPos); + else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) + SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; + else + return false; + } + + // The build_vector is all constants or undefs. Find the smallest element + // size that splats the vector. + + HasAnyUndefs = (SplatUndef != 0); + while (sz > 8) { + + unsigned HalfSize = sz / 2; + APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); + APInt LowValue = APInt(SplatValue).trunc(HalfSize); + APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); + + // If the two halves do not match (ignoring undef bits), stop here. 
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || + MinSplatBits > HalfSize) + break; + + SplatValue = HighValue | LowValue; + SplatUndef = HighUndef & LowUndef; + + sz = HalfSize; + } + + SplatBitSize = sz; + return true; +} + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { + // Find the first non-undef value in the shuffle mask. + unsigned i, e; + for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) + /* search */; + + assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); + + // Make sure all remaining elements are either undef or the same as the first + // non-undef value. + for (int Idx = Mask[i]; i != e; ++i) + if (Mask[i] >= 0 && Mask[i] != Idx) + return false; + return true; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp new file mode 100644 index 0000000..889d7f5 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -0,0 +1,6052 @@ +//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "SelectionDAGBuild.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +/// LimitFloatPrecision - Generate low-precision inline sequences for +/// some float libcalls (6, 8 or 12 bits). 
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+                 cl::desc("Generate low-precision inline sequences "
+                          "for some float libcalls"),
+                 cl::location(LimitFloatPrecision),
+                 cl::init(0));
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+                                   const unsigned *Indices,
+                                   const unsigned *IndicesEnd,
+                                   unsigned CurIndex = 0) {
+  // Base case: We're done.
+  if (Indices && Indices == IndicesEnd)
+    return CurIndex;
+
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI) {
+      if (Indices && *Indices == unsigned(EI - EB))
+        return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // Given an array type, recursively traverse the elements.
+  else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    const Type *EltTy = ATy->getElementType();
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+      if (Indices && *Indices == i)
+        return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // Otherwise Ty is a leaf type; it occupies exactly one linearized slot.
+  return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// MVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+                            SmallVectorImpl<MVT> &ValueVTs,
+                            SmallVectorImpl<uint64_t> *Offsets = 0,
+                            uint64_t StartingOffset = 0) {
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI)
+      ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+                      StartingOffset + SL->getElementOffset(EI - EB));
+    return;
+  }
+  // Given an array type, recursively traverse the elements.
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    const Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+      ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+                      StartingOffset + i * EltSize);
+    return;
+  }
+  // Interpret void as zero return values.
+  if (Ty == Type::VoidTy)
+    return;
+  // Base case: we can get an MVT for this LLVM IR type.
+  ValueVTs.push_back(TLI.getValueType(Ty));
+  if (Offsets)
+    Offsets->push_back(StartingOffset);
+}
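+// For illustration, a worked example of the decomposition above (hypothetical
+// code): for the LLVM IR type { i32, [2 x float] } on a typical 32-bit
+// target, ValueVTs comes back as { i32, f32, f32 } and Offsets as
+// { 0, 4, 8 }; the exact offsets follow the target's struct layout.
+#if 0
+static void DumpValueVTs(const TargetLowering &TLI, const Type *Ty) {
+  SmallVector<MVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+  for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
+    cerr << ValueVTs[i].getMVTString() << " at offset " << Offsets[i] << "\n";
+}
+#endif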
+namespace llvm {
+  /// RegsForValue - This struct represents the registers (physical or virtual)
+  /// that a particular set of values is assigned, and the type information
+  /// about the value. The most common situation is to represent one value at a
+  /// time, but struct or array values are handled element-wise as multiple
+  /// values.  The splitting of aggregates is performed recursively, so that we
+  /// never have aggregate-typed registers. The values at this point do not
+  /// necessarily have legal types, so each value may require one or more
+  /// registers of some legal type.
+  ///
+  struct VISIBILITY_HIDDEN RegsForValue {
+    /// TLI - The TargetLowering object.
+    ///
+    const TargetLowering *TLI;
+
+    /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need to be promoted or synthesized from one or more registers.
+    ///
+    SmallVector<MVT, 4> ValueVTs;
+
+    /// RegVTs - The value types of the registers. This is the same size as
+    /// ValueVTs and it records, for each value, what the type of the assigned
+    /// register or registers are. (Individual values are never synthesized
+    /// from more than one type of register.)
+    ///
+    /// With virtual registers, the contents of RegVTs is redundant with TLI's
+    /// getRegisterType member function, however with physical registers
+    /// it is necessary to have a separate record of the types.
+    ///
+    SmallVector<MVT, 4> RegVTs;
+
+    /// Regs - This list holds the registers assigned to the values.
+    /// Each legal or promoted value requires one register, and each
+    /// expanded value requires multiple registers.
+    ///
+    SmallVector<unsigned, 4> Regs;
+
+    RegsForValue() : TLI(0) {}
+
+    RegsForValue(const TargetLowering &tli,
+                 const SmallVector<unsigned, 4> &regs,
+                 MVT regvt, MVT valuevt)
+      : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+    RegsForValue(const TargetLowering &tli,
+                 const SmallVector<unsigned, 4> &regs,
+                 const SmallVector<MVT, 4> &regvts,
+                 const SmallVector<MVT, 4> &valuevts)
+      : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
+    RegsForValue(const TargetLowering &tli,
+                 unsigned Reg, const Type *Ty) : TLI(&tli) {
+      ComputeValueVTs(tli, Ty, ValueVTs);
+
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        MVT ValueVT = ValueVTs[Value];
+        unsigned NumRegs = TLI->getNumRegisters(ValueVT);
+        MVT RegisterVT = TLI->getRegisterType(ValueVT);
+        for (unsigned i = 0; i != NumRegs; ++i)
+          Regs.push_back(Reg + i);
+        RegVTs.push_back(RegisterVT);
+        Reg += NumRegs;
+      }
+    }
+
+    /// append - Add the specified values to this one.
+    void append(const RegsForValue &RHS) {
+      TLI = RHS.TLI;
+      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    }
+
+
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+    /// this value and returns the result as a ValueVTs value.  This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+                            SDValue &Chain, SDValue *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+    /// specified value into the registers specified by this object.  This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                       SDValue &Chain, SDValue *Flag) const;
+
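+    /// For example (an illustrative sketch, not part of this interface): on
+    /// a target whose widest legal integer register type is i32, constructing
+    ///   RegsForValue RFV(TLI, Reg, /*Ty=*/Int64Ty);
+    /// yields ValueVTs = { i64 }, RegVTs = { i32 } and Regs = { Reg, Reg+1 };
+    /// a caller then reassembles the illegal-typed value from both registers
+    /// with
+    ///   SDValue V = RFV.getCopyFromRegs(DAG, dl, Chain, /*Flag=*/0);
+
+    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+    /// operand list.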
This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Code, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const; + }; +} + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch or atomic instruction, which may expand to multiple basic blocks. +static bool isUsedOutsideOfDefiningBlock(Instruction *I) { + if (isa<PHINode>(I)) return true; + BasicBlock *BB = I->getParent(); + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) + return true; + return false; +} + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + // Don't force virtual registers for byval arguments though, because + // fast-isel can't handle those in all cases. + if (EnableFastISel && !A->hasByValAttr()) + return A->use_empty(); + + BasicBlock *Entry = A->getParent()->begin(); + for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI)) + return false; // Use not in entry block. + return true; +} + +FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) + : TLI(tli) { +} + +void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, + SelectionDAG &DAG, + bool EnableFastISel) { + Fn = &fn; + MF = &mf; + RegInfo = &MF->getRegInfo(); + + // Create a vreg for each argument register that is not dead and is used + // outside of the entry block for the function. + for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + AI != E; ++AI) + if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) + InitializeRegForValue(AI); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + Function::iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + const Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align); + } + + for (; BB != EB; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + if (!isa<AllocaInst>(I) || + !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(I); + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. 
+ for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); + MBBMap[BB] = MBB; + MF->push_back(MBB); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. + PHINode *PN; + DebugLoc DL; + for (BasicBlock::iterator + I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (Function *F = CI->getCalledFunction()) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); + + if (DIDescriptor::ValidDebugInfo(SPI->getContext(), + CodeGenOpt::Default)) { + DICompileUnit CU(cast<GlobalVariable>(SPI->getContext())); + unsigned idx = MF->getOrCreateDebugLocID(CU.getGV(), + SPI->getLine(), + SPI->getColumn()); + DL = DebugLoc::get(idx); + } + + break; + } + case Intrinsic::dbg_func_start: { + DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); + Value *SP = FSI->getSubprogram(); + + if (DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::Default)) { + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + DICompileUnit CU(Subprogram.getCompileUnit()); + unsigned Line = Subprogram.getLineNumber(); + DL = DebugLoc::get(MF->getOrCreateDebugLocID(CU.getGV(), + Line, 0)); + } + + break; + } + } + } + } + + PN = dyn_cast<PHINode>(I); + if (!PN || PN->use_empty()) continue; + + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + MVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(VT); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } +} + +unsigned FunctionLoweringInfo::MakeReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. +/// +unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, V->getType(), ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + MVT RegisterVT = TLI.getRegisterType(ValueVT); + + unsigned NumRegs = TLI.getNumRegisters(ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = MakeReg(RegisterVT); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. If the parts combine to a type +/// larger than ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, + const SDValue *Parts, + unsigned NumParts, MVT PartVT, MVT ValueVT, + ISD::NodeType AssertOp = ISD::DELETED_NODE) { + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + if (NumParts > 1) { + // Assemble the value from multiple parts. + if (!ValueVT.isVector() && ValueVT.isInteger()) { + unsigned PartBits = PartVT.getSizeInBits(); + unsigned ValueBits = ValueVT.getSizeInBits(); + + // Assemble the power of 2 part. + unsigned RoundParts = NumParts & (NumParts - 1) ? + 1 << Log2_32(NumParts) : NumParts; + unsigned RoundBits = PartBits * RoundParts; + MVT RoundVT = RoundBits == ValueBits ? + ValueVT : MVT::getIntegerVT(RoundBits); + SDValue Lo, Hi; + + MVT HalfVT = MVT::getIntegerVT(RoundBits/2); + + if (RoundParts > 2) { + Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT); + Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2, + PartVT, HalfVT); + } else { + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]); + } + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); + + if (RoundParts < NumParts) { + // Assemble the trailing non-power-of-2 part. + unsigned OddParts = NumParts - RoundParts; + MVT OddVT = MVT::getIntegerVT(OddParts * PartBits); + Hi = getCopyFromParts(DAG, dl, + Parts+RoundParts, OddParts, PartVT, OddVT); + + // Combine the round and odd parts. + Lo = Val; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), + TLI.getPointerTy())); + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); + } + } else if (ValueVT.isVector()) { + // Handle a multi-element vector. + MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate operands + // from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate + // operands. + Val = DAG.getNode(IntermediateVT.isVector() ? 
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, + ValueVT, &Ops[0], NumIntermediates); + } else if (PartVT.isFloatingPoint()) { + // FP split into multiple FP parts (for ppcf128) + assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) && + "Unexpected split"); + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); + } else { + // FP split into integer parts (soft fp) + assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && + !PartVT.isVector() && "Unexpected split"); + MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits()); + Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); + } + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + assert(ValueVT.isVector() && "Unknown vector conversion!"); + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + } + + if (ValueVT.isVector()) { + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); + } + + if (PartVT.isInteger() && + ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartVT)) { + // For a truncate, see if we have any information to + // indicate whether the truncated bits will always be + // zero or sign-extension. + if (AssertOp != ISD::DELETED_NODE) + Val = DAG.getNode(AssertOp, dl, PartVT, Val, + DAG.getValueType(ValueVT)); + return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } else { + return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); + } + } + + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + if (ValueVT.bitsLT(Val.getValueType())) + // FP_ROUND's are always exact here. + return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); + } + + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + + assert(0 && "Unknown mismatch!"); + return SDValue(); +} + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(); + MVT ValueVT = Val.getValueType(); + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + + if (!NumParts) + return; + + if (!ValueVT.isVector()) { + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. 
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); + } else if (PartVT.isInteger() && ValueVT.isInteger()) { + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); + } else { + assert(0 && "Unknown mismatch!"); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover fewer bits than the value has, truncate the value. + if (PartVT.isInteger() && ValueVT.isInteger()) { + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } else { + assert(0 && "Unknown mismatch!"); + } + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); + + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, + DAG.getConstant(RoundBits, + TLI.getPointerTy())); + getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT); + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + NumParts = RoundParts; + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. + Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getIntegerVT(ValueVT.getSizeInBits()), + Val); + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + MVT ThisVT = MVT::getIntegerVT(ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + ThisVT, Part0, + DAG.getConstant(1, PtrVT)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + ThisVT, Part0, + DAG.getConstant(0, PtrVT)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, + PartVT, Part0); + Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, + PartVT, Part1); + } + } + } + + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); + + return; + } + + // Vector ValueVT. + if (NumParts == 1) { + if (PartVT != ValueVT) { + if (PartVT.isVector()) { + Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); + } else { + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + PartVT, Val, + DAG.getConstant(0, PtrVT)); + } + } + + Parts[0] = Val; + return; + } + + // Handle a multi-element vector.
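A worked instance of the non-power-of-2 path above, with hypothetical types (ValueVT = i48, PartVT = i16, NumParts = 3, little-endian): RoundParts = 2, so the top 16 bits are split off with SRL, and the remaining i32 is bisected by EXTRACT_ELEMENT. A plain-integer sketch, not the real DAG calls:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Val = 0x123456789AULL;            // an i48 value
  const unsigned PartBits = 16;
  // NumParts = 3 is not a power of 2: RoundParts = 1 << Log2_32(3) = 2.
  const unsigned RoundBits = 2 * PartBits;   // 32
  uint16_t Parts[3];
  Parts[2] = (uint16_t)(Val >> RoundBits);   // the odd part, via SRL
  uint32_t Round = (uint32_t)Val;            // TRUNCATE to i32
  Parts[1] = (uint16_t)(Round >> 16);        // EXTRACT_ELEMENT 1
  Parts[0] = (uint16_t)Round;                // EXTRACT_ELEMENT 0
  std::printf("%04x %04x %04x\n", Parts[0], Parts[1], Parts[2]);
  return 0;                                  // prints "789a 3456 0012"
}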
+ MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = TLI + .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + unsigned NumElements = ValueVT.getVectorNumElements(); + + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + + // Split the vector into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + for (unsigned i = 0; i != NumIntermediates; ++i) + if (IntermediateVT.isVector()) + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + IntermediateVT, Val, + DAG.getConstant(i * (NumElements / NumIntermediates), + PtrVT)); + else + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + IntermediateVT, Val, + DAG.getConstant(i, PtrVT)); + + // Split the intermediate operands into legal parts. + if (NumParts == NumIntermediates) { + // If the register was not expanded, promote or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, split each value into + // legal parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT); + } +} + + +void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { + AA = &aa; + GFI = gfi; + TD = DAG.getTarget().getTargetData(); +} + +/// clear - Clear out the current SelectionDAG and the associated +/// state and prepare this SelectionDAGLowering object to be used +/// for a new block. This doesn't clear out information about +/// additional blocks that are needed to complete switch lowering +/// or PHI node updating; that information is cleared out as it is +/// consumed. +void SelectionDAGLowering::clear() { + NodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + DAG.clear(); + CurDebugLoc = DebugLoc::getUnknownLoc(); +} + +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGLowering::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. +/// +SDValue SelectionDAGLowering::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node.
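A toy model of the flushing just described (strings standing in for SDValue chains, names hypothetical): getRoot joins every pending load chain with a single TokenFactor, so a subsequent store is ordered after all of them:

#include <cstdio>
#include <string>
#include <vector>

static std::vector<std::string> PendingLoads;
static std::string Root = "EntryToken";

static std::string getRootToy() {
  if (PendingLoads.empty()) return Root;
  if (PendingLoads.size() == 1) {            // one load: its chain is the root
    Root = PendingLoads[0];
    PendingLoads.clear();
    return Root;
  }
  std::string TF = "TokenFactor(";           // otherwise join all the chains
  for (size_t i = 0; i != PendingLoads.size(); ++i)
    TF += (i ? ", " : "") + PendingLoads[i];
  PendingLoads.clear();
  return Root = TF + ")";
}

int main() {
  PendingLoads.push_back("t1:load A");
  PendingLoads.push_back("t2:load B");
  std::printf("%s\n", getRootToy().c_str()); // TokenFactor(t1:load A, t2:load B)
  return 0;
}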
+ if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. + } + + if (i == e) + PendingExports.push_back(Root); + } + + Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingExports[0], + PendingExports.size()); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} + +void SelectionDAGLowering::visit(Instruction &I) { + visit(I.getOpcode(), I); +} + +void SelectionDAGLowering::visit(unsigned Opcode, User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: assert(0 && "Unknown instruction type encountered!"); + abort(); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE:return visit##OPCODE((CLASS&)I); +#include "llvm/Instruction.def" + } +} + +void SelectionDAGLowering::visitAdd(User &I) { + if (I.getType()->isFPOrFPVector()) + visitBinary(I, ISD::FADD); + else + visitBinary(I, ISD::ADD); +} + +void SelectionDAGLowering::visitMul(User &I) { + if (I.getType()->isFPOrFPVector()) + visitBinary(I, ISD::FMUL); + else + visitBinary(I, ISD::MUL); +} + +SDValue SelectionDAGLowering::getValue(const Value *V) { + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { + MVT VT = TLI.getValueType(V->getType(), true); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return N = DAG.getConstant(*CI, VT); + + if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return N = DAG.getGlobalAddress(GV, VT); + + if (isa<ConstantPointerNull>(C)) + return N = DAG.getConstant(0, TLI.getPointerTy()); + + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return N = DAG.getConstantFP(*CFP, VT); + + if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) + return N = DAG.getUNDEF(VT); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the ValueMap!"); + return N1; + } + + if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { + SmallVector<SDValue, 4> Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); + } + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); + } + + if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) { + assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && + "Unknown struct or array constant!"); + + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector<SDValue, 4> Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + MVT EltVT = ValueVTs[i]; + if (isa<UndefValue>(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, EltVT); + else + Constants[i] = DAG.getConstant(0, EltVT); + } + return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc()); + } + + const VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned 
NumElements = VecTy->getNumElements(); + + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. + SmallVector<SDValue, 16> Ops; + if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CP->getOperand(i))); + } else { + assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + MVT EltVT = TLI.getValueType(VecTy->getElementType()); + + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, EltVT); + else + Op = DAG.getConstant(0, EltVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } + + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + } + + unsigned InReg = FuncInfo.ValueMap[V]; + assert(InReg && "Value not in map!"); + + RegsForValue RFV(TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); +} + + +void SelectionDAGLowering::visitRet(ReturnInst &I) { + if (I.getNumOperands() == 0) { + DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), + MVT::Other, getControlRoot())); + return; + } + + SmallVector<SDValue, 8> NewValues; + NewValues.push_back(getControlRoot()); + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) continue; + + SDValue RetOp = getValue(I.getOperand(i)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + MVT VT = ValueVTs[j]; + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + const Function *F = I.getParent()->getParent(); + if (F->paramHasAttr(0, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->paramHasAttr(0, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + MVT MinVT = TLI.getRegisterType(MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(VT); + MVT PartVT = TLI.getRegisterType(VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->paramHasAttr(0, Attribute::InReg)) + Flags.setInReg(); + for (unsigned i = 0; i < NumParts; ++i) { + NewValues.push_back(Parts[i]); + NewValues.push_back(DAG.getArgFlags(Flags)); + } + } + } + DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other, + &NewValues[0], NewValues.size())); +} + +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. 
+void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) { + if (!V->use_empty()) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) { + // No need to export constants. + if (!isa<Instruction>(V) && !isa<Argument>(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (Instruction *VI = dyn_cast<Instruction>(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa<Argument>(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. +/// +static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { + ISD::CondCode FPC, FOC; + switch (Pred) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + assert(0 && "Invalid FCmp predicate opcode!"); + FOC = FPC = ISD::SETFALSE; + break; + } + if (FiniteOnlyFPMath()) + return FOC; + else + return FPC; +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. 
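What the FOC/FPC pair encodes, checked in plain C++ (illustrative, not part of the patch): SETOLT is "ordered and less than" and is false when either operand is NaN, while SETULT is "unordered or less than" and is true on NaN. When FiniteOnlyFPMath() promises NaN never occurs, the two coincide, which is why the ordered code FOC is returned in that mode:

#include <cstdio>
#include <limits>

int main() {
  double a = std::numeric_limits<double>::quiet_NaN(), b = 1.0;
  bool nan = (a != a) || (b != b);           // is either operand unordered?
  bool olt = !nan && a < b;                  // FCMP_OLT -> ISD::SETOLT
  bool ult =  nan || a < b;                  // FCMP_ULT -> ISD::SETULT
  std::printf("olt=%d ult=%d\n", olt, ult);  // olt=0 ult=1
  return 0;
}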
+/// +static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + assert(0 && "Invalid ICmp predicate opcode!"); + return ISD::SETNE; + } +} + +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. +/// +void +SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB) { + const BasicBlock *BB = CurBB->getBasicBlock(); + + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + if (CurBB == CurMBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { + ISD::CondCode Condition; + if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = getICmpCondCode(IC->getPredicate()); + } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + Condition = getFCmpCondCode(FC->getPredicate()); + } else { + Condition = ISD::SETEQ; // silence warning. + assert(0 && "Unknown compare instruction"); + } + + CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(), + NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); +} + +/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y), +/// emit it as a tree of conditional branches rather than materializing the +/// boolean result, recursing into the subexpressions. +void SelectionDAGLowering::FindMergedConditions(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + unsigned Opc) { + // If this node is not part of the or/and tree, emit it as a branch. + Instruction *BOp = dyn_cast<Instruction>(Cond); + if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || + (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || + !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI = CurBB; + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +bool +SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + return true; +} + +void SelectionDAGLowering::visitBr(BranchInst &I) { + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + if (I.isUnconditional()) { + // Update machine-CFG edges. + CurMBB->addSuccessor(Succ0MBB); + + // If this is not a fall-through branch, emit the branch. + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + if (BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0]); + SwitchCases.erase(SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. 
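// An illustrative aside (hypothetical source, not part of the patch). For
//     if (a == b || c < d) foo();
// FindMergedConditions(Or) records two CaseBlocks that visitSwitchCase
// lowers to one branch per compare:
//     cmp a, b ; je  foo
//     cmp c, d ; jl  foo
//     jmp else
// instead of materializing (a==b)|(c<d) in a register and testing it once.
// ShouldEmitAsBranches declines only when both compares test the same pair
// of operands, since those fold into a single compare anyway.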
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + CurMBB->getParent()->erase(SwitchCases[i].ThisBB); + + SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(), + NULL, Succ0MBB, Succ1MBB, CurMBB); + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB); +} + +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. +void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + DebugLoc dl = getCurDebugLoc(); + + // Build the setcc now. + if (CB.CmpMHS == NULL) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. + if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) { + SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); + } else + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); + } else { + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + + const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + + SDValue CmpOp = getValue(CB.CmpMHS); + MVT VT = CmpOp.getValueType(); + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + ISD::SETLE); + } else { + SDValue SUB = DAG.getNode(ISD::SUB, dl, + VT, CmpOp, DAG.getConstant(Low, VT)); + Cond = DAG.getSetCC(dl, MVT::i1, SUB, + DAG.getConstant(High-Low, VT), ISD::SETULE); + } + } + + // Update successor info + CurMBB->addSuccessor(CB.TrueBB); + CurMBB->addSuccessor(CB.FalseBB); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == NextBlock) { + std::swap(CB.TrueBB, CB.FalseBB); + SDValue True = DAG.getConstant(1, Cond.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); + } + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(CB.TrueBB)); + + // If the branch was constant folded, fix up the CFG. + if (BrCond.getOpcode() == ISD::BR) { + CurMBB->removeSuccessor(CB.FalseBB); + DAG.setRoot(BrCond); + } else { + // Otherwise, go ahead and insert the false branch. 
+ if (BrCond == getControlRoot()) + CurMBB->removeSuccessor(CB.TrueBB); + + if (CB.FalseBB == NextBlock) + DAG.setRoot(BrCond); + else + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB))); + } +} + +/// visitJumpTable - Emit JumpTable node in the current MBB +void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { + // Emit the code for the jump table + assert(JT.Reg != -1U && "Should lower JT Header first!"); + MVT PTy = TLI.getPointerTy(); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + JT.Reg, PTy); + SDValue Table = DAG.getJumpTable(JT.JTI, PTy); + DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(), + MVT::Other, Index.getValue(1), + Table, Index)); +} + +/// visitJumpTableHeader - This function emits the necessary code to compute +/// the index into the jump table from the value being switched on. +void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, + JumpTableHeader &JTH) { + // Subtract the lowest switch case value from the value being switched on and + // conditionally branch to the default MBB if the result is greater than the + // difference between the smallest and largest cases. + SDValue SwitchOp = getValue(JTH.SValue); + MVT VT = SwitchOp.getValueType(); + SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(JTH.First, VT)); + + // The SDNode we just created, which holds the value being switched on minus + // the smallest case value, needs to be copied to a virtual register so it + // can be used as an index into the jump table in a subsequent basic block. + // This value may be smaller or larger than the target's pointer type, and + // may therefore require extension or truncation. + if (VT.bitsGT(TLI.getPointerTy())) + SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + else + SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + + unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + JumpTableReg, SwitchOp); + JT.Reg = JumpTableReg; + + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the largest + // case in the switch. + SDValue CMP = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(SUB.getValueType()), SUB, + DAG.getConstant(JTH.Last-JTH.First,VT), + ISD::SETUGT); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block.
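The SUB-plus-SETUGT pattern above is the classic trick of folding both bounds checks into one unsigned compare: values below First wrap around to huge unsigned indices and fail the same test. A standalone sketch with hypothetical bounds 10..14:

#include <cstdio>

int main() {
  const long First = 10, Last = 14;
  for (long x = 8; x <= 16; ++x) {
    unsigned long idx = (unsigned long)(x - First);    // ISD::SUB
    bool def = idx > (unsigned long)(Last - First);    // ISD::SETUGT
    std::printf("x=%ld -> %s\n", x, def ? "default" : "jump table");
  }
  return 0;
}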
+ MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, CMP, + DAG.getBasicBlock(JT.Default)); + + if (JT.MBB == NextBlock) + DAG.setRoot(BrCond); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + DAG.getBasicBlock(JT.MBB))); +} + +/// visitBitTestHeader - This function emits necessary code to produce value +/// suitable for "bit tests" +void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { + // Subtract the minimum value + SDValue SwitchOp = getValue(B.SValue); + MVT VT = SwitchOp.getValueType(); + SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(B.First, VT)); + + // Check range + SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(SUB.getValueType()), + SUB, DAG.getConstant(B.Range, VT), + ISD::SETUGT); + + SDValue ShiftOp; + if (VT.bitsGT(TLI.getPointerTy())) + ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + else + ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + + B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + B.Reg, ShiftOp); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + MachineBasicBlock* MBB = B.Cases[0].ThisBB; + + CurMBB->addSuccessor(B.Default); + CurMBB->addSuccessor(MBB); + + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, RangeCmp, + DAG.getBasicBlock(B.Default)); + + if (MBB == NextBlock) + DAG.setRoot(BrRange); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + DAG.getBasicBlock(MBB))); +} + +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B) { + // Make desired shift + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, + TLI.getPointerTy()); + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(1, TLI.getPointerTy()), + ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + TLI.getPointerTy(), SwitchVal, + DAG.getConstant(B.Mask, TLI.getPointerTy())); + SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(AndOp.getValueType()), + AndOp, DAG.getConstant(0, TLI.getPointerTy()), + ISD::SETNE); + + CurMBB->addSuccessor(B.TargetBB); + CurMBB->addSuccessor(NextMBB); + + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + AndCmp, DAG.getBasicBlock(B.TargetBB)); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. 
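Putting the header and the per-case test together, for a hypothetical case set {0,3,5} -> A and {1,2} -> B (lowBound 0, so the subtraction is optimized away): each destination gets one mask, and each bit test is a shift plus an AND:

#include <cstdio>

int main() {
  const unsigned long MaskA = 0x29;      // bits 0, 3, 5
  const unsigned long MaskB = 0x06;      // bits 1, 2
  for (unsigned x = 0; x <= 5; ++x) {
    unsigned long Bit = 1UL << x;        // ISD::SHL of the switched value
    const char *Dest = (Bit & MaskA) ? "A" : (Bit & MaskB) ? "B" : "default";
    std::printf("x=%u -> %s\n", x, Dest);
  }
  return 0;
}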
+ MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + if (NextMBB == NextBlock) + DAG.setRoot(BrAnd); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + DAG.getBasicBlock(NextMBB))); +} + +void SelectionDAGLowering::visitInvoke(InvokeInst &I) { + // Retrieve successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; + MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + + const Value *Callee(I.getCalledValue()); + if (isa<InlineAsm>(Callee)) + visitInlineAsm(&I); + else + LowerCallTo(&I, getValue(Callee), false, LandingPad); + + // If the value of the invoke is used outside of its defining block, make it + // available as a virtual register. + CopyToExportRegsIfNeeded(&I); + + // Update successor info + CurMBB->addSuccessor(Return); + CurMBB->addSuccessor(LandingPad); + + // Drop into normal successor. + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); +} + +void SelectionDAGLowering::visitUnwind(UnwindInst &I) { +} + +/// handleSmallSwitchRange - Emit a series of specific tests (suitable for +/// small case ranges). +bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + Case& BackCase = *(CR.Range.second-1); + + // Size is the number of Cases represented by this range. + size_t Size = CR.Range.second - CR.Range.first; + if (Size > 3) + return false; + + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + // TODO: If any two of the cases have the same destination, and if one value + // is the same as the other, but has one bit unset that the other has set, + // use bit manipulation to do two compares at once. For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + + // Rearrange the case blocks so that the last one falls through if possible. + if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { + // The last case block won't fall through into 'NextBlock' if we emit the + // branches in this order. See if rearranging a case value would help. + for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) { + if (I->BB == NextBlock) { + std::swap(*I, BackCase); + break; + } + } + } + + // Create a CaseBlock record representing a conditional branch to + // the Case's target mbb if the value being switched on SV is equal + // to C. + MachineBasicBlock *CurBlock = CR.CaseBB; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + MachineBasicBlock *FallThrough; + if (I != E-1) { + FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); + CurMF->insert(BBI, FallThrough); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } else { + // If the last case doesn't match, go to the default block.
+ FallThrough = Default; + } + + Value *RHS, *LHS, *MHS; + ISD::CondCode CC; + if (I->High == I->Low) { + // This is just a small case range containing exactly one case. + CC = ISD::SETEQ; + LHS = SV; RHS = I->High; MHS = NULL; + } else { + CC = ISD::SETLE; + LHS = I->Low; MHS = SV; RHS = I->High; + } + CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock); + + // If emitting the first comparison, just call visitSwitchCase to emit the + // code into the current block. Otherwise, push the CaseBlock onto the + // vector to be later processed by SDISel, and insert the node's MBB + // before the next MBB. + if (CurBlock == CurMBB) + visitSwitchCase(CB); + else + SwitchCases.push_back(CB); + + CurBlock = FallThrough; + } + + return true; +} + +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return !DisableJumpTables && + (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} + +static APInt ComputeRange(const APInt &First, const APInt &Last) { + APInt LastExt(Last), FirstExt(First); + uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; + LastExt.sext(BitWidth); FirstExt.sext(BitWidth); + return (LastExt - FirstExt + 1ULL); +} + +/// handleJTSwitchCase - Emit a jump table for the current switch case range. +bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + + const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + + size_t TSize = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); + + if (!areJTsAllowed(TLI) || TSize <= 3) + return false; + + APInt Range = ComputeRange(First, Last); + double Density = (double)TSize / Range.roundToDouble(); + if (Density < 0.4) + return false; + + DEBUG(errs() << "Lowering jump table\n" + << "First entry: " << First << ". Last entry: " << Last << '\n' + << "Range: " << Range << ". Size: " << TSize + << ". Density: " << Density << "\n\n"); + + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Create a new basic block to hold the code for loading the address + // of the jump table, and jumping to it. Update successor information; + // we will either branch to the default case for the switch, or the jump + // table. + MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, JumpTableBB); + CR.CaseBB->addSuccessor(Default); + CR.CaseBB->addSuccessor(JumpTableBB); + + // Build a vector of destination BBs, corresponding to each target + // of the jump table. If the value of the jump table slot corresponds to + // a case statement, push the case's BB onto the vector, otherwise, push + // the default BB.
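The 40% density gate above, evaluated for a hypothetical switch over {0, 1, 2, 3, 100}: TSize = 5, Range = 101, so Density is about 0.05 and no jump table is emitted here; only after handleBTSplitSwitchCase splits off the dense {0..3} prefix does that subrange qualify. A one-line check of the arithmetic:

#include <cstdio>

int main() {
  const unsigned TSize = 5;           // total case entries
  const double Range = 100 - 0 + 1;   // Last - First + 1
  const double Density = TSize / Range;
  std::printf("density=%.4f -> %s\n", Density,
              Density < 0.4 ? "no jump table" : "jump table");
  return 0;                           // density=0.0495 -> no jump table
}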
+ std::vector<MachineBasicBlock*> DestBBs; + APInt TEI = First; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { + const APInt& Low = cast<ConstantInt>(I->Low)->getValue(); + const APInt& High = cast<ConstantInt>(I->High)->getValue(); + + if (Low.sle(TEI) && TEI.sle(High)) { + DestBBs.push_back(I->BB); + if (TEI==High) + ++I; + } else { + DestBBs.push_back(Default); + } + } + + // Update successor info. Add one edge to each unique successor. + BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); + for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), + E = DestBBs.end(); I != E; ++I) { + if (!SuccsHandled[(*I)->getNumber()]) { + SuccsHandled[(*I)->getNumber()] = true; + JumpTableBB->addSuccessor(*I); + } + } + + // Create a jump table index for this jump table, or return an existing + // one. + unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs); + + // Set the jump table information so that we can codegen it as a second + // MachineBasicBlock + JumpTable JT(-1U, JTI, JumpTableBB, Default); + JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB)); + if (CR.CaseBB == CurMBB) + visitJumpTableHeader(JT, JTH); + + JTCases.push_back(JumpTableBlock(JTH, JT)); + + return true; +} + +/// handleBTSplitSwitchCase - emit comparison and split binary search tree into +/// 2 subtrees. +bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Size is the number of Cases represented by this range. + unsigned Size = CR.Range.second - CR.Range.first; + + const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + double FMetric = 0; + CaseItr Pivot = CR.Range.first + Size/2; + + // Select optimal pivot, maximizing sum density of LHS and RHS. This will + // (heuristically) allow us to emit JumpTable's later. 
+ size_t TSize = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); + + size_t LSize = FrontCase.size(); + size_t RSize = TSize-LSize; + DEBUG(errs() << "Selecting best pivot: \n" + << "First: " << First << ", Last: " << Last <<'\n' + << "LSize: " << LSize << ", RSize: " << RSize << '\n'); + for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; + J!=E; ++I, ++J) { + const APInt& LEnd = cast<ConstantInt>(I->High)->getValue(); + const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue(); + APInt Range = ComputeRange(LEnd, RBegin); + assert((Range - 2ULL).isNonNegative() && + "Invalid case distance"); + double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble(); + double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble(); + double Metric = Range.logBase2()*(LDensity+RDensity); + // Should always split in some non-trivial place + DEBUG(errs() <<"=>Step\n" + << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' + << "LDensity: " << LDensity + << ", RDensity: " << RDensity << '\n' + << "Metric: " << Metric << '\n'); + if (FMetric < Metric) { + Pivot = J; + FMetric = Metric; + DEBUG(errs() << "Current metric set to: " << FMetric << '\n'); + } + + LSize += J->size(); + RSize -= J->size(); + } + if (areJTsAllowed(TLI)) { + // If our case is dense we *really* should handle it earlier! + assert((FMetric > 0) && "Should handle dense range earlier!"); + } else { + Pivot = CR.Range.first + Size/2; + } + + CaseRange LHSR(CR.Range.first, Pivot); + CaseRange RHSR(Pivot, CR.Range.second); + Constant *C = Pivot->Low; + MachineBasicBlock *FalseBB = 0, *TrueBB = 0; + + // We know that we branch to the LHS if the Value being switched on is + // less than the Pivot value, C. We use this to optimize our binary + // tree a bit, by recognizing that if SV is greater than or equal to the + // LHS's Case Value, and that Case Value is exactly one less than the + // Pivot's Value, then we can branch directly to the LHS's Target, + // rather than creating a leaf node for it. + if ((LHSR.second - LHSR.first) == 1 && + LHSR.first->High == CR.GE && + cast<ConstantInt>(C)->getValue() == + (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { + TrueBB = LHSR.first->BB; + } else { + TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, TrueBB); + WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + // Similar to the optimization above, if the Value being switched on is + // known to be less than the Constant CR.LT, and the current Case Value + // is CR.LT - 1, then we can branch directly to the target block for + // the current Case Value, rather than emitting a RHS leaf node for it. + if ((RHSR.second - RHSR.first) == 1 && CR.LT && + cast<ConstantInt>(RHSR.first->Low)->getValue() == + (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { + FalseBB = RHSR.first->BB; + } else { + FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, FalseBB); + WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + // Create a CaseBlock record representing a conditional branch to + // the LHS node if the value being switched on SV is less than C. + // Otherwise, branch to RHS.
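The pivot metric above, worked for the same hypothetical case list {0, 1, 2, 3, 100}: at the gap between 3 and 100, Range = 98, LDensity = 4/4 and RDensity = 1/1, so Metric = log2(98) * 2, roughly 13.2, while any interior split scores near 1; the pivot therefore lands exactly between the dense and sparse halves:

#include <cmath>
#include <cstdio>

int main() {
  // Candidate split between LEnd = 3 and RBegin = 100 (First = 0, Last = 100).
  const double LSize = 4, RSize = 1;
  const double Range = 100 - 3 + 1;                 // ComputeRange(3, 100)
  const double LDensity = LSize / (3 - 0 + 1);      // 1.0
  const double RDensity = RSize / (100 - 100 + 1);  // 1.0
  std::printf("metric=%.2f\n", std::log2(Range) * (LDensity + RDensity));
  return 0;                                         // metric=13.23
}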
+  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == CurMBB)
+    visitSwitchCase(CB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and spans less than the machine word bitwidth, encode the case range into
+/// a series of masks and emit bit tests against those masks.
+bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
+                                                    CaseRecVector& WorkList,
+                                                    Value* SV,
+                                                    MachineBasicBlock* Default){
+  unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits();
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // If the target does not have a legal shift left, do not emit bit tests
+  // at all.
+  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+    return false;
+
+  size_t numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // A single case counts as one comparison; a case range counts as two.
+    numCmps += (I->Low == I->High ? 1 : 2);
+  }
+
+  // Count unique destinations.
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+      return false;
+  }
+  DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
+               << "Total number of comparisons: " << numCmps << '\n');
+
+  // Compute span of values.
+  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+  APInt cmpRange = maxValue - minValue;
+
+  DEBUG(errs() << "Compare range: " << cmpRange << '\n'
+               << "Low bound: " << minValue << '\n'
+               << "High bound: " << maxValue << '\n');
+
+  if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+
+  DEBUG(errs() << "Emitting bit tests\n");
+  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the case where all the case values fit in a word without having
+  // to subtract minValue; in that case we can avoid emitting the subtraction.
+  if (minValue.isNonNegative() &&
+      maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+    cmpRange = maxValue;
+  } else {
+    lowBound = minValue;
+  }
+
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+
+    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+    uint64_t lo = (lowValue - lowBound).getZExtValue();
+    uint64_t hi = (highValue - lowBound).getZExtValue();
+
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |= 1ULL << j;
+      CasesBits[i].Bits++;
+    }
+
+  }
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+  BitTestInfo BTC;
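
For illustration, here is the mask construction in miniature, with hypothetical standalone types: each case value, biased by lowBound, becomes one bit in a per-destination word, so a whole cluster of cases collapses into a single test against a mask.

    #include <cstdint>
    #include <vector>

    struct CaseBitsSketch { uint64_t Mask; int Dest; };

    void addRange(std::vector<CaseBitsSketch> &Bits, int Dest,
                  uint64_t Lo, uint64_t Hi, uint64_t LowBound) {
      // Find or create the entry for this destination (at most a handful).
      size_t i = 0;
      while (i < Bits.size() && Bits[i].Dest != Dest) ++i;
      if (i == Bits.size()) Bits.push_back({0, Dest});
      for (uint64_t V = Lo; V <= Hi; ++V)
        Bits[i].Mask |= 1ULL << (V - LowBound);  // one bit per case value
    }

The emitted test is then essentially `if ((1ULL << (SV - lowBound)) & Mask) goto Dest;`, one branch per destination.

+  // Figure out which block is immediately after the current one.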
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DEBUG(errs() << "Cases:\n");
+  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+    DEBUG(errs() << "Mask: " << CasesBits[i].Mask
+                 << ", Bits: " << CasesBits[i].Bits
+                 << ", BB: " << CasesBits[i].BB << '\n');
+
+    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, CaseBB);
+    BTC.push_back(BitTestCase(CasesBits[i].Mask,
+                              CaseBB,
+                              CasesBits[i].BB));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  BitTestBlock BTB(lowBound, cmpRange, SV,
+                   -1U, (CR.CaseBB == CurMBB),
+                   CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == CurMBB)
+    visitBitTestHeader(BTB);
+
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
+
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+size_t SelectionDAGLowering::Clusterify(CaseVector& Cases,
+                                        const SwitchInst& SI) {
+  size_t numCmps = 0;
+
+  // Start with "simple" cases.
+  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    Cases.push_back(Case(SI.getSuccessorValue(i),
+                         SI.getSuccessorValue(i),
+                         SMBB));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters.
+  if (Cases.size() >= 2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it.
+    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // If there is only the default destination, branch to it if it is not the
+  // next basic block.  Otherwise, just fall through.
+  if (SI.getNumOperands() == 2) {
+    // Update machine-CFG edges.
+
+    // If this is not a fall-through branch, emit the branch.
+    CurMBB->addSuccessor(Default);
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Default)));
+    return;
+  }
+
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  size_t numCmps = Clusterify(Cases, SI);
+  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << '\n');
+  numCmps = 0;
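
A compact model of that merge step, with hypothetical types: after sorting, a case is folded into its left neighbor whenever the values are contiguous and both jump to the same block.

    #include <cstdint>
    #include <vector>

    struct C { int64_t Low, High; int Dest; };

    void mergeClusters(std::vector<C> &Cases) {  // Cases sorted by Low
      std::vector<C> Out;
      for (const C &X : Cases) {
        if (!Out.empty() && Out.back().Dest == X.Dest &&
            Out.back().High + 1 == X.Low)
          Out.back().High = X.High;  // extend the previous cluster
        else
          Out.push_back(X);
      }
      Cases.swap(Out);
    }

On {1→A, 2→A, 3→A, 5→B} this yields {[1,3]→A, [5,5]→B}: three compares (a range counts double) instead of four.

+  // Get the Value to be switched on and default basic blocks, which will be
+  // inserted into CaseBlock records, representing basic blocks in the binary
+  // search tree.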
+ Value *SV = SI.getOperand(0); + + // Push the initial CaseRec onto the worklist + CaseRecVector WorkList; + WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end()))); + + while (!WorkList.empty()) { + // Grab a record representing a case range to process off the worklist + CaseRec CR = WorkList.back(); + WorkList.pop_back(); + + if (handleBitTestsSwitchCase(CR, WorkList, SV, Default)) + continue; + + // If the range has few cases (two or less) emit a series of specific + // tests. + if (handleSmallSwitchRange(CR, WorkList, SV, Default)) + continue; + + // If the switch has more than 5 blocks, and at least 40% dense, and the + // target supports indirect branches, then emit a jump table rather than + // lowering the switch to a binary tree of conditional branches. + if (handleJTSwitchCase(CR, WorkList, SV, Default)) + continue; + + // Emit binary tree. We need to pick a pivot, and push left and right ranges + // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. + handleBTSplitSwitchCase(CR, WorkList, SV, Default); + } +} + + +void SelectionDAGLowering::visitSub(User &I) { + // -0.0 - X --> fneg + const Type *Ty = I.getType(); + if (isa<VectorType>(Ty)) { + if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { + const VectorType *DestTy = cast<VectorType>(I.getType()); + const Type *ElTy = DestTy->getElementType(); + if (ElTy->isFloatingPoint()) { + unsigned VL = DestTy->getNumElements(); + std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); + Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); + if (CV == CNZ) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + } + } + } + if (Ty->isFloatingPoint()) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) + if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + } + + visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB); +} + +void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + if (!isa<VectorType>(I.getType()) && + Op2.getValueType() != TLI.getShiftAmountTy()) { + // If the operand is smaller than the shift count type, promote it. + if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. This is a common case and it exposes the truncate to + // optimization early. + else if (TLI.getShiftAmountTy().getSizeInBits() >= + Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // Otherwise we'll need to temporarily settle for some other + // convenient type; type legalization will make adjustments as + // needed. 
+ else if (TLI.getPointerTy().bitsLT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + else if (TLI.getPointerTy().bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + } + + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGLowering::visitICmp(User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode)); +} + +void SelectionDAGLowering::visitFCmp(User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition)); +} + +void SelectionDAGLowering::visitVICmp(User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (VICmpInst *IC = dyn_cast<VICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + setValue(&I, DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(), + Op1, Op2, Opcode)); +} + +void SelectionDAGLowering::visitVFCmp(User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + MVT DestVT = TLI.getValueType(I.getType()); + + setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); +} + +void SelectionDAGLowering::visitSelect(User &I) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues != 0) { + SmallVector<SDValue, 4> Values(NumValues); + SDValue Cond = getValue(I.getOperand(0)); + SDValue TrueVal = getValue(I.getOperand(1)); + SDValue FalseVal = getValue(I.getOperand(2)); + + for (unsigned i = 0; i != NumValues; ++i) + Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), + TrueVal.getValueType(), Cond, + SDValue(TrueVal.getNode(), TrueVal.getResNo() + i), + SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); + } +} + + +void SelectionDAGLowering::visitTrunc(User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitZExt(User &I) {
+  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason. So, nothing much
+  // to do.
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitSExt(User &I) {
+  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason. So, nothing much
+  // to do.
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPTrunc(User &I) {
+  // FPTrunc is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+                           DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGLowering::visitFPExt(User &I) {
+  // FPExt is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToUI(User &I) {
+  // FPToUI is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToSI(User &I) {
+  // FPToSI is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitUIToFP(User &I) {
+  // UIToFP is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitSIToFP(User &I) {
+  // SIToFP is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  MVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGLowering::visitPtrToInt(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  MVT SrcVT = N.getValueType();
+  MVT DestVT = TLI.getValueType(I.getType());
+  SDValue Result;
+  if (DestVT.bitsLT(SrcVT))
+    Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N);
+  else
+    // Note: ZERO_EXTEND can handle cases where the sizes are equal too.
+    Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N);
+  setValue(&I, Result);
+}
+
+void SelectionDAGLowering::visitIntToPtr(User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0)); + MVT SrcVT = N.getValueType(); + MVT DestVT = TLI.getValueType(I.getType()); + if (DestVT.bitsLT(SrcVT)) + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + DestVT, N)); +} + +void SelectionDAGLowering::visitBitCast(User &I) { + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + + // BitCast assures us that source and destination are the same size so this + // is either a BIT_CONVERT or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + DestVT, N)); // convert types + else + setValue(&I, N); // noop cast. +} + +void SelectionDAGLowering::visitInsertElement(User &I) { + SDValue InVec = getValue(I.getOperand(0)); + SDValue InVal = getValue(I.getOperand(1)); + SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), + getValue(I.getOperand(2))); + + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), + InVec, InVal, InIdx)); +} + +void SelectionDAGLowering::visitExtractElement(User &I) { + SDValue InVec = getValue(I.getOperand(0)); + SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), + getValue(I.getOperand(1))); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), InVec, InIdx)); +} + + +// Utility for visitShuffleVector - Returns true if the mask is mask starting +// from SIndx and increasing to the element length (undefs are allowed). +static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { + unsigned MaskNumElts = Mask.size(); + for (unsigned i = 0; i != MaskNumElts; ++i) + if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) + return false; + return true; +} + +void SelectionDAGLowering::visitShuffleVector(User &I) { + SmallVector<int, 8> Mask; + SDValue Src1 = getValue(I.getOperand(0)); + SDValue Src2 = getValue(I.getOperand(1)); + + // Convert the ConstantVector mask operand into an array of ints, with -1 + // representing undef values. + SmallVector<Constant*, 8> MaskElts; + cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); + unsigned MaskNumElts = MaskElts.size(); + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (isa<UndefValue>(MaskElts[i])) + Mask.push_back(-1); + else + Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue()); + } + + MVT VT = TLI.getValueType(I.getType()); + MVT SrcVT = Src1.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); + + if (SrcNumElts == MaskNumElts) { + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &Mask[0])); + return; + } + + // Normalize the shuffle vector since mask and vector length don't match. + if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { + // Mask is longer than the source vectors and is a multiple of the source + // vectors. We can use concatenate vector to make the mask and vectors + // lengths match. + if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); + return; + } + + // Pad both vectors with undefs to make them the same length as the mask. 
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+    SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+    MOps1[0] = Src1;
+    MOps2[0] = Src2;
+
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps1[0], NumConcat);
+    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps2[0], NumConcat);
+
+    // Readjust mask for new input vector length.
+    SmallVector<int, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        MappedOps.push_back(Idx);
+      else
+        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+    }
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &MappedOps[0]));
+    return;
+  }
+
+  if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle. The analysis is done by calculating
+    // the range of elements the mask accesses in both vectors.
+    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MaxRange[2] = {-1, -1};
+
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      int Input = 0;
+      if (Idx < 0)
+        continue;
+
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
+      }
+      if (Idx > MaxRange[Input])
+        MaxRange[Input] = Idx;
+      if (Idx < MinRange[Input])
+        MinRange[Input] = Idx;
+    }
+
+    // Check whether the access is smaller than the vector size, and see if
+    // we can find a reasonable extract index.
+    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Cannot extract.
+    int StartIdx[2];  // StartIdx to extract from
+    for (int Input=0; Input < 2; ++Input) {
+      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+        RangeUse[Input] = 0; // Unused
+        StartIdx[Input] = 0;
+      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+        // Fits within range but we should see if we can find a good
+        // start index that is a multiple of the mask length.
+        if (MaxRange[Input] < (int)MaskNumElts) {
+          RangeUse[Input] = 1; // Extract from beginning of the vector
+          StartIdx[Input] = 0;
+        } else {
+          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+              StartIdx[Input] + MaskNumElts < SrcNumElts)
+            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+        }
+      }
+    }
+
+    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+      setValue(&I, DAG.getUNDEF(VT));  // Vectors are not used.
+      return;
+    }
+    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+      // Extract appropriate subvector and generate a vector shuffle.
+      for (int Input=0; Input < 2; ++Input) {
+        SDValue& Src = Input == 0 ? Src1 : Src2;
+        if (RangeUse[Input] == 0) {
+          Src = DAG.getUNDEF(VT);
+        } else {
+          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
+        }
+      }
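
Distilled into a standalone helper with hypothetical names, the range analysis above reduces to a min/max scan per input: find the lowest and highest source element the mask touches, so we can tell whether one EXTRACT_SUBVECTOR per input would cover the whole access.

    #include <algorithm>
    #include <vector>

    void maskRanges(const std::vector<int> &Mask, int SrcNumElts,
                    int MinRange[2], int MaxRange[2]) {
      MinRange[0] = MinRange[1] = SrcNumElts + 1;
      MaxRange[0] = MaxRange[1] = -1;
      for (int Idx : Mask) {
        if (Idx < 0) continue;                // undef lane
        int Input = Idx >= SrcNumElts;        // 0 = first source, 1 = second
        int Elt = Input ? Idx - SrcNumElts : Idx;
        MinRange[Input] = std::min(MinRange[Input], Elt);
        MaxRange[Input] = std::max(MaxRange[Input], Elt);
      }
    }

+      // Calculate new mask.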
+      SmallVector<int, 8> MappedOps;
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx < 0)
+          MappedOps.push_back(Idx);
+        else if (Idx < (int)SrcNumElts)
+          MappedOps.push_back(Idx - StartIdx[0]);
+        else
+          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+      }
+      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                        &MappedOps[0]));
+      return;
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors, so fall back
+  // to replacing the shuffle with extract and build vector.
+  MVT EltVT = VT.getVectorElementType();
+  MVT PtrVT = TLI.getPointerTy();
+  SmallVector<SDValue,8> Ops;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (Mask[i] < 0) {
+      Ops.push_back(DAG.getUNDEF(EltVT));
+    } else {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                                  EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
+      else
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                                  EltVT, Src2,
+                                  DAG.getConstant(Idx - SrcNumElts, PtrVT)));
+    }
+  }
+  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                           VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Value *Op1 = I.getOperand(1);
+  const Type *AggTy = I.getType();
+  const Type *ValTy = Op1->getType();
+  bool IntoUndef = isa<UndefValue>(Op0);
+  bool FromUndef = isa<UndefValue>(Op1);
+
+  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+                                            I.idx_begin(), I.idx_end());
+
+  SmallVector<MVT, 4> AggValueVTs;
+  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+  SmallVector<MVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumAggValues = AggValueVTs.size();
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumAggValues);
+
+  SDValue Agg = getValue(Op0);
+  SDValue Val = getValue(Op1);
+  unsigned i = 0;
+  // Copy the beginning value(s) from the original aggregate.
+  for (; i != LinearIndex; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+  // Copy values from the inserted value(s).
+  for (; i != LinearIndex + NumValValues; ++i)
+    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  // Copy remaining value(s) from the original aggregate.
+  for (; i != NumAggValues; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
+                           &Values[0], NumAggValues));
+}
+
+void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Type *AggTy = Op0->getType();
+  const Type *ValTy = I.getType();
+  bool OutOfUndef = isa<UndefValue>(Op0);
+
+  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+                                            I.idx_begin(), I.idx_end());
+
+  SmallVector<MVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumValValues);
+
+  SDValue Agg = getValue(Op0);
+  // Copy out the selected value(s).
+  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+    Values[i - LinearIndex] =
+      OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValValueVTs[0], NumValValues), + &Values[0], NumValValues)); +} + + +void SelectionDAGLowering::visitGetElementPtr(User &I) { + SDValue N = getValue(I.getOperand(0)); + const Type *Ty = I.getOperand(0)->getType(); + + for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end(); + OI != E; ++OI) { + Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + DAG.getIntPtrConstant(Offset)); + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->getZExtValue() == 0) continue; + uint64_t Offs = + TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + SDValue OffsVal; + unsigned PtrBits = TLI.getPointerTy().getSizeInBits(); + if (PtrBits < 64) { + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(Offs, MVT::i64)); + } else + OffsVal = DAG.getIntPtrConstant(Offs); + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + OffsVal); + continue; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD->getTypeAllocSize(Ty); + SDValue IdxN = getValue(Idx); + + // If the index is smaller or larger than intptr_t, truncate or extend + // it. + if (IdxN.getValueType().bitsLT(N.getValueType())) + IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), + N.getValueType(), IdxN); + else if (IdxN.getValueType().bitsGT(N.getValueType())) + IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + N.getValueType(), IdxN); + + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. + if (ElementSize != 1) { + if (isPowerOf2_64(ElementSize)) { + unsigned Amt = Log2_64(ElementSize); + IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), + N.getValueType(), IdxN, + DAG.getConstant(Amt, TLI.getPointerTy())); + } else { + SDValue Scale = DAG.getIntPtrConstant(ElementSize); + IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), + N.getValueType(), IdxN, Scale); + } + } + + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), + N.getValueType(), N, IdxN); + } + } + setValue(&I, N); +} + +void SelectionDAGLowering::visitAlloca(AllocaInst &I) { + // If this is a fixed sized alloca in the entry block of the function, + // allocate it statically on the stack. + if (FuncInfo.StaticAllocaMap.count(&I)) + return; // getValue will auto-populate this. 
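
As an aside, here is the index-scaling step of the getelementptr lowering above in scalar form (a hypothetical helper, not an LLVM API): a power-of-two element size lets the multiply become a shift, which is the common case.

    #include <cstdint>

    uint64_t indexOffset(uint64_t Idx, uint64_t ElementSize) {
      if (ElementSize == 1) return Idx;
      if ((ElementSize & (ElementSize - 1)) == 0) {   // power of two
        unsigned Amt = 0;
        while ((1ULL << Amt) != ElementSize) ++Amt;   // Log2 of ElementSize
        return Idx << Amt;                            // shl instead of mul
      }
      return Idx * ElementSize;
    }

For example, indexOffset(3, 8) == 24, computed as 3 << 3.
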
+ + const Type *Ty = I.getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + I.getAlignment()); + + SDValue AllocSize = getValue(I.getArraySize()); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(), + AllocSize, + DAG.getConstant(TySize, AllocSize.getValueType())); + + + + MVT IntPtr = TLI.getPointerTy(); + if (IntPtr.bitsLT(AllocSize.getValueType())) + AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + IntPtr, AllocSize); + else if (IntPtr.bitsGT(AllocSize.getValueType())) + AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + IntPtr, AllocSize); + + // Handle alignment. If the requested alignment is less than or equal to + // the stack alignment, ignore it. If the size is greater than or equal to + // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. + unsigned StackAlign = + TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + if (Align <= StackAlign) + Align = 0; + + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. + AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(StackAlign-1)); + // Mask out the low bits for alignment purposes. + AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), + VTs, Ops, 3); + setValue(&I, DSA); + DAG.setRoot(DSA.getValue(1)); + + // Inform the Frame Information that we have just allocated a variable-sized + // object. + CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject(); +} + +void SelectionDAGLowering::visitLoad(LoadInst &I) { + const Value *SV = I.getOperand(0); + SDValue Ptr = getValue(SV); + + const Type *Ty = I.getType(); + bool isVolatile = I.isVolatile(); + unsigned Alignment = I.getAlignment(); + + SmallVector<MVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + SDValue Root; + bool ConstantMemory = false; + if (I.isVolatile()) + // Serialize volatile loads with other side effects. + Root = getRoot(); + else if (AA->pointsToConstantMemory(SV)) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. 
+ Root = DAG.getRoot(); + } + + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + MVT PtrVT = Ptr.getValueType(); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + SV, Offsets[i], + isVolatile, Alignment); + Values[i] = L; + Chains[i] = L.getValue(1); + } + + if (!ConstantMemory) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, + &Chains[0], NumValues); + if (isVolatile) + DAG.setRoot(Chain); + else + PendingLoads.push_back(Chain); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); +} + + +void SelectionDAGLowering::visitStore(StoreInst &I) { + Value *SrcV = I.getOperand(0); + Value *PtrV = I.getOperand(1); + + SmallVector<MVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + // Get the lowered operands. Note that we do this after + // checking if NumResults is zero, because with zero results + // the operands won't have values in the map. + SDValue Src = getValue(SrcV); + SDValue Ptr = getValue(PtrV); + + SDValue Root = getRoot(); + SmallVector<SDValue, 4> Chains(NumValues); + MVT PtrVT = Ptr.getValueType(); + bool isVolatile = I.isVolatile(); + unsigned Alignment = I.getAlignment(); + for (unsigned i = 0; i != NumValues; ++i) + Chains[i] = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + PtrV, Offsets[i], + isVolatile, Alignment); + + DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues)); +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, + unsigned Intrinsic) { + bool HasChain = !I.doesNotAccessMemory(); + bool OnlyLoad = HasChain && I.onlyReadsMemory(); + + // Build the operand list. + SmallVector<SDValue, 8> Ops; + if (HasChain) { // If this intrinsic has side-effects, chainify it. + if (OnlyLoad) { + // We don't need to serialize loads against other loads. + Ops.push_back(DAG.getRoot()); + } else { + Ops.push_back(getRoot()); + } + } + + // Info is set by getTgtMemInstrinsic + TargetLowering::IntrinsicInfo Info; + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + + // Add the intrinsic ID as an integer operand if it's not a target intrinsic. + if (!IsTgtIntrinsic) + Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + + // Add all operands of the call to the operand list. 
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { + SDValue Op = getValue(I.getOperand(i)); + assert(TLI.isTypeLegal(Op.getValueType()) && + "Intrinsic uses a non-legal type?"); + Ops.push_back(Op); + } + + std::vector<MVT> VTArray; + if (I.getType() != Type::VoidTy) { + MVT VT = TLI.getValueType(I.getType()); + if (VT.isVector()) { + const VectorType *DestTy = cast<VectorType>(I.getType()); + MVT EltVT = TLI.getValueType(DestTy->getElementType()); + + VT = MVT::getVectorVT(EltVT, DestTy->getNumElements()); + assert(VT != MVT::Other && "Intrinsic uses a non-legal type?"); + } + + assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?"); + VTArray.push_back(VT); + } + if (HasChain) + VTArray.push_back(MVT::Other); + + SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size()); + + // Create the node. + SDValue Result; + if (IsTgtIntrinsic) { + // This is target intrinsic that touches memory + Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), + VTs, &Ops[0], Ops.size(), + Info.memVT, Info.ptrVal, Info.offset, + Info.align, Info.vol, + Info.readMem, Info.writeMem); + } + else if (!HasChain) + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + else if (I.getType() != Type::VoidTy) + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + else + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + if (I.getType() != Type::VoidTy) { + if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { + MVT VT = TLI.getValueType(PTy); + Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); + } + setValue(&I, Result); + } +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +static GlobalVariable *ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + assert ((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +namespace llvm { + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB) { + // Inform the MachineModuleInfo of the personality for this landing pad. + ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + assert(CE->getOpcode() == Instruction::BitCast && + isa<Function>(CE->getOperand(0)) && + "Personality should be a function"); + MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); + + // Gather all the type infos for this landing pad and pass them along to + // MachineModuleInfo. + std::vector<GlobalVariable *> TyInfo; + unsigned N = I.getNumOperands(); + + for (unsigned i = N - 1; i > 2; --i) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + unsigned FilterLength = CI->getZExtValue(); + unsigned FirstCatch = i + FilterLength + !FilterLength; + assert (FirstCatch <= N && "Invalid filter length"); + + if (FirstCatch < N) { + TyInfo.reserve(N - FirstCatch); + for (unsigned j = FirstCatch; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + if (!FilterLength) { + // Cleanup. + MMI->addCleanup(MBB); + } else { + // Filter. 
+        TyInfo.reserve(FilterLength - 1);
+        for (unsigned j = i + 1; j < FirstCatch; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addFilterTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      N = i;
+    }
+  }
+
+  if (N > 3) {
+    TyInfo.reserve(N - 3);
+    for (unsigned j = 3; j < N; ++j)
+      TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+    MMI->addCatchTypeInfo(MBB, TyInfo);
+  }
+}
+
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with exponent of 1:
+///
+///   Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the hexadecimal representation of the floating-point value.
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x007fffff, MVT::i32));
+  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+                           DAG.getConstant(0x3f800000, MVT::i32));
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the hexadecimal representation of the floating-point value.
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+            DebugLoc dl) {
+  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x7f800000, MVT::i32));
+  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+                           DAG.getConstant(23, TLI.getPointerTy()));
+  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+                           DAG.getConstant(127, MVT::i32));
+  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get a 32-bit floating-point constant.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// implVisitBinaryAtomic - Inlined utility function to implement binary input
+/// atomic intrinsics for visitIntrinsicCall: I is the call instruction and
+/// Op is the associated NodeType for I.
+const char *
+SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+  SDValue Root = getRoot();
+  SDValue L =
+    DAG.getAtomic(Op, getCurDebugLoc(),
+                  getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+                  Root,
+                  getValue(I.getOperand(1)),
+                  getValue(I.getOperand(2)),
+                  I.getOperand(1));
+  setValue(&I, L);
+  DAG.setRoot(L.getValue(1));
+  return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+  SDValue Op1 = getValue(I.getOperand(1));
+  SDValue Op2 = getValue(I.getOperand(2));
+
+  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+  SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2);
+
+  setValue(&I, Result);
+  return 0;
+}
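
To make the two bit-level formulas above concrete, here is a standalone sketch in plain C++; all helper names are hypothetical, and, like the lowering itself, the exp variant assumes the fractional part lands in [0,1).

    #include <cstdint>
    #include <cstring>

    static uint32_t bitsOf(float F)     { uint32_t B; std::memcpy(&B, &F, 4); return B; }
    static float    floatOf(uint32_t B) { float F; std::memcpy(&F, &B, 4); return F; }

    // (float)(int)(((Op & 0x7f800000) >> 23) - 127), with the bitcast folded in.
    static float getExponent(float Op) {
      return (float)((int)((bitsOf(Op) & 0x7f800000u) >> 23) - 127);
    }

    // Op = (Op & 0x007fffff) | 0x3f800000 -- the significand with an exponent of 1.
    static float getSignificand(float Op) {
      return floatOf((bitsOf(Op) & 0x007fffffu) | 0x3f800000u);
    }

    // 6-bit-precision e^x, mirroring visitExp below: split x*log2(e) into
    // integer and fractional parts, approximate 2^frac with the quoted
    // polynomial, then add the integer part into the exponent field.
    static float exp6(float x) {
      float t0 = x * 1.4426950f;               // x * LOG2OFe
      int32_t IntegerPartOfX = (int32_t)t0;
      float f = t0 - (float)IntegerPartOfX;    // fractional part
      float p = 0.997535578f + (0.735607626f + 0.252464424f * f) * f;  // ~2^f
      return floatOf(bitsOf(p) + ((uint32_t)IntegerPartOfX << 23));
    }

+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.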
+void +SelectionDAGLowering::visitExp(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OFe 1.4426950f + // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x3fb8aa3b)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); + + // Add the exponent into the result in integer domain. + SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // 0.000107046256 error, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); + + // Add the exponent into the result in integer domain. 
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::i32, t13); + + // Add the exponent into the result in integer domain. + SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitLog(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log(2) [0.69314718f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3f317218)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. 
+ SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // LogofMantissa = + // -1.1609546f + + // (1.4034025f - 0.23903021f * x) * x; + // + // error 0.0034276066, which is better than 8 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbe74c456)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3fb3a2b1)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // LogOfMantissa = + // -1.7417939f + + // (2.8212026f + + // (-1.4699568f + + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; + // + // error 0.000061011436, which is 14 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbd67b6d6)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ee4f4b8)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fbc278b)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40348e95)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // LogOfMantissa = + // -2.1072184f + + // (4.2372794f + + // (-3.7029485f + + // (2.2781945f + + // (-0.87823314f + + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; + // + // error 0.0000023660568, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbc91e5ac)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e4350aa)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f60d3e3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x4011cdf0)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x406cfd1c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x408797cb)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// limited-precision mode. 
+void +SelectionDAGLowering::visitLog2(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Get the exponent. + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + // Different possible minimax approximations of significand in + // floating-point for various degrees of accuracy over [1,2]. + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; + // + // error 0.0049451742, which is more than 7 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbeb08fe0)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x40019463)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log2ofMantissa = + // -2.51285454f + + // (4.07009056f + + // (-2.12067489f + + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; + // + // error 0.0000876136000, which is better than 13 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbda7262e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f25280b)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x4007b923)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40823e2f)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log2ofMantissa = + // -3.0400495f + + // (6.1129976f + + // (-5.3420409f + + // (3.2865683f + + // (-1.2669343f + + // (0.27515199f - + // 0.25691327e-1f * x) * x) * x) * x) * x) * x; + // + // error 0.0000018516, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbcd2769e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e8ce0b9)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fa22ae7)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40525723)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x40aaf200)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x40c39dad)); + SDValue t10 = 
DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG2, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitLog10(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log10(2) [0.30102999f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3e9a209a)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log10ofMantissa = + // -0.50419619f + + // (0.60948995f - 0.10380950f * x) * x; + // + // error 0.0014886165, which is 6 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbdd49a13)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f1c0789)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log10ofMantissa = + // -0.64831180f + + // (0.91751397f + + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; + // + // error 0.00019228036, which is better than 12 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3d431f31)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ea21fb2)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f6ae232)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log10ofMantissa = + // -0.84299375f + + // (1.5327582f + + // (-1.0688956f + + // (0.49102474f + + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; + // + // error 0.0000037995730, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3c5d51ce)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e00685a)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3efb6798)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f88d192)); 
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fc4316c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG10, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitExp2(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is 
better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP2, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitPow - Lower a pow intrinsic. Handles the special sequences for +/// limited-precision mode with x == 10.0f. +void +SelectionDAGLowering::visitPow(CallInst &I) { + SDValue result; + Value *Val = I.getOperand(1); + DebugLoc dl = getCurDebugLoc(); + bool IsExp10 = false; + + if (getValue(Val).getValueType() == MVT::f32 && + getValue(I.getOperand(2)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + APFloat Ten(10.0f); + IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten); + } + } + } + + if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(2)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OF10 3.3219281f + // IntegerPartOfX = (int32_t)(x * LOG2OF10); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x40549a78)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // twoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, 
IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FPOW, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2))); + } + + setValue(&I, result); +} + +/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If +/// we want to emit this as a call to a named external function, return the name +/// otherwise lower it and return null. +const char * +SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { + DebugLoc dl = getCurDebugLoc(); + switch (Intrinsic) { + default: + // By default, turn this into a target intrinsic node. 
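+    // (visitTargetIntrinsic emits an INTRINSIC_VOID, INTRINSIC_W_CHAIN or
+    // INTRINSIC_WO_CHAIN node carrying the intrinsic ID as an operand, and
+    // leaves the real lowering to target-specific instruction selection.)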
+ visitTargetIntrinsic(I, Intrinsic); + return 0; + case Intrinsic::vastart: visitVAStart(I); return 0; + case Intrinsic::vaend: visitVAEnd(I); return 0; + case Intrinsic::vacopy: visitVACopy(I); return 0; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::setjmp: + return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + break; + case Intrinsic::longjmp: + return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + break; + case Intrinsic::memcpy: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + case Intrinsic::memset: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0)); + return 0; + } + case Intrinsic::memmove: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + + // If the source and destination are known to not be aliases, we can + // lower memmove as memcpy. + uint64_t Size = -1ULL; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) + Size = C->getZExtValue(); + if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == + AliasAnalysis::NoAlias) { + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst &SPI = cast<DbgStopPointInst>(I); + if (DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLevel)) { + MachineFunction &MF = DAG.getMachineFunction(); + DICompileUnit CU(cast<GlobalVariable>(SPI.getContext())); + DebugLoc Loc = DebugLoc::get(MF.getOrCreateDebugLocID(CU.getGV(), + SPI.getLine(), SPI.getColumn())); + setCurDebugLoc(Loc); + + if (OptLevel == CodeGenOpt::None) + DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(), + SPI.getLine(), + SPI.getColumn(), + SPI.getContext())); + } + return 0; + } + case Intrinsic::dbg_region_start: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I); + + if (DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLevel) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned LabelID = + DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext())); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } + + return 0; + } + case Intrinsic::dbg_region_end: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I); + + if (DIDescriptor::ValidDebugInfo(REI.getContext(), OptLevel) && + DW && DW->ShouldEmitDwarfDebug()) { + MachineFunction &MF = DAG.getMachineFunction(); + DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext())); + + if (Subprogram.isNull() || 
Subprogram.describes(MF.getFunction())) {
+        unsigned LabelID =
+          DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext()));
+        DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+                                 getRoot(), LabelID));
+      } else {
+        // This is the end of an inlined function. Debugging information for
+        // inlined functions is not handled yet (only supported by FastISel).
+        if (OptLevel == CodeGenOpt::None) {
+          unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
+          if (ID != 0)
+            // Returned ID is 0 if this is an unbalanced "end of inlined
+            // scope". This could happen if the optimizer eats dbg intrinsics
+            // or "beginning of inlined scope" is not recognized due to
+            // missing location info. In such cases, ignore this region.end.
+            DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+                                     getRoot(), ID));
+        }
+      }
+    }
+
+    return 0;
+  }
+  case Intrinsic::dbg_func_start: {
+    DwarfWriter *DW = DAG.getDwarfWriter();
+    DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
+    Value *SP = FSI.getSubprogram();
+    if (!DIDescriptor::ValidDebugInfo(SP, OptLevel))
+      return 0;
+
+    MachineFunction &MF = DAG.getMachineFunction();
+    if (OptLevel == CodeGenOpt::None) {
+      // llvm.dbg.func.start implicitly defines a dbg_stoppoint, which is what
+      // (most?) gdb expects.
+      DebugLoc PrevLoc = CurDebugLoc;
+      DISubprogram Subprogram(cast<GlobalVariable>(SP));
+      DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+      if (!Subprogram.describes(MF.getFunction())) {
+        // This is the beginning of an inlined function.
+
+        // If llvm.dbg.func.start is seen in a new block before any
+        // llvm.dbg.stoppoint intrinsic, then the location info is unknown.
+        // FIXME: Why is DebugLoc reset at the beginning of each block?
+        if (PrevLoc.isUnknown())
+          return 0;
+
+        // Record the source line.
+        unsigned Line = Subprogram.getLineNumber();
+        setCurDebugLoc(DebugLoc::get(
+                     MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+
+        if (DW && DW->ShouldEmitDwarfDebug()) {
+          DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
+          unsigned LabelID = DW->RecordInlinedFnStart(Subprogram,
+                                         DICompileUnit(PrevLocTpl.CompileUnit),
+                                         PrevLocTpl.Line,
+                                         PrevLocTpl.Col);
+          DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
+                                   getRoot(), LabelID));
+        }
+      } else {
+        // Record the source line.
+        unsigned Line = Subprogram.getLineNumber();
+        MF.setDefaultDebugLoc(DebugLoc::get(
+                     MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0)));
+        if (DW && DW->ShouldEmitDwarfDebug()) {
+          // llvm.dbg.func_start also defines the beginning of the function
+          // scope.
+          DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram()));
+        }
+      }
+    } else {
+      DISubprogram Subprogram(cast<GlobalVariable>(SP));
+
+      std::string SPName;
+      Subprogram.getLinkageName(SPName);
+      if (!SPName.empty()
+          && strcmp(SPName.c_str(), MF.getFunction()->getNameStart())) {
+        // This is the beginning of an inlined function. Debugging information
+        // for inlined functions is not handled yet (only supported by
+        // FastISel).
+        return 0;
+      }
+
+      // llvm.dbg.func.start implicitly defines a dbg_stoppoint, which is
+      // what (most?) gdb expects.
+      DICompileUnit CompileUnit = Subprogram.getCompileUnit();
+
+      // Record the source line but do not create a label for the normal
+      // function start. It will be emitted at asm emission time. However,
+      // create a label if this is the beginning of an inlined function.
+ unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + // FIXME - Start new region because llvm.dbg.func_start also defines + // beginning of function scope. + } + + return 0; + } + case Intrinsic::dbg_declare: { + if (OptLevel == CodeGenOpt::None) { + DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + Value *Variable = DI.getVariable(); + if (DIDescriptor::ValidDebugInfo(Variable, OptLevel)) + DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), + getValue(DI.getAddress()), getValue(Variable))); + } else { + // FIXME: Do something sensible here when we support debug declare. + } + return 0; + } + case Intrinsic::eh_exception: { + // Insert the EXCEPTIONADDR instruction. + assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[1]; + Ops[0] = DAG.getRoot(); + SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + return 0; + } + + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + if (CurMBB->isLandingPad()) + AddCatchInfo(I, MMI, CurMBB); + else { +#ifndef NDEBUG + FuncInfo.CatchInfoLost.insert(&I); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) CurMBB->addLiveIn(Reg); + } + + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + SDValue Ops[2]; + Ops[0] = getValue(I.getOperand(1)); + Ops[1] = getRoot(); + SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + } else { + setValue(&I, DAG.getConstant(0, VT)); + } + + return 0; + } + + case Intrinsic::eh_typeid_for_i32: + case Intrinsic::eh_typeid_for_i64: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + // Find the type id for the given typeinfo. + GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + + unsigned TypeID = MMI->getTypeIDFor(GV); + setValue(&I, DAG.getConstant(TypeID, VT)); + } else { + // Return something different to eh_selector. 
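+      // (eh_selector produces constant 0 in the same no-MMI case above, so
+      // constant 1 can never compare equal to it.)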
+ setValue(&I, DAG.getConstant(1, VT)); + } + + return 0; + } + + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + } else { + setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); + } + + return 0; + case Intrinsic::eh_unwind_init: + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsUnwindInit(true); + } + + return 0; + + case Intrinsic::eh_dwarf_cfa: { + MVT VT = getValue(I.getOperand(1)).getValueType(); + SDValue CfaArg; + if (VT.bitsGT(TLI.getPointerTy())) + CfaArg = DAG.getNode(ISD::TRUNCATE, dl, + TLI.getPointerTy(), getValue(I.getOperand(1))); + else + CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl, + TLI.getPointerTy(), getValue(I.getOperand(1))); + + SDValue Offset = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + TLI.getPointerTy()), + CfaArg); + setValue(&I, DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAMEADDR, dl, + TLI.getPointerTy(), + DAG.getConstant(0, + TLI.getPointerTy())), + Offset)); + return 0; + } + + case Intrinsic::convertff: + case Intrinsic::convertfsi: + case Intrinsic::convertfui: + case Intrinsic::convertsif: + case Intrinsic::convertuif: + case Intrinsic::convertss: + case Intrinsic::convertsu: + case Intrinsic::convertus: + case Intrinsic::convertuu: { + ISD::CvtCode Code = ISD::CVT_INVALID; + switch (Intrinsic) { + case Intrinsic::convertff: Code = ISD::CVT_FF; break; + case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; + case Intrinsic::convertfui: Code = ISD::CVT_FU; break; + case Intrinsic::convertsif: Code = ISD::CVT_SF; break; + case Intrinsic::convertuif: Code = ISD::CVT_UF; break; + case Intrinsic::convertss: Code = ISD::CVT_SS; break; + case Intrinsic::convertsu: Code = ISD::CVT_SU; break; + case Intrinsic::convertus: Code = ISD::CVT_US; break; + case Intrinsic::convertuu: Code = ISD::CVT_UU; break; + } + MVT DestVT = TLI.getValueType(I.getType()); + Value* Op1 = I.getOperand(1); + setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), + DAG.getValueType(DestVT), + DAG.getValueType(getValue(Op1).getValueType()), + getValue(I.getOperand(2)), + getValue(I.getOperand(3)), + Code)); + return 0; + } + + case Intrinsic::sqrt: + setValue(&I, DAG.getNode(ISD::FSQRT, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::powi: + setValue(&I, DAG.getNode(ISD::FPOWI, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + return 0; + case Intrinsic::sin: + setValue(&I, DAG.getNode(ISD::FSIN, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cos: + setValue(&I, DAG.getNode(ISD::FCOS, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::log: + visitLog(I); + return 0; + case Intrinsic::log2: + visitLog2(I); + return 0; + case Intrinsic::log10: + visitLog10(I); + return 0; + case Intrinsic::exp: + visitExp(I); + return 0; + case Intrinsic::exp2: + visitExp2(I); + return 0; + case Intrinsic::pow: + visitPow(I); + return 0; + case Intrinsic::pcmarker: { + SDValue Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case 
Intrinsic::readcyclecounter: { + SDValue Op = getRoot(); + SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl, + DAG.getVTList(MVT::i64, MVT::Other), + &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::part_select: { + // Currently not implemented: just abort + assert(0 && "part_select intrinsic not implemented"); + abort(); + } + case Intrinsic::part_set: { + // Currently not implemented: just abort + assert(0 && "part_set intrinsic not implemented"); + abort(); + } + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cttz: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::ctlz: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::ctpop: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::stacksave: { + SDValue Op = getRoot(); + SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::stackrestore: { + SDValue Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case Intrinsic::stackprotector: { + // Emit code into the DAG to store the stack guard onto the stack. + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MVT PtrTy = TLI.getPointerTy(); + + SDValue Src = getValue(I.getOperand(1)); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + + int FI = FuncInfo.StaticAllocaMap[Slot]; + MFI->setStackProtectorIndex(FI); + + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + + // Store the stack protector onto the stack. 
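+    // A rough sketch of the whole scheme (illustrative names; only the
+    // initial store is emitted here -- the epilogue comparison was already
+    // created in IR by the StackProtector pass that inserted this call):
+    //
+    //   void f() {
+    //     guard_slot = __guard;       // this store, at frame index FI
+    //     ...                         // buffers that might overflow
+    //     if (guard_slot != __guard)  // checked before returning
+    //       __stack_chk_fail();       // overwrite detected
+    //   }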
+ SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, + PseudoSourceValue::getFixedStack(FI), + 0, true); + setValue(&I, Result); + DAG.setRoot(Result); + return 0; + } + case Intrinsic::var_annotation: + // Discard annotate attributes + return 0; + + case Intrinsic::init_trampoline: { + const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts()); + + SDValue Ops[6]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + Ops[4] = DAG.getSrcValue(I.getOperand(1)); + Ops[5] = DAG.getSrcValue(F); + + SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), + Ops, 6); + + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + + case Intrinsic::gcroot: + if (GFI) { + Value *Alloca = I.getOperand(1); + Constant *TypeMap = cast<Constant>(I.getOperand(2)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); + } + return 0; + + case Intrinsic::gcread: + case Intrinsic::gcwrite: + assert(0 && "GC failed to lower gcread/gcwrite intrinsics!"); + return 0; + + case Intrinsic::flt_rounds: { + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + return 0; + } + + case Intrinsic::trap: { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + + case Intrinsic::uadd_with_overflow: + return implVisitAluOverflow(I, ISD::UADDO); + case Intrinsic::sadd_with_overflow: + return implVisitAluOverflow(I, ISD::SADDO); + case Intrinsic::usub_with_overflow: + return implVisitAluOverflow(I, ISD::USUBO); + case Intrinsic::ssub_with_overflow: + return implVisitAluOverflow(I, ISD::SSUBO); + case Intrinsic::umul_with_overflow: + return implVisitAluOverflow(I, ISD::UMULO); + case Intrinsic::smul_with_overflow: + return implVisitAluOverflow(I, ISD::SMULO); + + case Intrinsic::prefetch: { + SDValue Ops[4]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + return 0; + } + + case Intrinsic::memory_barrier: { + SDValue Ops[6]; + Ops[0] = getRoot(); + for (int x = 1; x < 6; ++x) + Ops[x] = getValue(I.getOperand(x)); + + DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); + return 0; + } + case Intrinsic::atomic_cmp_swap: { + SDValue Root = getRoot(); + SDValue L = + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), + getValue(I.getOperand(2)).getValueType().getSimpleVT(), + Root, + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + getValue(I.getOperand(3)), + I.getOperand(1)); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; + } + case Intrinsic::atomic_load_add: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD); + case Intrinsic::atomic_load_sub: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB); + case Intrinsic::atomic_load_or: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR); + case Intrinsic::atomic_load_xor: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR); + case Intrinsic::atomic_load_and: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND); + case Intrinsic::atomic_load_nand: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND); + case Intrinsic::atomic_load_max: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX); + case Intrinsic::atomic_load_min: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN); + case 
Intrinsic::atomic_load_umin: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN); + case Intrinsic::atomic_load_umax: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); + case Intrinsic::atomic_swap: + return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); + } +} + + +void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, + bool IsTailCall, + MachineBasicBlock *LandingPad) { + const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + unsigned BeginLabel = 0, EndLabel = 0; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + SDValue ArgNode = getValue(*i); + Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + + unsigned attrInd = i - CS.arg_begin() + 1; + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.Alignment = CS.getParamAlignment(attrInd); + Args.push_back(Entry); + } + + if (LandingPad && MMI) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI->NextLabelID(); + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), + getControlRoot(), BeginLabel)); + } + + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), CS.getType(), + CS.paramHasAttr(0, Attribute::SExt), + CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), + CS.paramHasAttr(0, Attribute::InReg), + CS.getCallingConv(), + IsTailCall && PerformTailCallOpt, + Callee, Args, DAG, getCurDebugLoc()); + if (CS.getType() != Type::VoidTy) + setValue(CS.getInstruction(), Result.first); + DAG.setRoot(Result.second); + + if (LandingPad && MMI) { + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + EndLabel = MMI->NextLabelID(); + DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), + getRoot(), EndLabel)); + + // Inform MachineModuleInfo of range. + MMI->addInvoke(LandingPad, BeginLabel, EndLabel); + } +} + + +void SelectionDAGLowering::visitCall(CallInst &I) { + const char *RenameFn = 0; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo(); + if (II) { + if (unsigned IID = II->getIntrinsicID(F)) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + if (unsigned IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. 
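+    // (Illustrative: a call "y = sinf(x)" to a declared, externally visible
+    // sinf whose operand and result types agree passes the checks below and
+    // is emitted directly as an FSIN node instead of a libcall.)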
+ unsigned NameLen = F->getNameLen(); + if (!F->hasLocalLinkage() && NameLen) { + const char *NameStr = F->getNameStart(); + if (NameStr[0] == 'c' && + ((NameLen == 8 && !strcmp(NameStr, "copysign")) || + (NameLen == 9 && !strcmp(NameStr, "copysignf")))) { + if (I.getNumOperands() == 3 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType() && + I.getType() == I.getOperand(2)->getType()) { + SDValue LHS = getValue(I.getOperand(1)); + SDValue RHS = getValue(I.getOperand(2)); + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), + LHS.getValueType(), LHS, RHS)); + return; + } + } else if (NameStr[0] == 'f' && + ((NameLen == 4 && !strcmp(NameStr, "fabs")) || + (NameLen == 5 && !strcmp(NameStr, "fabsf")) || + (NameLen == 5 && !strcmp(NameStr, "fabsl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (NameStr[0] == 's' && + ((NameLen == 3 && !strcmp(NameStr, "sin")) || + (NameLen == 4 && !strcmp(NameStr, "sinf")) || + (NameLen == 4 && !strcmp(NameStr, "sinl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (NameStr[0] == 'c' && + ((NameLen == 3 && !strcmp(NameStr, "cos")) || + (NameLen == 4 && !strcmp(NameStr, "cosf")) || + (NameLen == 4 && !strcmp(NameStr, "cosl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } + } + } else if (isa<InlineAsm>(I.getOperand(0))) { + visitInlineAsm(&I); + return; + } + + SDValue Callee; + if (!RenameFn) + Callee = getValue(I.getOperand(0)); + else + Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + + LowerCallTo(&I, Callee, I.isTailCall()); +} + + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, + SDValue *Flag) const { + // Assemble the legal parts into the final values. + SmallVector<SDValue, 4> Values(ValueVTs.size()); + SmallVector<SDValue, 8> Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. + MVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + Chain = P.getValue(1); + + // If the source register was virtual and if we know something about it, + // add an assert node. 
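+      // (For instance, if the vreg is known to hold a sign-extended i8 in an
+      // i32, wrapping P as AssertSext(P, i8) lets a later sign-extend or
+      // truncate of that value fold away; the tightest FromVT is picked
+      // below.)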
+ if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + if (FLI.LiveOutRegInfo.size() > SlotNo) { + FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + MVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + + if (FromVT != MVT::Other) { + P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + + } + } + } + + Parts[i] = P; + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = RegVTs[Value]; + + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT); + Part += NumParts; + } + + // Copy the parts into the registers. + SmallVector<SDValue, 8> Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). 
+    //   c1, f1 = CopyToReg
+    //   c2, f2 = CopyToReg
+    //   c3     = TokenFactor c1, c2
+    //   ...
+    //          = op c3, ..., f2
+    Chain = Chains[NumRegs-1];
+  else
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code,
+                                        bool HasMatching, unsigned MatchingIdx,
+                                        SelectionDAG &DAG,
+                                        std::vector<SDValue> &Ops) const {
+  MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+  assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
+  unsigned Flag = Code | (Regs.size() << 3);
+  if (HasMatching)
+    Flag |= 0x80000000 | (MatchingIdx << 16);
+  Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
+  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]);
+    MVT RegisterVT = RegVTs[Value];
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      assert(Reg < Regs.size() && "Mismatch in # registers expected");
+      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+    }
+  }
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register. Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+                      const TargetLowering &TLI,
+                      const TargetRegisterInfo *TRI) {
+  MVT FoundVT = MVT::Other;
+  const TargetRegisterClass *FoundRC = 0;
+  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+       E = TRI->regclass_end(); RCI != E; ++RCI) {
+    MVT ThisVT = MVT::Other;
+
+    const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+    // can't use it. For example, 64-bit reg classes on 32-bit targets.
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (TLI.isTypeLegal(*I)) {
+        // If we have already found this register in a different register
+        // class, choose the one with the largest VT specified. For example,
+        // on PowerPC, we favor f64 register classes over f32.
+        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+          ThisVT = *I;
+          break;
+        }
+      }
+    }
+
+    if (ThisVT == MVT::Other) continue;
+
+    // NOTE: This isn't ideal. In particular, this might allocate the
+    // frame pointer in functions that need it (because it has not been
+    // taken out of the allocation order yet, since a variable-sized
+    // allocation hasn't been seen). This is a slight code pessimization,
+    // but should still work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class. Keep looking at others in
+        // case we find one with larger registers that this physreg is
+        // also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}
+
+
+namespace llvm {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+    public TargetLowering::AsmOperandInfo {
+public:
+  /// CallOperand - If this is the result output operand or a clobber, this is
+  /// null; otherwise it is the incoming operand to the CallInst.
+  /// This gets modified as the asm is processed.
+  SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+  }
+
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+                         std::set<unsigned> &OutputRegs,
+                         std::set<unsigned> &InputRegs,
+                         const TargetRegisterInfo &TRI) const {
+    if (isOutReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+    }
+    if (isInReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+    }
+  }
+
+  /// getCallOperandValMVT - Return the MVT of the Value* that this operand
+  /// corresponds to. If there is no Value* for this operand, it returns
+  /// MVT::Other.
+  MVT getCallOperandValMVT(const TargetLowering &TLI,
+                           const TargetData *TD) const {
+    if (CallOperandVal == 0) return MVT::Other;
+
+    if (isa<BasicBlock>(CallOperandVal))
+      return TLI.getPointerTy();
+
+    const llvm::Type *OpTy = CallOperandVal->getType();
+
+    // If this is an indirect operand, the operand is a pointer to the
+    // accessed type.
+    if (isIndirect)
+      OpTy = cast<PointerType>(OpTy)->getElementType();
+
+    // If OpTy is not a single value, it may be a struct/union that we
+    // can tile with integers.
+    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+      switch (BitSize) {
+      default: break;
+      case 1:
+      case 8:
+      case 16:
+      case 32:
+      case 64:
+      case 128:
+        OpTy = IntegerType::get(BitSize);
+        break;
+      }
+    }
+
+    return TLI.getValueType(OpTy, true);
+  }
+
+private:
+  /// MarkRegAndAliases - Mark the specified register and all aliases in the
+  /// specified set.
+  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+                                const TargetRegisterInfo &TRI) {
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+    Regs.insert(Reg);
+    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+      for (; *Aliases; ++Aliases)
+        Regs.insert(*Aliases);
+  }
+};
+} // end llvm namespace.
+
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+/// Input and OutputRegs are the sets of already-allocated physical registers.
+///
+void SelectionDAGLowering::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                     std::set<unsigned> &OutputRegs,
+                     std::set<unsigned> &InputRegs) {
+  // Compute whether this value requires an input register, an output
+  // register, or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+
+    // If there is an input constraint that matches this, we need to reserve
+    // the input register so no other inputs allocate to it.
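+    // (For example, with asm("..." : "=r"(out) : "0"(in)), the "0" input is
+    // tied to this output, so the chosen register must be reserved for the
+    // input side as well.)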
+    isInReg = OpInfo.hasMatchingInput();
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SmallVector<unsigned, 4> Regs;
+
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is an FP input in an integer register (or vice versa), insert a
+    // bitcast of the input value. More generally, handle any case where the
+    // input value disagrees with the register class we plan to stick this in.
+    if (OpInfo.Type == InlineAsm::isInput &&
+        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+      // Try to convert to the first MVT that the reg class contains. If the
+      // types are of identical size, use a bitcast to convert (e.g. two
+      // differing vector types).
+      MVT RegVT = *PhysReg.second->vt_begin();
+      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+        // If the input is an FP value and we want it in integer registers,
+        // do a bitcast to the corresponding integer type. This turns an f64
+        // value into i64, which can be passed with two i32 values on a
+        // 32-bit machine.
+        RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      }
+    }
+
+    NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+  }
+
+  MVT RegVT;
+  MVT ValueVT = OpInfo.ConstraintVT;
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (unsigned AssignedReg = PhysReg.first) {
+    const TargetRegisterClass *RC = PhysReg.second;
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *RC->vt_begin();
+
+    // Get the actual register value type. This is important, because the
+    // user may have asked for (e.g.) the AX register in i32 type. We need
+    // to remember that AX is actually i16 to get the right extension.
+    RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+    Regs.push_back(AssignedReg);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = RC->begin();
+      for (; *I != AssignedReg; ++I)
+        assert(I != RC->end() && "Didn't find reg!");
+
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != RC->end() && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+    return;
+  }
+
+  // Otherwise, if this was a reference to an LLVM register class, create
+  // vregs for this reference.
+  if (const TargetRegisterClass *RC = PhysReg.second) {
+    RegVT = *RC->vt_begin();
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = RegVT;
+
+    // Create the appropriate number of virtual registers.
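+    // (Virtual registers leave the final choice to the register allocator;
+    // see the note on GetRegistersForValue above.)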
+ MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (; NumRegs; --NumRegs) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + return; + } + + // This is a reference to a register class that doesn't directly correspond + // to an LLVM register class. Allocate NumRegs consecutive, available, + // registers from the class. + std::vector<unsigned> RegClassRegs + = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + unsigned NumAllocated = 0; + for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { + unsigned Reg = RegClassRegs[i]; + // See if this register is available. + if ((isOutReg && OutputRegs.count(Reg)) || // Already used. + (isInReg && InputRegs.count(Reg))) { // Already used. + // Make sure we find consecutive registers. + NumAllocated = 0; + continue; + } + + // Check to see if this register is allocatable (i.e. don't give out the + // stack pointer). + const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); + if (!RC) { // Couldn't allocate this register. + // Reset NumAllocated to make sure we return consecutive registers. + NumAllocated = 0; + continue; + } + + // Okay, this register is good, we can use it. + ++NumAllocated; + + // If we allocated enough consecutive registers, succeed. + if (NumAllocated == NumRegs) { + unsigned RegStart = (i-NumAllocated)+1; + unsigned RegEnd = i+1; + // Mark all of the allocated registers used. + for (unsigned i = RegStart; i != RegEnd; ++i) + Regs.push_back(RegClassRegs[i]); + + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(), + OpInfo.ConstraintVT); + OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); + return; + } + } + + // Otherwise, we couldn't allocate enough registers for this. +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. +static bool +hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. + if (CI.isIndirect) + return true; + } + + return false; +} + +/// visitInlineAsm - Handle a call to an InlineAsm object. +/// +void SelectionDAGLowering::visitInlineAsm(CallSite CS) { + InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + + /// ConstraintOperands - Information about all of the constraints. + std::vector<SDISelAsmOperandInfo> ConstraintOperands; + + std::set<unsigned> OutputRegs, InputRegs; + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + std::vector<InlineAsm::ConstraintInfo> + ConstraintInfos = IA->ParseConstraints(); + + bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); + + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. 
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    MVT OpVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      // Indirect outputs just consume an argument.
+      if (OpInfo.isIndirect) {
+        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+        break;
+      }
+
+      // The return value of the call is this value. As such, there is no
+      // corresponding argument.
+      assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+        OpVT = TLI.getValueType(STy->getElementType(ResNo));
+      } else {
+        assert(ResNo == 0 && "Asm only has one result!");
+        OpVT = TLI.getValueType(CS.getType());
+      }
+      ++ResNo;
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // If this is an input or an indirect output, process the call argument.
+    // BasicBlocks are labels, currently appearing only in asm's.
+    if (OpInfo.CallOperandVal) {
+      if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+      } else {
+        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      }
+
+      OpVT = OpInfo.getCallOperandValMVT(TLI, TD);
+    }
+
+    OpInfo.ConstraintVT = OpVT;
+  }
+
+  // Second pass over the constraints: compute which constraint option to use
+  // and assign registers to constraints that want a specific physreg.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // If this is an output operand with a matching input operand, look up
+    // the matching input. If their types mismatch, e.g. one is an integer,
+    // the other is floating point, or their sizes are different, flag it as
+    // an error.
+    if (OpInfo.hasMatchingInput()) {
+      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (OpInfo.ConstraintVT.getSizeInBits() !=
+             Input.ConstraintVT.getSizeInBits())) {
+          cerr << "llvm: error: Unsupported asm: input constraint with a "
+               << "matching output constraint of incompatible type!\n";
+          exit(1);
+        }
+        Input.ConstraintVT = OpInfo.ConstraintVT;
+      }
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+
+    // If this is a memory input, and if the operand is not indirect, do what
+    // we need to do to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert(OpInfo.Type == InlineAsm::isInput &&
+             "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value. If we don't
+      // have an indirect input, put it in the constpool if we can, otherwise
+      // spill it to a stack slot.
+
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
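+      // (E.g. a ConstantFP operand has no address of its own, so a
+      // constant-pool slot supplies one; any other value is spilled to a
+      // fresh stack slot just below.)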
+ Value *OpVal = OpInfo.CallOperandVal; + if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || + isa<ConstantVector>(OpVal)) { + OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), + TLI.getPointerTy()); + } else { + // Otherwise, create a stack slot and emit a store to it before the + // asm. + const Type *Ty = OpVal->getType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Chain = DAG.getStore(Chain, getCurDebugLoc(), + OpInfo.CallOperand, StackSlot, NULL, 0); + OpInfo.CallOperand = StackSlot; + } + + // There is no longer a Value* corresponding to this operand. + OpInfo.CallOperandVal = 0; + // It is now an indirect operand. + OpInfo.isIndirect = true; + } + + // If this constraint is for a specific register, allocate it before + // anything else. + if (OpInfo.ConstraintType == TargetLowering::C_Register) + GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + } + ConstraintInfos.clear(); + + + // Second pass - Loop over all of the operands, assigning virtual or physregs + // to register class operands. + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // C_Register operands have already been allocated, Other/Memory don't need + // to be. + if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) + GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + } + + // AsmNodeOperands - The operands for the ISD::INLINEASM node. + std::vector<SDValue> AsmNodeOperands; + AsmNodeOperands.push_back(SDValue()); // reserve space for input chain + AsmNodeOperands.push_back( + DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other)); + + + // Loop over all of the inputs, copying the operand values into the + // appropriate registers and processing the output regs. + RegsForValue RetValRegs; + + // IndirectStoresToEmit - The set of stores to emit after the inline asm node. + std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + switch (OpInfo.Type) { + case InlineAsm::isOutput: { + if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && + OpInfo.ConstraintType != TargetLowering::C_Register) { + // Memory output, or 'other' output (e.g. 'X' constraint). + assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + + // Add information to the INLINEASM node to know about this output. + unsigned ResOpType = 4/*MEM*/ | (1<<3); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + TLI.getPointerTy())); + AsmNodeOperands.push_back(OpInfo.CallOperand); + break; + } + + // Otherwise, this is a register or register class output. + + // Copy the output from the appropriate register. Find a register that + // we can use. + if (OpInfo.AssignedRegs.Regs.empty()) { + cerr << "llvm: error: Couldn't allocate output reg for constraint '" + << OpInfo.ConstraintCode << "'!\n"; + exit(1); + } + + // If this is an indirect operand, store through the pointer after the + // asm. + if (OpInfo.isIndirect) { + IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, + OpInfo.CallOperandVal)); + } else { + // This is the result value of the call. 
+        assert(CS.getType() != Type::VoidTy && "Bad inline asm!");
+        // Concatenate this output onto the outputs list.
+        RetValRegs.append(OpInfo.AssignedRegs);
+      }
+
+      // Add information to the INLINEASM node to know that this register is
+      // set.
+      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+                                               6 /* EARLYCLOBBER REGDEF */ :
+                                               2 /* REGDEF */ ,
+                                               false,
+                                               0,
+                                               DAG, AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isInput: {
+      SDValue InOperandVal = OpInfo.CallOperand;
+
+      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        unsigned OperandNo = OpInfo.getMatchedOperand();
+
+        // Scan until we find the definition we already emitted of this operand.
+        // When we find it, create a RegsForValue operand.
+        unsigned CurOp = 2;  // The first operand.
+        for (; OperandNo; --OperandNo) {
+          // Advance to the next operand.
+          unsigned OpFlag =
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+          assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
+                  (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
+                  (OpFlag & 7) == 4 /*MEM*/) &&
+                 "Skipped past definitions?");
+          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+        }
+
+        unsigned OpFlag =
+          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+        if ((OpFlag & 7) == 2 /*REGDEF*/
+            || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+          assert(!OpInfo.isIndirect &&
+                 "Don't know how to handle tied indirect register inputs yet!");
+          RegsForValue MatchedRegs;
+          MatchedRegs.TLI = &TLI;
+          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+          MVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+          MatchedRegs.RegVTs.push_back(RegVT);
+          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+               i != e; ++i)
+            MatchedRegs.Regs.
+              push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+          // Use the produced MatchedRegs object to copy the input value into
+          // the virtual registers tied to the matched output.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                    Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+                                           true, OpInfo.getMatchedOperand(),
+                                           DAG, AsmNodeOperands);
+          break;
+        } else {
+          assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
+          assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
+                 "Unexpected number of operands");
+          // Add information to the INLINEASM node to know about this input.
+          // See InlineAsm.h isUseOperandTiedToDef.
+          OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
+          AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+                                                          TLI.getPointerTy()));
+          AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+          break;
+        }
+      }
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        assert(!OpInfo.isIndirect &&
+               "Don't know how to handle indirect other inputs yet!");
+
+        std::vector<SDValue> Ops;
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+                                         hasMemory, Ops, DAG);
+        if (Ops.empty()) {
+          cerr << "llvm: error: Invalid operand for inline asm constraint '"
+               << OpInfo.ConstraintCode << "'!\n";
+          exit(1);
+        }
+
+        // Add information to the INLINEASM node to know about this input.
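+        // As the cases above and below show, each operand is described to the
+        // INLINEASM node by a flag word whose low three bits give the operand
+        // kind and whose next bits give the operand count, roughly:
+        //   unsigned Flag = Kind | (NumOps << 3);
+        //   // Kind: 1 = REGUSE, 2 = REGDEF, 3 = IMM, 4 = MEM,
+        //   //       6 = EARLYCLOBBER REGDEF
+        // with bit 31 plus the bits from 16 up carrying the matched-operand
+        // index for tied uses (see isUseOperandTiedToDef in InlineAsm.h).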
+        unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+        break;
+      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 4/*MEM*/ | (1<<3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect &&
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        cerr << "llvm: error: Couldn't allocate input reg for constraint '"
+             << OpInfo.ConstraintCode << "'!\n";
+        exit(1);
+      }
+
+      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                        Chain, &Flag);
+
+      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+                                               DAG, AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isClobber: {
+      // Add the clobbered value to the operand list, so that the register
+      // allocator is aware that the physreg got clobbered.
+      if (!OpInfo.AssignedRegs.Regs.empty())
+        OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+                                                 false, 0, DAG,AsmNodeOperands);
+      break;
+    }
+    }
+  }
+
+  // Finish up input operands.
+  AsmNodeOperands[0] = Chain;
+  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+                      DAG.getVTList(MVT::Other, MVT::Flag),
+                      &AsmNodeOperands[0], AsmNodeOperands.size());
+  Flag = Chain.getValue(1);
+
+  // If this asm returns a register value, copy the result from that register
+  // and set it as the value of the call.
+  if (!RetValRegs.Regs.empty()) {
+    SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+                                             Chain, &Flag);
+
+    // FIXME: Why don't we do this for inline asms with MRVs?
+    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+      MVT ResultType = TLI.getValueType(CS.getType());
+
+      // If any of the results of the inline asm is a vector, it may have the
+      // wrong width/num elts.  This can happen for register classes that can
+      // contain multiple different value types.  The preg or vreg allocated may
+      // not have the same VT as was expected.  Convert it to the right type
+      // with bit_convert.
+      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                          ResultType, Val);
+
+      } else if (ResultType != Val.getValueType() &&
+                 ResultType.isInteger() && Val.getValueType().isInteger()) {
+        // If a result value was tied to an input value, the computed result may
+        // have a wider width than the expected result.  Extract the relevant
+        // portion.
+        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+      }
+
+      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+    }
+
+    setValue(CS.getInstruction(), Val);
+    // Don't need to use this as a chain in this case.
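+    // (An asm with no side effects, no memory operands, and no indirect
+    // stores is a pure computation; its INLINEASM node can stay off the root
+    // chain and be ordered purely by its data dependences.)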
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) + return; + } + + std::vector<std::pair<SDValue, Value*> > StoresToEmit; + + // Process indirect outputs, first output all of the flagged copies out of + // physregs. + for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { + RegsForValue &OutRegs = IndirectStoresToEmit[i].first; + Value *Ptr = IndirectStoresToEmit[i].second; + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + Chain, &Flag); + StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); + + } + + // Emit the non-flagged stores from the physregs. + SmallVector<SDValue, 8> OutChains; + for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) + OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(), + StoresToEmit[i].first, + getValue(StoresToEmit[i].second), + StoresToEmit[i].second, 0)); + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &OutChains[0], OutChains.size()); + DAG.setRoot(Chain); +} + + +void SelectionDAGLowering::visitMalloc(MallocInst &I) { + SDValue Src = getValue(I.getOperand(0)); + + // Scale up by the type size in the original i32 type width. Various + // mid-level optimizers may make assumptions about demanded bits etc from the + // i32-ness of the optimizer: we do not want to promote to i64 and then + // multiply on 64-bit targets. + // FIXME: Malloc inst should go away: PR715. + uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType()); + if (ElementSize != 1) + Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(), + Src, DAG.getConstant(ElementSize, Src.getValueType())); + + MVT IntPtr = TLI.getPointerTy(); + + if (IntPtr.bitsLT(Src.getValueType())) + Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src); + else if (IntPtr.bitsGT(Src.getValueType())) + Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Src; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Args.push_back(Entry); + + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, + CallingConv::C, PerformTailCallOpt, + DAG.getExternalSymbol("malloc", IntPtr), + Args, DAG, getCurDebugLoc()); + setValue(&I, Result.first); // Pointers always fit in registers + DAG.setRoot(Result.second); +} + +void SelectionDAGLowering::visitFree(FreeInst &I) { + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(I.getOperand(0)); + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Args.push_back(Entry); + MVT IntPtr = TLI.getPointerTy(); + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false, + CallingConv::C, PerformTailCallOpt, + DAG.getExternalSymbol("free", IntPtr), Args, DAG, + getCurDebugLoc()); + DAG.setRoot(Result.second); +} + +void SelectionDAGLowering::visitVAStart(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(1)))); +} + +void SelectionDAGLowering::visitVAArg(VAArgInst &I) { + SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), + getRoot(), getValue(I.getOperand(0)), + DAG.getSrcValue(I.getOperand(0))); + setValue(&I, V); + DAG.setRoot(V.getValue(1)); +} + +void SelectionDAGLowering::visitVAEnd(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), + MVT::Other, 
getRoot(), + getValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(1)))); +} + +void SelectionDAGLowering::visitVACopy(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + DAG.getSrcValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(2)))); +} + +/// TargetLowering::LowerArguments - This is the default LowerArguments +/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all +/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be +/// integrated into SDISel. +void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &ArgValues, + DebugLoc dl) { + // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node. + SmallVector<SDValue, 3+16> Ops; + Ops.push_back(DAG.getRoot()); + Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy())); + Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy())); + + // Add one result value for each formal argument. + SmallVector<MVT, 16> RetVals; + unsigned j = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++j) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, I->getType(), ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForMVT(); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(ArgTy); + + if (F.paramHasAttr(j, Attribute::ZExt)) + Flags.setZExt(); + if (F.paramHasAttr(j, Attribute::SExt)) + Flags.setSExt(); + if (F.paramHasAttr(j, Attribute::InReg)) + Flags.setInReg(); + if (F.paramHasAttr(j, Attribute::StructRet)) + Flags.setSRet(); + if (F.paramHasAttr(j, Attribute::ByVal)) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(I->getType()); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = getByValTypeAlignment(ElementTy); + unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + if (F.getParamAlignment(j)) + FrameAlign = F.getParamAlignment(j); + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (F.paramHasAttr(j, Attribute::Nest)) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + MVT RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + for (unsigned i = 0; i != NumRegs; ++i) { + RetVals.push_back(RegisterVT); + ISD::ArgFlagsTy MyFlags = Flags; + if (NumRegs > 1 && i == 0) + MyFlags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) + MyFlags.setOrigAlign(1); + Ops.push_back(DAG.getArgFlags(MyFlags)); + } + } + } + + RetVals.push_back(MVT::Other); + + // Create the node. + SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl, + DAG.getVTList(&RetVals[0], RetVals.size()), + &Ops[0], Ops.size()).getNode(); + + // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but + // allows exposing the loads that may be part of the argument access to the + // first DAGCombiner pass. + SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG); + + // The number of results should match up, except that the lowered one may have + // an extra flag result. 
+ assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() || + (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() && + TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag)) + && "Lowering produced unexpected number of results!"); + + // The FORMAL_ARGUMENTS node itself is likely no longer needed. + if (Result != TmpRes.getNode() && Result->use_empty()) { + HandleSDNode Dummy(DAG.getRoot()); + DAG.RemoveDeadNode(Result); + } + + Result = TmpRes.getNode(); + + unsigned NumArgRegs = Result->getNumValues() - 1; + DAG.setRoot(SDValue(Result, NumArgRegs)); + + // Set up the return result vector. + unsigned i = 0; + unsigned Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, I->getType(), ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + MVT PartVT = getRegisterType(VT); + + unsigned NumParts = getNumRegisters(VT); + SmallVector<SDValue, 4> Parts(NumParts); + for (unsigned j = 0; j != NumParts; ++j) + Parts[j] = SDValue(Result, i++); + + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (F.paramHasAttr(Idx, Attribute::SExt)) + AssertOp = ISD::AssertSext; + else if (F.paramHasAttr(Idx, Attribute::ZExt)) + AssertOp = ISD::AssertZext; + + ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts, + PartVT, VT, AssertOp)); + } + } + assert(i == NumArgRegs && "Argument register count mismatch!"); +} + + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just inserts an ISD::CALL node, which is later custom +/// lowered by the target to something concrete. FIXME: When all targets are +/// migrated to using ISD::CALL, this hook should be integrated into SDISel. +std::pair<SDValue, SDValue> +TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, + bool RetSExt, bool RetZExt, bool isVarArg, + bool isInreg, + unsigned CallingConv, bool isTailCall, + SDValue Callee, + ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + assert((!isTailCall || PerformTailCallOpt) && + "isTailCall set when tail-call optimizations are disabled!"); + + SmallVector<SDValue, 32> Ops; + Ops.push_back(Chain); // Op#0 - Chain + Ops.push_back(Callee); + + // Handle all of the outgoing arguments. + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, Args[i].Ty, ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForMVT(); + SDValue Op = SDValue(Args[i].Node.getNode(), + Args[i].Node.getResNo() + Value); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(ArgTy); + + if (Args[i].isZExt) + Flags.setZExt(); + if (Args[i].isSExt) + Flags.setSExt(); + if (Args[i].isInReg) + Flags.setInReg(); + if (Args[i].isSRet) + Flags.setSRet(); + if (Args[i].isByVal) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(Args[i].Ty); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = getByValTypeAlignment(ElementTy); + unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + // For ByVal, alignment should come from FE. BE will guess if this + // info is not there but there are cases it cannot get right. 
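+        // e.g. a byval struct the source over-aligns with
+        // __attribute__((aligned(32))) is only copied at 32-byte alignment if
+        // the frontend recorded 32 here; otherwise the getByValTypeAlignment
+        // guess above is used. (Illustrative example, not from this file.)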
+ if (Args[i].Alignment) + FrameAlign = Args[i].Alignment; + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (Args[i].isNest) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + MVT PartVT = getRegisterType(VT); + unsigned NumParts = getNumRegisters(VT); + SmallVector<SDValue, 4> Parts(NumParts); + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (Args[i].isSExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (Args[i].isZExt) + ExtendKind = ISD::ZERO_EXTEND; + + getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); + + for (unsigned i = 0; i != NumParts; ++i) { + // if it isn't first piece, alignment must be 1 + ISD::ArgFlagsTy MyFlags = Flags; + if (NumParts > 1 && i == 0) + MyFlags.setSplit(); + else if (i != 0) + MyFlags.setOrigAlign(1); + + Ops.push_back(Parts[i]); + Ops.push_back(DAG.getArgFlags(MyFlags)); + } + } + } + + // Figure out the result value types. We start by making a list of + // the potentially illegal return value types. + SmallVector<MVT, 4> LoweredRetTys; + SmallVector<MVT, 4> RetTys; + ComputeValueVTs(*this, RetTy, RetTys); + + // Then we translate that to a list of legal types. + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + MVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + for (unsigned i = 0; i != NumRegs; ++i) + LoweredRetTys.push_back(RegisterVT); + } + + LoweredRetTys.push_back(MVT::Other); // Always has a chain. + + // Create the CALL node. + SDValue Res = DAG.getCall(CallingConv, dl, + isVarArg, isTailCall, isInreg, + DAG.getVTList(&LoweredRetTys[0], + LoweredRetTys.size()), + &Ops[0], Ops.size() + ); + Chain = Res.getValue(LoweredRetTys.size() - 1); + + // Gather up the call result into a single value. 
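+  // e.g. an i64 call result on a 32-bit target comes back here as two i32
+  // register values, which getCopyFromParts below reassembles into a single
+  // i64 (and MERGE_VALUES then groups multiple results).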
+  if (RetTy != Type::VoidTy && !RetTys.empty()) {
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+
+    if (RetSExt)
+      AssertOp = ISD::AssertSext;
+    else if (RetZExt)
+      AssertOp = ISD::AssertZext;
+
+    SmallVector<SDValue, 4> ReturnValues;
+    unsigned RegNo = 0;
+    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      MVT VT = RetTys[I];
+      MVT RegisterVT = getRegisterType(VT);
+      unsigned NumRegs = getNumRegisters(VT);
+      unsigned RegNoEnd = NumRegs + RegNo;
+      SmallVector<SDValue, 4> Results;
+      for (; RegNo != RegNoEnd; ++RegNo)
+        Results.push_back(Res.getValue(RegNo));
+      SDValue ReturnValue =
+        getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT,
+                         AssertOp);
+      ReturnValues.push_back(ReturnValue);
+    }
+    Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+                      DAG.getVTList(&RetTys[0], RetTys.size()),
+                      &ReturnValues[0], ReturnValues.size());
+  }
+
+  return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+  if (Res.getNode())
+    Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  assert(0 && "LowerOperation not implemented for this target!");
+  abort();
+  return SDValue();
+}
+
+
+void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+  SDValue Op = getValue(V);
+  assert((Op.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+  RegsForValue RFV(TLI, Reg, V->getType());
+  SDValue Chain = DAG.getEntryNode();
+  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+  PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::
+LowerArguments(BasicBlock *LLVMBB) {
+  // If this is the entry block, emit arguments.
+  Function &F = *LLVMBB->getParent();
+  SDValue OldRoot = SDL->DAG.getRoot();
+  SmallVector<SDValue, 16> Args;
+  TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc());
+
+  unsigned a = 0;
+  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+       AI != E; ++AI) {
+    SmallVector<MVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, AI->getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+    if (!AI->use_empty()) {
+      SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues,
+                                                SDL->getCurDebugLoc()));
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BB's will reference it as.
+      SDL->CopyToExportRegsIfNeeded(AI);
+    }
+    a += NumValues;
+  }
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction());
+}
+
+/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
+/// ensure constants are generated when needed.  Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input.  We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB.  As such, the start of the BB might correspond to a different MBB than
+/// the end.
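+/// For example, lowering a switch through a jump table creates extra MBBs
+/// between the start and end of the original block, so the predecessor MBB a
+/// machine PHI should name is not known until lowering of the block completes.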
+///
+void
+SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      unsigned Reg;
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = SDL->ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo->CreateRegForValue(C);
+          SDL->CopyValueToVirtualRegister(C, RegOut);
+        }
+        Reg = RegOut;
+      } else {
+        Reg = FuncInfo->ValueMap[PHIOp];
+        if (Reg == 0) {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo->CreateRegForValue(PHIOp);
+          SDL->CopyValueToVirtualRegister(PHIOp, Reg);
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+      SmallVector<MVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        MVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(VT);
+        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+          SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+        Reg += NumRegisters;
+      }
+    }
+  }
+  SDL->ConstantsOut.clear();
+}
+
+/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
+/// supports legal types, and it emits MachineInstrs directly instead of
+/// creating SelectionDAG nodes.
+///
+bool
+SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
+                                                      FastISel *F) {
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+  unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size();
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      // Only handle legal types. Two interesting things to note here. First,
+      // by bailing out early, we may leave behind some dead instructions,
+      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves. Second, this check is necessary because FastISel doesn't
+      // use CreateRegForValue to create registers, so it always creates
+      // exactly one register for each non-void instruction.
+      MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+        // Promote MVT::i1.
+        if (VT == MVT::i1)
+          VT = TLI.getTypeToTransformTo(VT);
+        else {
+          SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+          return false;
+        }
+      }
+
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      unsigned Reg = F->getRegForValue(PHIOp);
+      if (Reg == 0) {
+        SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+        return false;
+      }
+      SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+    }
+  }
+
+  return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
new file mode 100644
index 0000000..578aa591
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -0,0 +1,558 @@
+//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILD_H
+#define SELECTIONDAGBUILD_H
+
+#include "llvm/Constants.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetMachine.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class FreeInst;
+class Function;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class MallocInst;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VICmpInst;
+class VFCmpInst;
+class VAArgInst;
+class ZExtInst;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function that is used when lowering a region of the function.
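+/// For example, when a value defined in one block is used in another, the
+/// ValueMap below records the virtual register that carries it across the
+/// block boundary.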
+///
+class FunctionLoweringInfo {
+public:
+  TargetLowering &TLI;
+  Function *Fn;
+  MachineFunction *MF;
+  MachineRegisterInfo *RegInfo;
+
+  explicit FunctionLoweringInfo(TargetLowering &TLI);
+
+  /// set - Initialize this FunctionLoweringInfo with the given Function
+  /// and its associated MachineFunction.
+  ///
+  void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG,
+           bool EnableFastISel);
+
+  /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+  DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+  /// ValueMap - Since we emit code for the function a basic block at a time,
+  /// we must remember which virtual registers hold the values for
+  /// cross-basic-block values.
+  DenseMap<const Value*, unsigned> ValueMap;
+
+  /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+  /// the entry block.  This allows the allocas to be efficiently referenced
+  /// anywhere in the function.
+  DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+  SmallSet<Instruction*, 8> CatchInfoLost;
+  SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+  unsigned MakeReg(MVT VT);
+
+  /// isExportedInst - Return true if the specified value is an instruction
+  /// exported from its block.
+  bool isExportedInst(const Value *V) {
+    return ValueMap.count(V);
+  }
+
+  unsigned CreateRegForValue(const Value *V);
+
+  unsigned InitializeRegForValue(const Value *V) {
+    unsigned &R = ValueMap[V];
+    assert(R == 0 && "Already initialized this value register!");
+    return R = CreateRegForValue(V);
+  }
+
+  struct LiveOutInfo {
+    unsigned NumSignBits;
+    APInt KnownOne, KnownZero;
+    LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+  };
+
+  /// LiveOutRegInfo - Information about live out vregs, indexed by their
+  /// register number offset by 'FirstVirtualRegister'.
+  std::vector<LiveOutInfo> LiveOutRegInfo;
+
+  /// clear - Clear out all the function-specific state. This returns this
+  /// FunctionLoweringInfo to an empty state, ready to be used for a
+  /// different function.
+  void clear() {
+    MBBMap.clear();
+    ValueMap.clear();
+    StaticAllocaMap.clear();
+#ifndef NDEBUG
+    CatchInfoLost.clear();
+    CatchInfoFound.clear();
+#endif
+    LiveOutRegInfo.clear();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLowering - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can overload any lowering method.
+///
+class SelectionDAGLowering {
+  MachineBasicBlock *CurMBB;
+
+  /// CurDebugLoc - current file + line number.  Changes as we build the DAG.
+  DebugLoc CurDebugLoc;
+
+  DenseMap<const Value*, SDValue> NodeMap;
+
+  /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+  /// them up and then emit token factor nodes when possible. This allows us to
+  /// get simple disambiguation between loads without worrying about alias
+  /// analysis.
+  SmallVector<SDValue, 8> PendingLoads;
+
+  /// PendingExports - CopyToReg nodes that copy values to virtual registers
+  /// for export to other blocks need to be emitted before any terminator
+  /// instruction, but they have no other ordering requirements. We bunch them
+  /// up and then emit a single tokenfactor for them just before terminator
+  /// instructions.
+  SmallVector<SDValue, 8> PendingExports;
+
+  /// Case - A struct to record the Value for a switch case, and the
+  /// case's target basic block.
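+  /// Low and High bound an inclusive range of case values, which lets
+  /// Clusterify (below) fold adjacent cases with a common destination into
+  /// a single entry.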
+  struct Case {
+    Constant* Low;
+    Constant* High;
+    MachineBasicBlock* BB;
+
+    Case() : Low(0), High(0), BB(0) { }
+    Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+      Low(low), High(high), BB(bb) { }
+    uint64_t size() const {
+      uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
+      uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue();
+      return (rHigh - rLow + 1ULL);
+    }
+  };
+
+  struct CaseBits {
+    uint64_t Mask;
+    MachineBasicBlock* BB;
+    unsigned Bits;
+
+    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+      Mask(mask), BB(bb), Bits(bits) { }
+  };
+
+  typedef std::vector<Case> CaseVector;
+  typedef std::vector<CaseBits> CaseBitsVector;
+  typedef CaseVector::iterator CaseItr;
+  typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+  /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+  /// of conditional branches.
+  struct CaseRec {
+    CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+      CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+    /// CaseBB - The MBB in which to emit the compare and branch
+    MachineBasicBlock *CaseBB;
+    /// LT, GE - If nonzero, we know the current case value must be less-than or
+    /// greater-than-or-equal-to these Constants.
+    Constant *LT;
+    Constant *GE;
+    /// Range - A pair of iterators representing the range of case values to be
+    /// processed at this point in the binary search tree.
+    CaseRange Range;
+  };
+
+  typedef std::vector<CaseRec> CaseRecVector;
+
+  /// The comparison function for sorting the switch case values in the vector.
+  /// WARNING: Case ranges should be disjoint!
+  struct CaseCmp {
+    bool operator () (const Case& C1, const Case& C2) {
+      assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+      const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+      const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+      return CI1->getValue().slt(CI2->getValue());
+    }
+  };
+
+  struct CaseBitsCmp {
+    bool operator () (const CaseBits& C1, const CaseBits& C2) {
+      return C1.Bits > C2.Bits;
+    }
+  };
+
+  size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
+
+  /// CaseBlock - This structure is used to communicate between SDLowering and
+  /// SDISel for the code generation of additional basic blocks needed by multi-
+  /// case switch statements.
+  struct CaseBlock {
+    CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
+              MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+              MachineBasicBlock *me)
+      : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+        TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+    // CC - the condition code to use for the case block's setcc node
+    ISD::CondCode CC;
+    // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+    // Emit by default LHS op RHS. MHS is used for range comparisons:
+    // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+    Value *CmpLHS, *CmpMHS, *CmpRHS;
+    // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+    MachineBasicBlock *TrueBB, *FalseBB;
+    // ThisBB - the block into which to emit the code for the setcc and branches
+    MachineBasicBlock *ThisBB;
+  };
+  struct JumpTable {
+    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+    /// Reg - the virtual register containing the index of the jump table entry
+    /// to jump to.
+    unsigned Reg;
+    /// JTI - the JumpTableIndex for this jump table in the function.
+    unsigned JTI;
+    /// MBB - the MBB into which to emit the code for the indirect jump.
+    MachineBasicBlock *MBB;
+    /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB.  This is used when updating PHI nodes in successors.
+    MachineBasicBlock *Default;
+  };
+  struct JumpTableHeader {
+    JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H,
+                    bool E = false):
+      First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+    APInt First;
+    APInt Last;
+    Value *SValue;
+    MachineBasicBlock *HeaderBB;
+    bool Emitted;
+  };
+  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+  struct BitTestCase {
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+      Mask(M), ThisBB(T), TargetBB(Tr) { }
+    uint64_t Mask;
+    MachineBasicBlock* ThisBB;
+    MachineBasicBlock* TargetBB;
+  };
+
+  typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+  struct BitTestBlock {
+    BitTestBlock(APInt F, APInt R, Value* SV,
+                 unsigned Rg, bool E,
+                 MachineBasicBlock* P, MachineBasicBlock* D,
+                 const BitTestInfo& C):
+      First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+      Parent(P), Default(D), Cases(C) { }
+    APInt First;
+    APInt Range;
+    Value *SValue;
+    unsigned Reg;
+    bool Emitted;
+    MachineBasicBlock *Parent;
+    MachineBasicBlock *Default;
+    BitTestInfo Cases;
+  };
+
+public:
+  // TLI - This is information that describes the available target features we
+  // need for lowering.  This indicates when operations are unavailable,
+  // implemented with a libcall, etc.
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+  const TargetData *TD;
+  AliasAnalysis *AA;
+
+  /// SwitchCases - Vector of CaseBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<JumpTableBlock> JTCases;
+  /// BitTestCases - Vector of BitTestBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<BitTestBlock> BitTestCases;
+
+  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+  // Emit PHI-node-operand constants only once even if used by multiple
+  // PHI nodes.
+  DenseMap<Constant*, unsigned> ConstantsOut;
+
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  /// OptLevel - What optimization level we're generating code for.
+  ///
+  CodeGenOpt::Level OptLevel;
+
+  /// GFI - Garbage collection metadata for the function.
+  GCFunctionInfo *GFI;
+
+  SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
+                       FunctionLoweringInfo &funcinfo,
+                       CodeGenOpt::Level ol)
+    : CurDebugLoc(DebugLoc::getUnknownLoc()),
+      TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) {
+  }
+
+  void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+  /// clear - Clear out the current SelectionDAG and the associated
+  /// state and prepare this SelectionDAGLowering object to be used
+  /// for a new block. This doesn't clear out information about
+  /// additional blocks that are needed to complete switch lowering
+  /// or PHI node updating; that information is cleared out as it is
+  /// consumed.
+  void clear();
+
+  /// getRoot - Return the current virtual root of the Selection DAG,
+  /// flushing any PendingLoad items. This must be done before emitting
+  /// a store or any other node that may need to be ordered after any
+  /// prior load instructions.
+  ///
+  SDValue getRoot();
+
+  /// getControlRoot - Similar to getRoot, but instead of flushing all the
+  /// PendingLoad items, flush all the PendingExports items.
It is necessary + /// to do this before emitting a terminator instruction. + /// + SDValue getControlRoot(); + + DebugLoc getCurDebugLoc() const { return CurDebugLoc; } + void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; } + + void CopyValueToVirtualRegister(Value *V, unsigned Reg); + + void visit(Instruction &I); + + void visit(unsigned Opcode, User &I); + + void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; } + + SDValue getValue(const Value *V); + + void setValue(const Value *V, SDValue NewN) { + SDValue &N = NodeMap[V]; + assert(N.getNode() == 0 && "Already set a value for this node!"); + N = NewN; + } + + void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs); + + void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + unsigned Opc); + void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB); + bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); + bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(Value *V); + void ExportFromCurrentBlock(Value *V); + void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall, + MachineBasicBlock *LandingPad = NULL); + +private: + // Terminator instructions. + void visitRet(ReturnInst &I); + void visitBr(BranchInst &I); + void visitSwitch(SwitchInst &I); + void visitUnreachable(UnreachableInst &I) { /* noop */ } + + // Helpers for visitSwitch + bool handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); +public: + void visitSwitchCase(CaseBlock &CB); + void visitBitTestHeader(BitTestBlock &B); + void visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B); + void visitJumpTable(JumpTable &JT); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH); + +private: + // These all get lowered before this pass. 
+ void visitInvoke(InvokeInst &I); + void visitUnwind(UnwindInst &I); + + void visitBinary(User &I, unsigned OpCode); + void visitShift(User &I, unsigned Opcode); + void visitAdd(User &I); + void visitSub(User &I); + void visitMul(User &I); + void visitURem(User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (User &I) { visitBinary(I, ISD::AND); } + void visitOr (User &I) { visitBinary(I, ISD::OR); } + void visitXor (User &I) { visitBinary(I, ISD::XOR); } + void visitShl (User &I) { visitShift(I, ISD::SHL); } + void visitLShr(User &I) { visitShift(I, ISD::SRL); } + void visitAShr(User &I) { visitShift(I, ISD::SRA); } + void visitICmp(User &I); + void visitFCmp(User &I); + void visitVICmp(User &I); + void visitVFCmp(User &I); + // Visit the conversion instructions + void visitTrunc(User &I); + void visitZExt(User &I); + void visitSExt(User &I); + void visitFPTrunc(User &I); + void visitFPExt(User &I); + void visitFPToUI(User &I); + void visitFPToSI(User &I); + void visitUIToFP(User &I); + void visitSIToFP(User &I); + void visitPtrToInt(User &I); + void visitIntToPtr(User &I); + void visitBitCast(User &I); + + void visitExtractElement(User &I); + void visitInsertElement(User &I); + void visitShuffleVector(User &I); + + void visitExtractValue(ExtractValueInst &I); + void visitInsertValue(InsertValueInst &I); + + void visitGetElementPtr(User &I); + void visitSelect(User &I); + + void visitMalloc(MallocInst &I); + void visitFree(FreeInst &I); + void visitAlloca(AllocaInst &I); + void visitLoad(LoadInst &I); + void visitStore(StoreInst &I); + void visitPHI(PHINode &I) { } // PHI nodes are handled specially. + void visitCall(CallInst &I); + void visitInlineAsm(CallSite CS); + const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); + + void visitPow(CallInst &I); + void visitExp2(CallInst &I); + void visitExp(CallInst &I); + void visitLog(CallInst &I); + void visitLog2(CallInst &I); + void visitLog10(CallInst &I); + + void visitVAStart(CallInst &I); + void visitVAArg(VAArgInst &I); + void visitVAEnd(CallInst &I); + void visitVACopy(CallInst &I); + + void visitUserOp1(Instruction &I) { + assert(0 && "UserOp1 should not exist at instruction selection time!"); + abort(); + } + void visitUserOp2(Instruction &I) { + assert(0 && "UserOp2 should not exist at instruction selection time!"); + abort(); + } + + const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); + const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); +}; + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB); + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp new file mode 100644 index 0000000..9d72a12 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -0,0 +1,1347 @@ +//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGISel class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "ScheduleDAGSDNodes.h" +#include "SelectionDAGBuild.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Timer.h" +#include <algorithm> +using namespace llvm; + +static cl::opt<bool> +DisableLegalizeTypes("disable-legalize-types", cl::Hidden); +static cl::opt<bool> +EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, + cl::desc("Enable verbose messages in the \"fast\" " + "instruction selector")); +static cl::opt<bool> +EnableFastISelAbort("fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction fails")); +static cl::opt<bool> +SchedLiveInCopies("schedule-livein-copies", + cl::desc("Schedule copies of livein registers"), + cl::init(false)); + +#ifndef NDEBUG +static cl::opt<bool> +ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the first " + "dag combine pass")); +static cl::opt<bool> +ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize types")); +static cl::opt<bool> +ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +static cl::opt<bool> +ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the second " + "dag combine pass")); +static cl::opt<bool> +ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post legalize types" + " dag combine pass")); +static cl::opt<bool> +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt<bool> +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +static cl::opt<bool> +ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + 
                   ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+                  ViewDAGCombine2 = false,
+                  ViewDAGCombineLT = false,
+                  ViewISelDAGs = false, ViewSchedDAGs = false,
+                  ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+               RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+            cl::init(&createDefaultScheduler),
+            cl::desc("Instruction schedulers available (before register"
+                     " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+                        createDefaultScheduler);
+
+namespace llvm {
+  //===--------------------------------------------------------------------===//
+  /// createDefaultScheduler - This creates an instruction scheduler appropriate
+  /// for the target.
+  ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+                                             CodeGenOpt::Level OptLevel) {
+    const TargetLowering &TLI = IS->getTargetLowering();
+
+    if (OptLevel == CodeGenOpt::None)
+      return createFastDAGScheduler(IS, OptLevel);
+    if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+      return createTDListDAGScheduler(IS, OptLevel);
+    assert(TLI.getSchedulingPreference() ==
+           TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
+    return createBURRListDAGScheduler(IS, OptLevel);
+  }
+}
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomDAGSchedInserter' flag.  These
+// instructions are special in various ways, which require special support to
+// insert.  The specified MachineInstr is created but not inserted into any
+// basic blocks, and the scheduler passes ownership of it to this method.
+MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                               MachineBasicBlock *MBB) const {
+  cerr << "If a target marks an instruction with "
+       << "'usesCustomDAGSchedInserter', it must implement "
+       << "TargetLowering::EmitInstrWithCustomInserter!\n";
+  abort();
+  return 0;
+}
+
+/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
+/// physical register has only a single copy use, then coalesce the copy
+/// if possible.
+static void EmitLiveInCopy(MachineBasicBlock *MBB,
+                           MachineBasicBlock::iterator &InsertPos,
+                           unsigned VirtReg, unsigned PhysReg,
+                           const TargetRegisterClass *RC,
+                           DenseMap<MachineInstr*, unsigned> &CopyRegMap,
+                           const MachineRegisterInfo &MRI,
+                           const TargetRegisterInfo &TRI,
+                           const TargetInstrInfo &TII) {
+  unsigned NumUses = 0;
+  MachineInstr *UseMI = NULL;
+  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
+         UE = MRI.use_end(); UI != UE; ++UI) {
+    UseMI = &*UI;
+    if (++NumUses > 1)
+      break;
+  }
+
+  // If the number of uses is not one, or the use is not a move instruction,
+  // don't coalesce. Also, only coalesce away a virtual register to virtual
+  // register copy.
+ bool Coalesced = false; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (NumUses == 1 && + TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + VirtReg = DstReg; + Coalesced = true; + } + + // Now find an ideal location to insert the copy. + MachineBasicBlock::iterator Pos = InsertPos; + while (Pos != MBB->begin()) { + MachineInstr *PrevMI = prior(Pos); + DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); + // copyRegToReg might emit multiple instructions to do a copy. + unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; + if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) + // This is what the BB looks like right now: + // r1024 = mov r0 + // ... + // r1 = mov r1024 + // + // We want to insert "r1025 = mov r1". Inserting this copy below the + // move to r1024 makes it impossible for that move to be coalesced. + // + // r1025 = mov r1 + // r1024 = mov r0 + // ... + // r1 = mov 1024 + // r2 = mov 1025 + break; // Woot! Found a good location. + --Pos; + } + + TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + if (Coalesced) { + if (&*InsertPos == UseMI) ++InsertPos; + MBB->erase(UseMI); + } +} + +/// EmitLiveInCopies - If this is the first basic block in the function, +/// and if it has live ins that need to be copied into vregs, emit the +/// copies into the block. +static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + if (SchedLiveInCopies) { + // Emit the copies at a heuristically-determined location in the block. + DenseMap<MachineInstr*, unsigned> CopyRegMap; + MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + E = MRI.livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = MRI.getRegClass(LI->second); + EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, + RC, CopyRegMap, MRI, TRI, TII); + } + } else { + // Emit the copies into the top of the block. + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + E = MRI.livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = MRI.getRegClass(LI->second); + TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + } + } +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : + FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), + FuncInfo(new FunctionLoweringInfo(TLI)), + CurDAG(new SelectionDAG(TLI, *FuncInfo)), + SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)), + GFI(), + OptLevel(OL), + DAGSize(0) +{} + +SelectionDAGISel::~SelectionDAGISel() { + delete SDL; + delete CurDAG; + delete FuncInfo; +} + +unsigned SelectionDAGISel::MakeReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<GCModuleInfo>(); + AU.addRequired<DwarfWriter>(); + AU.setPreservesAll(); +} + +bool SelectionDAGISel::runOnFunction(Function &Fn) { + // Do some sanity-checking on the command-line options. 
+ assert((!EnableFastISelVerbose || EnableFastISel) && + "-fast-isel-verbose requires -fast-isel"); + assert((!EnableFastISelAbort || EnableFastISel) && + "-fast-isel-abort requires -fast-isel"); + + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (Fn.hasAvailableExternallyLinkage()) + return false; + + + // Get alias analysis for load/store combining. + AA = &getAnalysis<AliasAnalysis>(); + + TargetMachine &TM = TLI.getTargetMachine(); + MF = &MachineFunction::construct(&Fn, TM); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + if (MF->getFunction()->hasGC()) + GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF->getFunction()); + else + GFI = 0; + RegInfo = &MF->getRegInfo(); + DOUT << "\n\n\n=== " << Fn.getName() << "\n"; + + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); + CurDAG->init(*MF, MMI, DW); + FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel); + SDL->init(GFI, *AA); + + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator())) + // Mark landing pad. + FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + + SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII); + + // If the first basic block in the function has live ins that need to be + // copied into vregs, emit the copies into the top of the block before + // emitting the code for the block. + EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII); + + // Add function live-ins to entry block live-in set. + for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), + E = RegInfo->livein_end(); I != E; ++I) + MF->begin()->addLiveIn(I->first); + +#ifndef NDEBUG + assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); +#endif + + FuncInfo->clear(); + + return true; +} + +static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { + for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) + if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + // Apply the catch info to DestBB. + AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); +#ifndef NDEBUG + if (!FLI.MBBMap[SrcBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); +#endif + } +} + +/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and +/// whether object offset >= 0. +static bool +IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) { + if (!isa<FrameIndexSDNode>(Op)) return false; + + FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op); + int FrameIdx = FrameIdxNode->getIndex(); + return MFI->isFixedObjectIndex(FrameIdx) && + MFI->getObjectOffset(FrameIdx) >= 0; +} + +/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could +/// possibly be overwritten when lowering the outgoing arguments in a tail +/// call. Currently the implementation of this call is very conservative and +/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with +/// virtual registers would be overwritten by direct lowering. 
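+/// For example, an incoming formal argument reloaded from a fixed frame slot
+/// with a non-negative offset lives exactly where the tail call will write its
+/// own outgoing arguments, so such values are first copied through virtual
+/// registers below.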
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op,
+                                                    MachineFrameInfo *MFI) {
+  RegisterSDNode *OpReg = NULL;
+  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+      (Op.getOpcode() == ISD::CopyFromReg &&
+       (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
+       (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
+      (Op.getOpcode() == ISD::LOAD &&
+       IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) ||
+      (Op.getOpcode() == ISD::MERGE_VALUES &&
+       Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD &&
+       IsFixedFrameObjectWithPosOffset(MFI,
+           Op.getOperand(Op.getResNo()).getOperand(1))))
+    return true;
+  return false;
+}
+
+/// CheckDAGForTailCallsAndFixThem - This function looks for CALL nodes in the
+/// DAG and fixes their tailcall attribute operand.
+static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
+                                           const TargetLowering &TLI) {
+  SDNode *Ret = NULL;
+  SDValue Terminator = DAG.getRoot();
+
+  // Find the RET node.
+  if (Terminator.getOpcode() == ISD::RET)
+    Ret = Terminator.getNode();
+
+  // Fix the tail call attribute of CALL nodes.
+  for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(),
+         BI = DAG.allnodes_end(); BI != BE; ) {
+    --BI;
+    if (CallSDNode *TheCall = dyn_cast<CallSDNode>(BI)) {
+      SDValue OpRet(Ret, 0);
+      SDValue OpCall(BI, 0);
+      bool isMarkedTailCall = TheCall->isTailCall();
+      // If the CALL node has its tail call attribute set to true and the call
+      // is not eligible (there is no RET, or the target rejects it), the
+      // attribute is fixed to false. The
+      // TargetLowering::IsEligibleForTailCallOptimization function must
+      // correctly identify tail call optimizable calls.
+      if (!isMarkedTailCall) continue;
+      if (Ret == NULL ||
+          !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) {
+        // Not eligible. Mark the CALL node as not a tail call. Note that we
+        // can modify the call node in place since calls are not CSE'd.
+        TheCall->setNotTailCall();
+      } else {
+        // Look for tail call clobbered arguments. Emit a series of
+        // copyto/copyfrom virtual register nodes to protect them.
+        SmallVector<SDValue, 32> Ops;
+        SDValue Chain = TheCall->getChain(), InFlag;
+        Ops.push_back(Chain);
+        Ops.push_back(TheCall->getCallee());
+        for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
+          SDValue Arg = TheCall->getArg(i);
+          bool isByVal = TheCall->getArgFlags(i).isByVal();
+          MachineFunction &MF = DAG.getMachineFunction();
+          MachineFrameInfo *MFI = MF.getFrameInfo();
+          if (!isByVal &&
+              IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
+            MVT VT = Arg.getValueType();
+            unsigned VReg = MF.getRegInfo().
+              createVirtualRegister(TLI.getRegClassFor(VT));
+            Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(),
+                                     VReg, Arg, InFlag);
+            InFlag = Chain.getValue(1);
+            Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(),
+                                     VReg, VT, InFlag);
+            Chain = Arg.getValue(1);
+            InFlag = Arg.getValue(2);
+          }
+          Ops.push_back(Arg);
+          Ops.push_back(TheCall->getArgFlagsVal(i));
+        }
+        // Link in the chain of CopyTo/CopyFromReg.
+        Ops[0] = Chain;
+        DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
+      }
+    }
+  }
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
+                                        BasicBlock::iterator Begin,
+                                        BasicBlock::iterator End) {
+  SDL->setCurrentBasicBlock(BB);
+
+  // Lower all of the non-terminator instructions.
+  for (BasicBlock::iterator I = Begin; I != End; ++I)
+    if (!isa<TerminatorInst>(I))
+      SDL->visit(*I);
+
+  // Ensure that all instructions which are used outside of their defining
+  // blocks are available as virtual registers.  Invoke is handled elsewhere.
+ for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) + SDL->CopyToExportRegsIfNeeded(I); + + // Handle PHI nodes in successor blocks. + if (End == LLVMBB->end()) { + HandlePHINodesInSuccessorBlocks(LLVMBB); + + // Lower the terminator after the copies are emitted. + SDL->visit(*LLVMBB->getTerminator()); + } + + // Make sure the root of the DAG is up-to-date. + CurDAG->setRoot(SDL->getControlRoot()); + + // Check whether calls in this block are real tail calls. Fix up CALL nodes + // with correct tailcall attribute so that the target can rely on the tailcall + // attribute indicating whether the call is really eligible for tail call + // optimization. + if (PerformTailCallOpt) + CheckDAGForTailCallsAndFixThem(*CurDAG, TLI); + + // Final step, emit the lowered DAG as machine code. + CodeGenAndEmitDAG(); + SDL->clear(); +} + +void SelectionDAGISel::ComputeLiveOutVRegInfo() { + SmallPtrSet<SDNode*, 128> VisitedNodes; + SmallVector<SDNode*, 128> Worklist; + + Worklist.push_back(CurDAG->getRoot().getNode()); + + APInt Mask; + APInt KnownZero; + APInt KnownOne; + + while (!Worklist.empty()) { + SDNode *N = Worklist.back(); + Worklist.pop_back(); + + // If we've already seen this node, ignore it. + if (!VisitedNodes.insert(N)) + continue; + + // Otherwise, add all chain operands to the worklist. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + Worklist.push_back(N->getOperand(i).getNode()); + + // If this is a CopyToReg with a vreg dest, process it. + if (N->getOpcode() != ISD::CopyToReg) + continue; + + unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // Ignore non-scalar or non-integer values. + SDValue Src = N->getOperand(2); + MVT SrcVT = Src.getValueType(); + if (!SrcVT.isInteger() || SrcVT.isVector()) + continue; + + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); + Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); + CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + + // Only install this information if it tells us something. + if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { + DestReg -= TargetRegisterInfo::FirstVirtualRegister; + FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo(); + if (DestReg >= FLI.LiveOutRegInfo.size()) + FLI.LiveOutRegInfo.resize(DestReg+1); + FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg]; + LOI.NumSignBits = NumSignBits; + LOI.KnownOne = KnownOne; + LOI.KnownZero = KnownZero; + } + } +} + +void SelectionDAGISel::CodeGenAndEmitDAG() { + std::string GroupName; + if (TimePassesIsEnabled) + GroupName = "Instruction Selection and Scheduling"; + std::string BlockName; + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || + ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + ViewSUnitDAGs) + BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' + + BB->getBasicBlock()->getName(); + + DOUT << "Initial selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); + + // Run the DAG combiner in pre-legalize mode. 
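+  // "Unrestricted" permits the combiner to create nodes with types and
+  // operations that are not yet legal; the later runs below use
+  // NoIllegalTypes and NoIllegalOperations once the corresponding
+  // legalization phase has happened.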
+ if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining 1", GroupName); + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } else { + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } + + DOUT << "Optimized lowered selection DAG:\n"; + DEBUG(CurDAG->dump()); + + // Second step, hack on the DAG until it only uses operations and types that + // the target supports. + if (!DisableLegalizeTypes) { + if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + + BlockName); + + bool Changed; + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + DOUT << "Type-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (Changed) { + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize types", GroupName); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } + + DOUT << "Optimized type-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + } + + if (TimePassesIsEnabled) { + NamedRegionTimer T("Vector Legalization", GroupName); + Changed = CurDAG->LegalizeVectors(); + } else { + Changed = CurDAG->LegalizeVectors(); + } + + if (Changed) { + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization 2", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DOUT << "Optimized vector-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + } + } + + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Legalization", GroupName); + CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + } else { + CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + } + + DOUT << "Legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + + // Run the DAG combiner in post-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining 2", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DOUT << "Optimized legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + + if (OptLevel != CodeGenOpt::None) + ComputeLiveOutVRegInfo(); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Selection", GroupName); + InstructionSelect(); + } else { + InstructionSelect(); + } + + DOUT << "Selected selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + + // Schedule machine code. 
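+  // The scheduler to use is chosen through the SchedulerRegistry (see
+  // CreateScheduler below); one instance is created and freed for each DAG
+  // that is emitted.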
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler(); + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Scheduling", GroupName); + Scheduler->Run(CurDAG, BB, BB->end()); + } else { + Scheduler->Run(CurDAG, BB, BB->end()); + } + + if (ViewSUnitDAGs) Scheduler->viewGraph(); + + // Emit machine code to BB. This can change 'BB' to the last block being + // inserted into. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Creation", GroupName); + BB = Scheduler->EmitSchedule(); + } else { + BB = Scheduler->EmitSchedule(); + } + + // Free the scheduler state. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName); + delete Scheduler; + } else { + delete Scheduler; + } + + DOUT << "Selected machine code:\n"; + DEBUG(BB->dump()); +} + +void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, + MachineFunction &MF, + MachineModuleInfo *MMI, + DwarfWriter *DW, + const TargetInstrInfo &TII) { + // Initialize the Fast-ISel state, if needed. + FastISel *FastIS = 0; + if (EnableFastISel) + FastIS = TLI.createFastISel(MF, MMI, DW, + FuncInfo->ValueMap, + FuncInfo->MBBMap, + FuncInfo->StaticAllocaMap +#ifndef NDEBUG + , FuncInfo->CatchInfoLost +#endif + ); + + // Iterate over all basic blocks in the function. + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + BasicBlock *LLVMBB = &*I; + BB = FuncInfo->MBBMap[LLVMBB]; + + BasicBlock::iterator const Begin = LLVMBB->begin(); + BasicBlock::iterator const End = LLVMBB->end(); + BasicBlock::iterator BI = Begin; + + // Lower any arguments needed in this block if this is the entry block. + bool SuppressFastISel = false; + if (LLVMBB == &Fn.getEntryBlock()) { + LowerArguments(LLVMBB); + + // If any of the arguments has the byval attribute, forgo + // fast-isel in the entry block. + if (FastIS) { + unsigned j = 1; + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); + I != E; ++I, ++j) + if (Fn.paramHasAttr(j, Attribute::ByVal)) { + if (EnableFastISelVerbose || EnableFastISelAbort) + cerr << "FastISel skips entry block due to byval argument\n"; + SuppressFastISel = true; + break; + } + } + } + + if (MMI && BB->isLandingPad()) { + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + unsigned LabelID = MMI->addLandingPad(BB); + + const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL); + BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID); + + // Mark exception register as live in. + unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) BB->addLiveIn(Reg); + + // Mark exception selector register as live in. + Reg = TLI.getExceptionSelectorRegister(); + if (Reg) BB->addLiveIn(Reg); + + // FIXME: Hack around an exception handling flaw (PR1508): the personality + // function and list of typeids logically belong to the invoke (or, if you + // like, the basic block containing the invoke), and need to be associated + // with it in the dwarf exception handling tables. Currently however the + // information is provided by an intrinsic (eh.selector) that can be moved + // to unexpected places by the optimizers: if the unwind edge is critical, + // then breaking it can result in the intrinsics being in the successor of + // the landing pad, not the landing pad itself. This results in exceptions + // not being caught because no typeids are associated with the invoke. 
+ // This may not be the only way things can go wrong, but it is the only way + // we try to work around for the moment. + BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); + + if (Br && Br->isUnconditional()) { // Critical edge? + BasicBlock::iterator I, E; + for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) + if (isa<EHSelectorInst>(I)) + break; + + if (I == E) + // No catch info found - try to extract some from the successor. + copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo); + } + } + + // Before doing SelectionDAG ISel, see if FastISel has been requested. + if (FastIS && !SuppressFastISel) { + // Emit code for any incoming arguments. This must happen before + // beginning FastISel on the entry block. + if (LLVMBB == &Fn.getEntryBlock()) { + CurDAG->setRoot(SDL->getControlRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + FastIS->startNewBlock(BB); + // Do FastISel on as many instructions as possible. + for (; BI != End; ++BI) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(BI)) + if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel miss: "; + BI->dump(); + } + if (EnableFastISelAbort) + assert(0 && "FastISel didn't handle a PHI in a successor"); + break; + } + + // First try normal tablegen-generated "fast" selection. + if (FastIS->SelectInstruction(BI)) + continue; + + // Next, try calling the target to attempt to handle the instruction. + if (FastIS->TargetSelectInstruction(BI)) + continue; + + // Then handle certain instructions as single-LLVM-Instruction blocks. + if (isa<CallInst>(BI)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel missed call: "; + BI->dump(); + } + + if (BI->getType() != Type::VoidTy) { + unsigned &R = FuncInfo->ValueMap[BI]; + if (!R) + R = FuncInfo->CreateRegForValue(BI); + } + + SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); + SelectBasicBlock(LLVMBB, BI, next(BI)); + // If the instruction was codegen'd with multiple blocks, + // inform the FastISel object where to resume inserting. + FastIS->setCurrentBlock(BB); + continue; + } + + // Otherwise, give up on FastISel for the rest of the block. + // For now, be a little lenient about non-branch terminators. + if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel miss: "; + BI->dump(); + } + if (EnableFastISelAbort) + // The "fast" selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + assert(0 && "FastISel didn't select the entire block"); + } + break; + } + } + + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + if (BI != End) { + // If FastISel is run and it has known DebugLoc then use it. 
+ if (FastIS && !FastIS->getCurDebugLoc().isUnknown()) + SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); + SelectBasicBlock(LLVMBB, BI, End); + } + + FinishBasicBlock(); + } + + delete FastIS; +} + +void +SelectionDAGISel::FinishBasicBlock() { + + DOUT << "Target-post-processed machine code:\n"; + DEBUG(BB->dump()); + + DOUT << "Total amount of phi nodes to update: " + << SDL->PHINodesToUpdate.size() << "\n"; + DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) + DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first + << ", " << SDL->PHINodesToUpdate[i].second << ")\n";); + + // Next, now that we know what the last MBB the LLVM BB expanded is, update + // PHI nodes in successors. + if (SDL->SwitchCases.empty() && + SDL->JTCases.empty() && + SDL->BitTestCases.empty()) { + for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + SDL->PHINodesToUpdate.clear(); + return; + } + + for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDL->BitTestCases[i].Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->BitTestCases[i].Parent; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitBitTestHeader(SDL->BitTestCases[i]); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->BitTestCases[i].Cases[j].ThisBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + if (j+1 != ej) + SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB, + SDL->BitTestCases[i].Reg, + SDL->BitTestCases[i].Cases[j]); + else + SDL->visitBitTestCase(SDL->BitTestCases[i].Default, + SDL->BitTestCases[i].Reg, + SDL->BitTestCases[i].Cases[j]); + + + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + // Update PHI Nodes + for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // This is "default" BB. We have two jumps to it. From "header" BB and + // from last "case" BB. + if (PHIBB == SDL->BitTestCases[i].Default) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent)); + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases. + back().ThisBB)); + } + // One of "cases" BB. 
+ for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); + j != ej; ++j) { + MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB; + if (cBB->succ_end() != + std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(cBB)); + } + } + } + } + SDL->BitTestCases.clear(); + + // If the JumpTable record is filled in, then we need to emit a jump table. + // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDL->JTCases[i].first.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->JTCases[i].first.HeaderBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->JTCases[i].second.MBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitJumpTable(SDL->JTCases[i].second); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + + // Update PHI Nodes + for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == SDL->JTCases[i].second.Default) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); + } + // JT BB. Just iterate over successors here + if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + } + } + SDL->JTCases.clear(); + + // If the switch block involved a branch to one of the actual successors, we + // need to update PHI nodes in that block. + for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + if (BB->isSuccessor(PHI->getParent())) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + } + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. + for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->SwitchCases[i].ThisBB; + SDL->setCurrentBasicBlock(BB); + + // Emit the code + SDL->visitSwitchCase(SDL->SwitchCases[i]); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. Note that PHI nodes can + // occur multiple times in PHINodesToUpdate. 
We have to be very careful to + // handle them the right number of times. + while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + for (MachineBasicBlock::iterator Phi = BB->begin(); + Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ + // This value for this PHI node is recorded in PHINodesToUpdate, get it. + for (unsigned pn = 0; ; ++pn) { + assert(pn != SDL->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (SDL->PHINodesToUpdate[pn].first == Phi) { + Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn]. + second, false)); + Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB)); + break; + } + } + } + + // Don't process RHS if same block as LHS. + if (BB == SDL->SwitchCases[i].FalseBB) + SDL->SwitchCases[i].FalseBB = 0; + + // If we haven't handled the RHS, do so now. Otherwise, we're done. + SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB; + SDL->SwitchCases[i].FalseBB = 0; + } + assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0); + } + SDL->SwitchCases.clear(); + + SDL->PHINodesToUpdate.clear(); +} + + +/// Create the scheduler. If a specific scheduler was specified +/// via the SchedulerRegistry, use it, otherwise select the +/// one preferred by the target. +/// +ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { + RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); + + if (!Ctor) { + Ctor = ISHeuristic; + RegisterScheduler::setDefault(Ctor); + } + + return Ctor(this, OptLevel); +} + +ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { + return new ScheduleHazardRecognizer(); +} + +//===----------------------------------------------------------------------===// +// Helper functions used by the generated instruction selector. +//===----------------------------------------------------------------------===// +// Calls to these methods are generated by tblgen. + +/// CheckAndMask - The isel is trying to match something like (and X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// CheckOrMask - The isel is trying to match something like (or X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). 
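+/// Illustrative example (assuming an 8-bit value): if the pattern wants
+/// (or X, 255) but the combiner rewrote it to (or X, 254) because bit 0 of X
+/// is known to be one, then ActualMask = 0xFE, DesiredMask = 0xFF, and
+/// NeededMask = 0x01; the match below still succeeds because the one missing
+/// bit is already known set in the input.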
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+                                   int64_t DesiredMaskS) const {
+  const APInt &ActualMask = RHS->getAPIntValue();
+  const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual OR mask sets bits outside the desired mask, this doesn't
+  // match.
+  if (ActualMask.intersects(~DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+
+  APInt KnownZero, KnownOne;
+  CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+  // If all the missing bits in the or are already known to be set, match!
+  if ((NeededMask & KnownOne) == NeededMask)
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen.  Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+  std::vector<SDValue> InOps;
+  std::swap(InOps, Ops);
+
+  Ops.push_back(InOps[0]);  // input chain.
+  Ops.push_back(InOps[1]);  // input asm string.
+
+  unsigned i = 2, e = InOps.size();
+  if (InOps[e-1].getValueType() == MVT::Flag)
+    --e;  // Don't process a flag operand if it is here.
+
+  while (i != e) {
+    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+    if ((Flags & 7) != 4 /*MEM*/) {
+      // Just skip over this operand, copying the operands verbatim.
+      Ops.insert(Ops.end(), InOps.begin()+i,
+                 InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+      i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+    } else {
+      assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+             "Memory operand with multiple values?");
+      // Otherwise, this is a memory operand.  Ask the target to select it.
+      std::vector<SDValue> SelOps;
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) {
+        cerr << "Could not match memory address.  Inline asm failure!\n";
+        exit(1);
+      }
+
+      // Add this to the output node.
+      MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy();
+      Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size() << 3),
+                                              IntPtrTy));
+      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+      i += 2;
+    }
+  }
+
+  // Add the flag input back if present.
+  if (e != InOps.size())
+    Ops.push_back(InOps.back());
+}
+
+/// findFlagUse - Return the use of the MVT::Flag value produced by the
+/// specified SDNode.
+///
+static SDNode *findFlagUse(SDNode *N) {
+  unsigned FlagResNo = N->getNumValues()-1;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDUse &Use = I.getUse();
+    if (Use.getResNo() == FlagResNo)
+      return Use.getUser();
+  }
+  return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
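+/// (The pruning below skips nodes that have already been visited and nodes
+/// whose node id is lower than Def's: assuming node ids reflect the DAG's
+/// topological order, as they do during instruction selection, such nodes
+/// cannot reach Def through their operands.)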
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+                          SDNode *Root,
+                          SmallPtrSet<SDNode*, 16> &Visited) {
+  if (Use->getNodeId() < Def->getNodeId() ||
+      !Visited.insert(Use))
+    return false;
+
+  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    SDNode *N = Use->getOperand(i).getNode();
+    if (N == Def) {
+      if (Use == ImmedUse || Use == Root)
+        continue;  // We are not looking for immediate use.
+      assert(N != Root);
+      return true;
+    }
+
+    // Traverse up the operand chain.
+    if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
+      return true;
+  }
+  return false;
+}
+
+/// isNonImmUse - Start searching from Root up the DAG to check if Def can
+/// be reached. Return true if that's the case. However, ignore direct uses
+/// by ImmedUse (which would be U in the example illustrated in
+/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
+/// case).
+/// FIXME: to be really generic, we should allow direct use by any node
+/// that is being folded. But realistically since we only fold loads which
+/// have one non-chain use, we only need to watch out for the load/op/store
+/// and load/op/cmp cases where the root (store / cmp) may reach the load via
+/// its chain operand.
+static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
+  SmallPtrSet<SDNode*, 16> Visited;
+  return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+}
+
+/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root and
+/// folding N is profitable.
+bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+                                                  SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  // If the Root use can somehow reach N through a path that doesn't contain
+  // U then folding N would create a cycle. e.g. In the following
+  // diagram, Root can reach N through X. If N is folded into Root, then
+  // X is both a predecessor and a successor of U.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^     ^          //
+  //         \   /           //
+  //          \ /            //
+  //         [Root*]         //
+  //
+  // * indicates nodes to be folded together.
+  //
+  // If Root produces a flag, then it gets (even more) interesting. Since it
+  // will be "glued" together with its flag use in the scheduler, we need to
+  // check if it might reach N.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^       ^        //
+  //         \       \       //
+  //          \      |       //
+  //         [Root*] |       //
+  //          ^      |       //
+  //          f      |       //
+  //          |      /       //
+  //         [Y]    /        //
+  //           ^   /         //
+  //           f  /          //
+  //           | /           //
+  //          [FU]           //
+  //
+  // If FU (flag use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of FU and a successor of
+  // Fold. But since Fold and FU are flagged together, this will create
+  // a cycle in the scheduling graph.
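+  //
+  // To guard against this, the code below first walks Root up its chain of
+  // flag uses, so that the reachability query is made from the topmost node
+  // that the scheduler will glue together with Root.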
+ + MVT VT = Root->getValueType(Root->getNumValues()-1); + while (VT == MVT::Flag) { + SDNode *FU = findFlagUse(Root); + if (FU == NULL) + break; + Root = FU; + VT = Root->getValueType(Root->getNumValues()-1); + } + + return !isNonImmUse(Root, N, U); +} + + +char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp new file mode 100644 index 0000000..3eec684 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -0,0 +1,416 @@ +//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::viewGraph method. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include <fstream> +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { + static bool hasEdgeDestLabels() { + return true; + } + + static unsigned numEdgeDestLabels(const void *Node) { + return ((const SDNode *) Node)->getNumValues(); + } + + static std::string getEdgeDestLabel(const void *Node, unsigned i) { + return ((const SDNode *) Node)->getValueType(i).getMVTString(); + } + + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge + /// should actually target another edge source, not a node. If this method is + /// implemented, getEdgeTarget should be implemented. + template<typename EdgeIter> + static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { + return true; + } + + /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is + /// called to determine which outgoing edge of Node is the target of this + /// edge. + template<typename EdgeIter> + static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) { + SDNode *TargetNode = *I; + SDNodeIterator NI = SDNodeIterator::begin(TargetNode); + std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo()); + return NI; + } + + static std::string getGraphName(const SelectionDAG *G) { + return G->getMachineFunction().getFunction()->getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool hasNodeAddressLabel(const SDNode *Node, + const SelectionDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
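+    /// (The implementation below highlights the non-data edges: MVT::Flag
+    /// edges are drawn red and bold, and chain (MVT::Other) edges blue and
+    /// dashed; ordinary value edges keep the default style.)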
+ template<typename EdgeIter> + static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { + SDValue Op = EI.getNode()->getOperand(EI.getOperand()); + MVT VT = Op.getValueType(); + if (VT == MVT::Flag) + return "color=red,style=bold"; + else if (VT == MVT::Other) + return "color=blue,style=dashed"; + return ""; + } + + + static std::string getNodeLabel(const SDNode *Node, + const SelectionDAG *Graph); + static std::string getNodeAttributes(const SDNode *N, + const SelectionDAG *Graph) { +#ifndef NDEBUG + const std::string &Attrs = Graph->getGraphAttrs(N); + if (!Attrs.empty()) { + if (Attrs.find("shape=") == std::string::npos) + return std::string("shape=Mrecord,") + Attrs; + else + return Attrs; + } +#endif + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(SelectionDAG *G, + GraphWriter<SelectionDAG*> &GW) { + GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + if (G->getRoot().getNode()) + GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + "color=blue,style=dashed"); + } + }; +} + +std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + std::string Op = Node->getOperationName(G); + + if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) { + Op += ": " + utostr(CSDN->getZExtValue()); + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) { + Op += ": " + ftostr(CSDN->getValueAPF()); + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(Node)) { + Op += ": " + GADN->getGlobal()->getName(); + if (int64_t Offset = GADN->getOffset()) { + if (Offset > 0) + Op += "+" + itostr(Offset); + else + Op += itostr(Offset); + } + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) { + Op += " " + itostr(FIDN->getIndex()); + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) { + Op += " " + itostr(JTDN->getIndex()); + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){ + if (CP->isMachineConstantPoolEntry()) { + Op += '<'; + { + raw_string_ostream OSS(Op); + OSS << *CP->getMachineCPVal(); + } + Op += '>'; + } else { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + Op += "<" + ftostr(CFP->getValueAPF()) + ">"; + else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal())) + Op += "<" + utostr(CI->getZExtValue()) + ">"; + else { + Op += '<'; + { + raw_string_ostream OSS(Op); + WriteAsOperand(OSS, CP->getConstVal(), false); + } + Op += '>'; + } + } + Op += " A=" + itostr(CP->getAlignment()); + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) { + Op = "BB: "; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + Op += LBB->getName(); + //Op += " " + (const void*)BBDN->getBasicBlock(); + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) { + if (G && R->getReg() != 0 && + TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + Op = Op + " " + + G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + Op += " #" + utostr(R->getReg()); + } + } else if (const DbgStopPointSDNode *D = dyn_cast<DbgStopPointSDNode>(Node)) { + DICompileUnit CU(cast<GlobalVariable>(D->getCompileUnit())); + std::string FN; + Op += ": " + CU.getFilename(FN); + Op += ":" + utostr(D->getLine()); + if (D->getColumn() != 0) + Op += ":" + utostr(D->getColumn()); + } else if (const LabelSDNode *L = dyn_cast<LabelSDNode>(Node)) { + Op += ": LabelID=" + utostr(L->getLabelID()); + } else if (const 
CallSDNode *C = dyn_cast<CallSDNode>(Node)) { + Op += ": CallingConv=" + utostr(C->getCallingConv()); + if (C->isVarArg()) + Op += ", isVarArg"; + if (C->isTailCall()) + Op += ", isTailCall"; + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(Node)) { + Op += "'" + std::string(ES->getSymbol()) + "'"; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) { + if (M->getValue()) + Op += "<" + M->getValue()->getName() + ">"; + else + Op += "<null>"; + } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(Node)) { + const Value *V = M->MO.getValue(); + Op += '<'; + if (!V) { + Op += "(unknown)"; + } else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + // PseudoSourceValues don't have names, so use their print method. + raw_string_ostream OSS(Op); + PSV->print(OSS); + } else { + Op += V->getName(); + } + Op += '+' + itostr(M->MO.getOffset()) + '>'; + } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(Node)) { + Op = Op + " AF=" + N->getArgFlags().getArgFlagsString(); + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) { + Op = Op + " VT=" + N->getVT().getMVTString(); + } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) { + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: + Op = Op + "<anyext "; + break; + case ISD::SEXTLOAD: + Op = Op + " <sext "; + break; + case ISD::ZEXTLOAD: + Op = Op + " <zext "; + break; + } + if (doExt) + Op += LD->getMemoryVT().getMVTString() + ">"; + if (LD->isVolatile()) + Op += "<V>"; + Op += LD->getIndexedModeName(LD->getAddressingMode()); + if (LD->getAlignment() > 1) + Op += " A=" + utostr(LD->getAlignment()); + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) { + if (ST->isTruncatingStore()) + Op += "<trunc " + ST->getMemoryVT().getMVTString() + ">"; + if (ST->isVolatile()) + Op += "<V>"; + Op += ST->getIndexedModeName(ST->getAddressingMode()); + if (ST->getAlignment() > 1) + Op += " A=" + utostr(ST->getAlignment()); + } + +#if 0 + Op += " Id=" + itostr(Node->getNodeId()); +#endif + + return Op; +} + + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void SelectionDAG::viewGraph(const std::string &Title) { +// This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), + Title); +#else + cerr << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +// This overload is defined out-of-line here instead of just using a +// default parameter because this is easiest for gdb to call. +void SelectionDAG::viewGraph() { + viewGraph(""); +} + +/// clearGraphAttrs - Clear all previously defined node graph attributes. +/// Intended to be used from a debugging tool (eg. gdb). +void SelectionDAG::clearGraphAttrs() { +#ifndef NDEBUG + NodeGraphAttrs.clear(); +#else + cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".) +/// +void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { +#ifndef NDEBUG + NodeGraphAttrs[N] = Attrs; +#else + cerr << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) 
+/// Used from getNodeAttributes. +const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { +#ifndef NDEBUG + std::map<const SDNode *, std::string>::const_iterator I = + NodeGraphAttrs.find(N); + + if (I != NodeGraphAttrs.end()) + return I->second; + else + return ""; +#else + cerr << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; + return std::string(""); +#endif +} + +/// setGraphColor - Convenience for setting node color attribute. +/// +void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { +#ifndef NDEBUG + NodeGraphAttrs[N] = std::string("color=") + Color; +#else + cerr << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +/// setSubgraphColorHelper - Implement setSubgraphColor. Return +/// whether we truncated the search. +/// +bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited, + int level, bool &printed) { + bool hit_limit = false; + +#ifndef NDEBUG + if (level >= 20) { + if (!printed) { + printed = true; + DOUT << "setSubgraphColor hit max level\n"; + } + return true; + } + + unsigned oldSize = visited.size(); + visited.insert(N); + if (visited.size() != oldSize) { + setGraphColor(N, Color); + for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N); + i != iend; + ++i) { + hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit; + } + } +#else + cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif + return hit_limit; +} + +/// setSubgraphColor - Convenience for setting subgraph color attribute. +/// +void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { +#ifndef NDEBUG + DenseSet<SDNode *> visited; + bool printed = false; + if (setSubgraphColorHelper(N, Color, visited, 0, printed)) { + // Visually mark that we hit the limit + if (strcmp(Color, "red") == 0) { + setSubgraphColorHelper(N, "blue", visited, 0, printed); + } + else if (strcmp(Color, "yellow") == 0) { + setSubgraphColorHelper(N, "green", visited, 0, printed); + } + } + +#else + cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream O(s); + O << "SU(" << SU->NodeNum << "): "; + if (SU->getNode()) { + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(), DAG); + FlaggedNodes.pop_back(); + if (!FlaggedNodes.empty()) + O << "\n "; + } + } else { + O << "CROSS RC COPY"; + } + return O.str(); +} + +void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { + if (DAG) { + // Draw a special "GraphRoot" node to indicate the root of the graph. 
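+    // The edge is drawn from the SUnit built for the DAG's root node, looked
+    // up via the node id recorded on it; if that id is -1 (no SUnit was
+    // created for it), no edge is emitted.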
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + const SDNode *N = DAG->getRoot().getNode(); + if (N && N->getNodeId() != -1) + GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1, + "color=blue,style=dashed"); + } +} diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp new file mode 100644 index 0000000..3334e53 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -0,0 +1,2592 @@ +//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/GlobalVariable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +namespace llvm { +TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); + // FIXME: what should we do for protected and internal visibility? + // For variables, is internal different from hidden? + bool isHidden = GV->hasHiddenVisibility(); + + if (reloc == Reloc::PIC_) { + if (isLocal || isHidden) + return TLSModel::LocalDynamic; + else + return TLSModel::GeneralDynamic; + } else { + if (!isDeclaration || isHidden) + return TLSModel::LocalExec; + else + return TLSModel::InitialExec; + } +} +} + +/// InitLibcallNames - Set default libcall names. 
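+/// The defaults follow the libgcc naming convention: the integer helpers use
+/// the GCC mode suffixes hi/si/di/ti for i16/i32/i64/i128 (e.g. "__ashlsi3"
+/// is a 32-bit shift-left), the floating-point helpers use sf/df/xf/tf for
+/// f32/f64/f80/ppcf128 (plus a few "__gcc_q*" PowerPC helpers), and the math
+/// functions use libm names (e.g. "sqrtf"/"sqrt"/"sqrtl").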
+/// +static void InitLibcallNames(const char **Names) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + 
Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; + Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + 
Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + } + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
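+/// For example, getFPTOUINT(MVT::f64, MVT::i32) returns FPTOUINT_F64_I32,
+/// whose default libcall name in the table above is "__fixunsdfsi".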
+RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. 
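+/// Each CC says how the integer result of the comparison libcall should be
+/// tested against zero; e.g. __eqsf2 returns zero exactly when its operands
+/// compare equal, so OEQ_F32 is paired with SETEQ below.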
+/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; +} + +TargetLowering::TargetLowering(TargetMachine &tm) + : TM(tm), TD(TM.getTargetData()) { + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(ConvertActions, 0, sizeof(ConvertActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use addLegalFPImmediate + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10,MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10,MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). 
+ setOperationAction(ISD::TRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + UsesGlobalOffsetTable = false; + ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType()); + ShiftAmtHandling = Undefined; + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + allowUnalignedMemoryAccesses = false; + benefitFromCodePlacementOpt = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + SchedPreferenceInfo = SchedulingForLatency; + JumpBufSize = 0; + JumpBufAlignment = 0; + IfCvtBlockSizeLimit = 2; + IfCvtDupBlockSizeLimit = 0; + PrefLoopAlignment = 0; + + InitLibcallNames(LibcallRoutineNames); + InitCmpLibcallCCs(CmpLibcallCCs); + + // Tell Legalize whether the assembler supports DEBUG_LOC. + const TargetAsmInfo *TASM = TM.getTargetAsmInfo(); + if (!TASM || !TASM->hasDotLocAndDotFile()) + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); +} + +TargetLowering::~TargetLowering() {} + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLowering::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. + for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { + MVT EVT = (MVT::SimpleValueType)ExpandedReg; + if (!EVT.isInteger()) + break; + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction(EVT, Expand); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. + unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + MVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, Promote); + } + } + + // ppcf128 type is really two f64's. + if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); + } + + // Decide how to handle f64. 
If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, Expand); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, Promote); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, Expand); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (!isTypeLegal(VT)) { + MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdown(VT, + IntermediateVT, NumIntermediates, + RegisterVT); + RegisterTypeForVT[i] = RegisterVT; + + // Determine if there is a legal wider type. + bool IsLegalWiderType = false; + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts) { + TransformToType[i] = SVT; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; + } + } + if (!IsLegalWiderType) { + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); + } + } + } + } +} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; +} + + +MVT TargetLowering::getSetCCResultType(MVT VT) const { + return getValueType(TD->getIntPtrType()); +} + + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. +/// +/// This method returns the number of registers needed, and the VT for each +/// register. It also returns the VT and quantity of the intermediate values +/// before they are promoted/expanded. +/// +unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, + MVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. 
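+  // Illustrative trace: for VT = v8f32 on a target whose widest legal vector
+  // type is v4f32, the halving loop below runs once (NumElts 8 -> 4,
+  // NumVectorRegs 1 -> 2), giving a breakdown of 2 registers of v4f32.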
+ if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + MVT DestVT = getRegisterType(NewVT); + RegisterVT = DestVT; + if (DestVT.bitsLT(NewVT)) { + // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + } else { + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; + } + + return 1; +} + +/// getWidenVectorType: given a vector type, returns the type to widen to +/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself. +/// If there is no vector type that we want to widen to, returns MVT::Other +/// When and where to widen is target dependent based on the cost of +/// scalarizing vs using the wider vector type. +MVT TargetLowering::getWidenVectorType(MVT VT) const { + assert(VT.isVector()); + if (isTypeLegal(VT)) + return VT; + + // Default is not to widen until moved to LegalizeTypes + return MVT::Other; +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. +unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { + return TD->getCallFrameTypeAlignment(Ty); +} + +SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + if (usesGlobalOffsetTable()) + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; +} + +bool +TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // Assume that everything is safe in static mode. + if (getTargetMachine().getRelocationModel() == Reloc::Static) + return true; + + // In dynamic-no-pic mode, assume that known defined values are safe. + if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && + GA && + !GA->getGlobal()->isDeclaration() && + !GA->getGlobal()->isWeakForLinker()) + return true; + + // Otherwise assume nothing is safe. + return false; +} + +//===----------------------------------------------------------------------===// +// Optimization Methods +//===----------------------------------------------------------------------===// + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. 
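+/// For example, if only the low four bits of an AND with 0xFF are demanded,
+/// the constant can be shrunk to 0x0F.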
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, + const APInt &Demanded) { + DebugLoc dl = Op.getDebugLoc(); + + // FIXME: ISD::SELECT, ISD::SELECT_CC + switch (Op.getOpcode()) { + default: break; + case ISD::XOR: + case ISD::AND: + case ISD::OR: { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) return false; + + if (Op.getOpcode() == ISD::XOR && + (C->getAPIntValue() | (~Demanded)).isAllOnesValue()) + return false; + + // if we can expand it to have all bits set, do it + if (C->getAPIntValue().intersects(~Demanded)) { + MVT VT = Op.getValueType(); + SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), + DAG.getConstant(Demanded & + C->getAPIntValue(), + VT)); + return CombineTo(Op, New); + } + + break; + } + } + + return false; +} + +/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the +/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening +/// cast, but it could be generalized for targets with other types of +/// implicit widening casts. +bool +TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + DebugLoc dl) { + assert(Op.getNumOperands() == 2 && + "ShrinkDemandedOp only supports binary operators!"); + assert(Op.getNode()->getNumValues() == 1 && + "ShrinkDemandedOp only supports nodes with one result!"); + + // Don't do this if the node has another user, which may require the + // full value. + if (!Op.getNode()->hasOneUse()) + return false; + + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros(); + if (!isPowerOf2_32(SmallVTBits)) + SmallVTBits = NextPowerOf2(SmallVTBits); + for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + MVT SmallVT = MVT::getIntegerVT(SmallVTBits); + if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && + TLI.isZExtFree(SmallVT, Op.getValueType())) { + // We found a type with free casts. + SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(1))); + SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X); + return CombineTo(Op, Z); + } + } + return false; +} + +/// SimplifyDemandedBits - Look at Op. At this point, we know that only the +/// DemandedMask bits of the result of Op are ever used downstream. If we can +/// use this information to simplify Op, create a new simplified DAG node and +/// return true, returning the original and new nodes in Old and New. Otherwise, +/// analyze the expression and return a mask of KnownOne and KnownZero bits for +/// the expression (used to simplify the caller). The KnownZero/One bits may +/// only be accurate for those bits in the DemandedMask. +bool TargetLowering::SimplifyDemandedBits(SDValue Op, + const APInt &DemandedMask, + APInt &KnownZero, + APInt &KnownOne, + TargetLoweringOpt &TLO, + unsigned Depth) const { + unsigned BitWidth = DemandedMask.getBitWidth(); + assert(Op.getValueSizeInBits() == BitWidth && + "Mask size mismatches value type size!"); + APInt NewMask = DemandedMask; + DebugLoc dl = Op.getDebugLoc(); + + // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); + + // Other users may use these bits. 
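+  // A node with several users is therefore only analyzed (never rewritten)
+  // below the root; at the root it may still be simplified, but with every
+  // bit treated as demanded so its other users stay correct.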
+ if (!Op.getNode()->hasOneUse()) { + if (Depth != 0) { + // If not at the root, Just compute the KnownZero/KnownOne bits to + // simplify things downstream. + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + return false; + } + // If this is the root being simplified, allow it to have multiple uses, + // just set the NewMask to all bits. + NewMask = APInt::getAllOnesValue(BitWidth); + } else if (DemandedMask == 0) { + // Not demanding any bits from Op. + if (Op.getOpcode() != ISD::UNDEF) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return false; + } else if (Depth == 6) { // Limit search depth. + return false; + } + + APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; + KnownZero = ~KnownOne & NewMask; + return false; // Don't fall through, will infinitely loop. + case ISD::AND: + // If the RHS is a constant, check to see if the LHS would be zero without + // using the bits from the RHS. Below, we use knowledge about the RHS to + // simplify the LHS, here we're using information from the LHS to simplify + // the RHS. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt LHSZero, LHSOne; + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, + LHSZero, LHSOne, Depth+1); + // If the LHS already has zeros where RHSC does, this and is dead. + if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + // If any of the set bits in the RHS are known zero on the LHS, shrink + // the constant. + if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + return true; + } + + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known one on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. 
+ KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((KnownZero & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((KnownZero2 & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known + if ((KnownOne & KnownOne2) == KnownOne) { + MVT VT = Op.getValueType(); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + Op.getOperand(0), ANDC)); + } + } + + // If the RHS is a constant, see if we can simplify it. + // for XOR, we prefer to force bits to 1 if they will make a -1. + // if we can't force bits, try to shrink constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt Expanded = C->getAPIntValue() | (~NewMask); + // if we can expand it to have all bits set, do it + if (Expanded.isAllOnesValue()) { + if (Expanded != C->getAPIntValue()) { + MVT VT = Op.getValueType(); + SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), + TLO.DAG.getConstant(Expanded, VT)); + return TLO.CombineTo(Op, New); + } + // if it already has all the bits set, nothing to change + // but don't shrink either! + } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + return true; + } + } + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SHL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. 
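+      // e.g. with i32 operands, ((X >>u 4) << 8) becomes (X << 4) when the
+      // low 8 bits are not demanded: Diff = ShAmt - C1 = 4, so Opc stays SHL.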
+ if (InOp.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + MVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero <<= SA->getZExtValue(); + KnownOne <<= SA->getZExtValue(); + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue()); + } + break; + case ISD::SRL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + unsigned VTSize = VT.getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + if (InOp.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + // Compute the new bits that are at the top now. + if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedMask == 1) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); + + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + APInt InDemandedMask = (NewMask << ShAmt); + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. 
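+      // e.g. for (X >>s 24) on i32, result bits 8-31 are copies of X's sign
+      // bit, so demanding any of them adds bit 31 of X to the input mask.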
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + if (HighBits.intersects(NewMask)) + InDemandedMask |= APInt::getSignBit(VT.getSizeInBits()); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bit, adjusted to where it is now in the mask. + APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), + Op.getOperand(1))); + } else if (KnownOne.intersects(SignBit)) { // New bits are known one. + KnownOne |= HighBits; + } + } + break; + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits()) & + NewMask; + + // If none of the extended bits are demanded, eliminate the sextinreg. + if (NewBits == 0) + return TLO.CombineTo(Op, Op.getOperand(0)); + + APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits()); + InSignBit.zext(BitWidth); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, + EVT.getSizeInBits()) & + NewMask; + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. + InputDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::ZERO_EXTEND: { + unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + APInt InMask = NewMask; + InMask.trunc(OperandBitWidth); + + // If none of the top bits are demanded, convert this into an any_extend. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; + if (!NewBits.intersects(NewMask)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); + APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); + APInt NewBits = ~InMask & NewMask; + + // If none of the top bits are demanded, convert this into an any_extend. 
+ if (NewBits == 0) + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + APInt InDemandedBits = InMask & NewMask; + InDemandedBits |= InSignBit; + InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit is known zero, convert this to a zero extend. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // If the sign bit is known one, the top bits match. + if (KnownOne.intersects(InSignBit)) { + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Otherwise, top bits aren't known. + KnownOne &= ~NewBits; + KnownZero &= ~NewBits; + } + break; + } + case ISD::ANY_EXTEND: { + unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + APInt InMask = NewMask; + InMask.trunc(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + break; + } + case ISD::TRUNCATE: { + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. + APInt TruncMask = NewMask; + TruncMask.zext(Op.getOperand(0).getValueSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. + if (Op.getOperand(0).getNode()->hasOneUse()) { + SDValue In = Op.getOperand(0); + unsigned InBitWidth = In.getValueSizeInBits(); + switch (In.getOpcode()) { + default: break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ + APInt HighBits = APInt::getHighBitsSet(InBitWidth, + InBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()); + HighBits.trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + In.getOperand(1))); + } + } + break; + } + } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + case ISD::AssertZext: { + MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= ~InMask & NewMask; + break; + } + case ISD::BIT_CONVERT: +#if 0 + // If this is an FP->Int bitcast and if the sign bit is the only thing that + // is demanded, turn this into a FGETSIGN. 
+ if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) && + MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && + !MVT::isVector(Op.getOperand(0).getValueType())) { + // Only do this xform if FGETSIGN is valid or if before legalize. + if (!TLO.AfterLegalize || + isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { + // Make a FGETSIGN + SHL to move the sign bit into the appropriate + // place. We expect the SHL to be eliminated by other optimizations. + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), + Op.getOperand(0)); + unsigned ShVal = Op.getValueType().getSizeInBits()-1; + SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(), + Sign, ShAmt)); + } + } +#endif + break; + case ISD::ADD: + case ISD::MUL: + case ISD::SUB: { + // Add, Sub, and Mul don't demand any bits in positions beyond that + // of the highest bit demanded of them. + APInt LoMask = APInt::getLowBitsSet(BitWidth, + BitWidth - NewMask.countLeadingZeros()); + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + // See if the operation should be performed at a smaller bit width. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + } + // FALL THROUGH + default: + // Just use ComputeMaskedBits to compute output bits. + TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + break; + } + + // If we know the value of all of the demanded bits, return this as a + // constant. + if ((NewMask & (KnownZero|KnownOne)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + + return false; +} + +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the +/// KnownZero/KnownOne bitsets. +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); +} + +/// ComputeNumSignBitsForTargetNode - This method can be implemented by +/// targets that want to expose additional information about sign bits to the +/// DAG Combiner. +unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use ComputeNumSignBits if you don't know whether Op" + " is a target node!"); + return 1; +} + +/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly +/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to +/// determine which bit is set. +/// +static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { + // A left-shift of a constant one will have exactly one bit set, because + // shifting the bit off the end is undefined. 
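+  // (1 << N) is 0b1, 0b10, 0b100, ... for every defined shift amount N, so
+  // exactly one bit survives the shift.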
+ if (Val.getOpcode() == ISD::SHL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue() == 1) + return true; + + // Similarly, a right-shift of a constant sign-bit will have exactly + // one bit set. + if (Val.getOpcode() == ISD::SRL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue().isSignBit()) + return true; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to ComputeMaskedBits to catch other known cases. + MVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); +} + +/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// and cc. If it is unable to simplify it, return a null SDValue. +SDValue +TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, DebugLoc dl) const { + SelectionDAG &DAG = DCI.DAG; + + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return DAG.getConstant(1, VT); + } + + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + if (isa<ConstantSDNode>(N0.getNode())) { + return DAG.FoldSetCC(VT, N0, N1, Cond, dl); + } else { + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. + if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); + } + } + + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + uint64_t bestMask = 0; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed() && + // FIXME: This uses getZExtValue() below so it only works on i64 and + // below. + N0.getValueType().getSizeInBits() <= 64) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... 
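+        // Illustrative trace: for ((load i32) & 0x00FF0000) == 0, only the
+        // third byte matters, so the search below settles on bestWidth = 8,
+        // bestOffset = 2 (little-endian) and bestMask = 0xFF.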
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + uint64_t newMask = (1ULL << width) - 1; + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!TD->isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask >> (offset * (width/8) * 8); + bestWidth = width; + break; + } + newMask = newMask << width; + } + } + } + if (bestWidth) { + MVT newVT = MVT::getIntegerVT(bestWidth); + if (newVT.isRound()) { + MVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getSrcValue(), + Lod->getSrcValueOffset() + bestOffset, + false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask, newVT)), + DAG.getConstant(0LL, newVT), Cond); + } + } + } + + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); + + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { + switch (Cond) { + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant(C1.isNonNegative(), VT); + default: + break; + } + } + + // Otherwise, we can perform the comparison with the low bits. + switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(APInt(C1).trunc(InSize), + N0.getOperand(0).getValueType()), + Cond); + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + MVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the extended part has any inconsistent bits, it cannot ever + // compare equal. In other words, they have to be all ones or all + // zeros. 
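+      // e.g. a value sign-extended in-reg from i8 to i32 has bits 8-31 equal
+      // to bit 7, so a constant like 0x180 (bit 8 set, bits 9-31 clear) can
+      // never match, and the setcc folds to a constant.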
+ APInt ExtBits = + APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); + if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); + + SDValue ZextOp; + MVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); + } else { + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); + } + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); + if (TrueWhenTrue) + return N0; + + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + } + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. + SDValue Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } + } + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } + + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + // X >= C0 --> X > (C0-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1-1, N1.getValueType()), + (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + } + + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + // X <= C0 --> X < (C0+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1+1, N1.getValueType()), + (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT); + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + MVT ShiftTy = DCI.isBeforeLegalize() ? + getPointerTy() : getShiftAmountTy(); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (isPowerOf2_64(AndRHS->getZExtValue())) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(Log2_64(AndRHS->getZExtValue()), + ShiftTy)); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. + if (C1.isPowerOf2()) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(C1.logBase2(), ShiftTy)); + } + } + } + } + } else if (isa<ConstantSDNode>(N0.getNode())) { + // Ensure that the constant occurs on the RHS. + return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } + + if (isa<ConstantFPSDNode>(N0.getNode())) { + // Constant fold or commute setcc. + SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); + if (O.getNode()) return O; + } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + // If the RHS of an FP comparison is a constant, simplify it away in + // some cases. + if (CFP->getValueAPF().isNaN()) { + // If an operand is known to be a nan, we can fold it. + switch (ISD::getUnorderedFlavor(Cond)) { + default: assert(0 && "Unknown flavor!"); + case 0: // Known false. + return DAG.getConstant(0, VT); + case 1: // Known true. 
+ return DAG.getConstant(1, VT); + case 2: // Undefined. + return DAG.getUNDEF(VT); + } + } + + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the + // constant if knowing that the operand is non-nan is enough. We prefer to + // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to + // materialize 0.0. + if (Cond == ISD::SETO || Cond == ISD::SETUO) + return DAG.getSetCC(dl, VT, N0, N0, Cond); + } + + if (N0 == N1) { + // We can always fold X == X for integer setcc's. + if (N0.getValueType().isInteger()) + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + unsigned UOF = ISD::getUnorderedFlavor(Cond); + if (UOF == 2) // FP operators that are undefined on NaNs. + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) + return DAG.getConstant(UOF, VT); + // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO + // if it is not already. + ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; + if (NewCond != Cond) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); + } + + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getValueType().isInteger()) { + if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || + N0.getOpcode() == ISD::XOR) { + // Simplify (X+Y) == (X+Z) --> Y == Z + if (N0.getOpcode() == N1.getOpcode()) { + if (N0.getOperand(0) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); + if (N0.getOperand(1) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); + if (DAG.isCommutativeBinOp(N0.getOpcode())) { + // If X op Y == Y op X, try other combinations. + if (N0.getOperand(0) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), + Cond); + if (N0.getOperand(1) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), + Cond); + } + } + + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + // Turn (X+C1) == C2 --> X == C2-C1 + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue()- + LHSR->getAPIntValue(), + N0.getValueType()), Cond); + } + + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. + if (N0.getOpcode() == ISD::XOR) + // If we know that all of the inverted bits are zero, don't bother + // performing the inversion. 
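+            // e.g. with X known zero outside bit 0, (X ^ 1) == 3 becomes
+            // X == (1 ^ 3) = 2.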
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) + return + DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + + // Turn (C1-X) == C2 --> X == C1-C2 + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { + return + DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + } + } + + // Simplify (X+Z) == X --> Z == 0 + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), + N1, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } + } + } + + if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || + N1.getOpcode() == ISD::XOR) { + // Simplify X == (X+Z) --> Z == 0 + if (N1.getOperand(0) == N0) { + return DAG.getSetCC(dl, VT, N1.getOperand(1), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getOperand(1) == N0) { + if (DAG.isCommutativeBinOp(N1.getOpcode())) { + return DAG.getSetCC(dl, VT, N1.getOperand(0), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getNode()->hasOneUse()) { + assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + // X == (Z-X) --> X<<1 == Z + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); + } + } + } + + // Simplify x&y == y to x&y != 0 if y has exactly one bit set. + // Note that where y is variable and is known to have at most + // one bit set (for example, if it is z&1) we cannot do this; + // the expressions are not equivalent when y==0. + if (N0.getOpcode() == ISD::AND) + if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { + if (ValueHasExactlyOneBitSet(N1, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } + } + if (N1.getOpcode() == ISD::AND) + if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { + if (ValueHasExactlyOneBitSet(N0, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } + } + } + + // Fold away ALL boolean setcc's. 
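+ // With i1 operands, every comparison collapses to a single boolean
+ // operation; e.g. for X,Y in {0,1}, X <u Y holds only when (X,Y) == (0,1),
+ // which is exactly ~X & Y. The switch below encodes each predicate this way.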
+ SDValue Temp; + if (N0.getValueType() == MVT::i1 && foldBooleans) { + switch (Cond) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETEQ: // X == Y -> ~(X^Y) + Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNOT(dl, Temp, MVT::i1); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETNE: // X != Y --> (X^Y) + N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + break; + } + if (VT != MVT::i1) { + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(N0.getNode()); + // FIXME: If running after legalize, we probably can't do this. + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + } + return N0; + } + + // Could not fold it. + return SDValue(); +} + +/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the +/// node is a GlobalAddress + offset. +bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, + int64_t &Offset) const { + if (isa<GlobalAddressSDNode>(N)) { + GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); + GA = GASD->getGlobal(); + Offset += GASD->getOffset(); + return true; + } + + if (N->getOpcode() == ISD::ADD) { + SDValue N1 = N->getOperand(0); + SDValue N2 = N->getOperand(1); + if (isGAPlusOffset(N1.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } + } + return false; +} + + +/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is +/// loading 'Bytes' bytes from a location that is 'Dist' units away from the +/// location that the 'Base' load is loading from. 
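+/// For example, with Bytes == 4 and Dist == 1 this returns true when LD loads
+/// the 4 bytes immediately following the 'Base' load; Dist may be negative
+/// for loads that precede it.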
+bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
+ unsigned Bytes, int Dist,
+ const MachineFrameInfo *MFI) const {
+ if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+ return false;
+ MVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ GlobalValue *GV1 = NULL;
+ GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ // FIXME: lots more standard ones to handle.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': // Relocatable Constant
+ case 'X': // Allow ANY value.
+ case 'I': // Target-specific constraints (e.g. constant ranges).
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ return C_Other;
+ }
+ }
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
+ Constraint[Constraint.size()-1] == '}')
+ return C_Register;
+ return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const {
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C are missing.
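+ // For example, an "i" operand like &GV+4 may arrive here either as
+ // (add GlobalAddress:GV, Constant:4) or as a GlobalAddress node with an
+ // offset of 4 already folded in.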
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint[0] != '{')
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (StringsEqualNoCase(RegName, RI->get(*I).AsmName))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
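+/// Higher values are more general. For example, given the multi-letter code
+/// "imr", 'i' (C_Other) scores 0, 'r' (C_RegisterClass) scores 2, and 'm'
+/// (C_Memory) scores 3, so ChooseConstraint falls back to the most general
+/// option, 'm', when the operand does not match 'i'.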
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { + switch (CT) { + default: assert(0 && "Unknown constraint type!"); + case TargetLowering::C_Other: + case TargetLowering::C_Unknown: + return 0; + case TargetLowering::C_Register: + return 1; + case TargetLowering::C_RegisterClass: + return 2; + case TargetLowering::C_Memory: + return 3; + } +} + +/// ChooseConstraint - If there are multiple different constraints that we +/// could pick for this operand (e.g. "imr") try to pick the 'best' one. +/// This is somewhat tricky: constraints fall into four classes: +/// Other -> immediates and magic values +/// Register -> one specific register +/// RegisterClass -> a group of regs +/// Memory -> memory +/// Ideally, we would pick the most specific constraint possible: if we have +/// something that fits into a register, we would pick it. The problem here +/// is that if we have something that could either be in a register or in +/// memory that use of the register could cause selection of *other* +/// operands to fail: they might only succeed if we pick memory. Because of +/// this the heuristic we use is: +/// +/// 1) If there is an 'other' constraint, and if the operand is valid for +/// that constraint, use it. This makes us take advantage of 'i' +/// constraints when available. +/// 2) Otherwise, pick the most general constraint present. This prefers +/// 'm' over 'r', for example. +/// +static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, + bool hasMemory, const TargetLowering &TLI, + SDValue Op, SelectionDAG *DAG) { + assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); + unsigned BestIdx = 0; + TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; + int BestGenerality = -1; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { + TargetLowering::ConstraintType CType = + TLI.getConstraintType(OpInfo.Codes[i]); + + // If this is an 'other' constraint, see if the operand is valid for it. + // For example, on X86 we might have an 'rI' constraint. If the operand + // is an integer in the range [0..31] we want to use I (saving a load + // of a register), otherwise we must use 'r'. + if (CType == TargetLowering::C_Other && Op.getNode()) { + assert(OpInfo.Codes[i].size() == 1 && + "Unhandled multi-letter 'other' constraint"); + std::vector<SDValue> ResultOps; + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory, + ResultOps, *DAG); + if (!ResultOps.empty()) { + BestType = CType; + BestIdx = i; + break; + } + } + + // This constraint letter is more general than the previous one, use it. + int Generality = getConstraintGenerality(CType); + if (Generality > BestGenerality) { + BestType = CType; + BestIdx = i; + BestGenerality = Generality; + } + } + + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; + OpInfo.ConstraintType = BestType; +} + +/// ComputeConstraintToUse - Determines the constraint code and constraint +/// type to use for the specific AsmOperandInfo, setting +/// OpInfo.ConstraintCode and OpInfo.ConstraintType. +void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, + SDValue Op, + bool hasMemory, + SelectionDAG *DAG) const { + assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); + + // Single-letter constraints ('r') are very common. 
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels).
+ if (isa<BasicBlock>(OpInfo.CallOperandVal) ||
+ isa<ConstantInt>(OpInfo.CallOperandVal))
+ return;
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // The default implementation implements a conservative RISC-style r+r and
+ // r+i addressing mode.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy()));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ MVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+ APInt::mu magics = N1C->getAPIntValue().magicu();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equivalent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy()));
+ }
+}
+
+/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET
+/// node that don't prevent tail call optimization.
+static SDValue IgnoreHarmlessInstructions(SDValue node) {
+ // Found call return.
+ if (node.getOpcode() == ISD::CALL) return node;
+ // Ignore MERGE_VALUES. Will have at least one operand.
+ if (node.getOpcode() == ISD::MERGE_VALUES)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
+ // Ignore ANY_EXTEND and TRUNCATE nodes.
+ if (node.getOpcode() == ISD::ANY_EXTEND)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
+ if (node.getOpcode() == ISD::TRUNCATE)
+ return IgnoreHarmlessInstructions(node.getOperand(0));
+ // Any other node type.
+ return node;
+}
+
+bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
+ SDValue Ret) {
+ unsigned NumOps = Ret.getNumOperands();
+ // ISD::CALL results: (value0, ..., valuen, chain)
+ // ISD::RET operands: (chain, value0, flag0, ..., valuen, flagn)
+ // Value return:
+ // Check that the operand of the RET node sources from the CALL node. The RET
+ // node has at least two operands. Operand 0 holds the chain. Operand 1 holds
+ // the value.
+ if (NumOps > 1 &&
+ IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0))
+ return true;
+ // void return: The RET node has the chain result value of the CALL node as
+ // input.
+ if (NumOps == 1 &&
+ Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
+ return true;
+
+ return false;
+}
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..2402f81
--- /dev/null
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,439 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// A prototype runtime compatible with ShadowStackGC can be found in
+// runtime/GC/SemiSpace.cpp.
+//
+// In order to support this particular transformation, all stack roots are
+// co-allocated on the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+ class VISIBILITY_HIDDEN ShadowStackGC : public GCStrategy {
+ /// Head - The global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ const StructType *StackEntryTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
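+ /// The alloca is the stack slot named by the intrinsic's first argument;
+ /// performCustomLowering later replaces it with a slot inside the shadow
+ /// stack frame.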
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ const Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators; this transform allows it to be non-allocating.
+ class VISIBILITY_HIDDEN EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+ // FALL THROUGH
+
+ case 1:
+ // Find all 'return' and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind and return do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction*,16> Calls;
+ for (Function::iterator BB = F.begin(),
+ E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(),
+ EE = BB->end(); II != EE; ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return 0;
+
+ // Create a cleanup block.
+ BasicBlock *CleanupBB = BasicBlock::Create(CleanupBBName, &F);
+ UnwindInst *UI = new UnwindInst(CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value*,16> Args;
+ for (unsigned I = Calls.size(); I != 0; ) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ Args.append(CI->op_begin() + 1, CI->op_end());
+
+ InvokeInst *II = InvokeInst::Create(CI->getOperand(0),
+ NewBB, CleanupBB,
+ Args.begin(), Args.end(),
+ CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(UI->getParent(), UI);
+ return &Builder;
+ }
+ }
+ };
+}
+
+// -----------------------------------------------------------------------------
+
+void llvm::linkShadowStackGC() { }
+
+ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ InitRoots = true;
+ CustomRoots = true;
+}
+
+Constant *ShadowStackGC::GetFrameMap(Function &F) {
+ // initializeCustomLowering creates the abstract type of this value.
+
+ Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant*,16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Type::Int32Ty, Roots.size(), false),
+ ConstantInt::get(Type::Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(BaseElts, 2),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
+ Metadata.begin(), NumMeta)
+ };
+
+ Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2);
+
+ std::string TypeName("gc_map.");
+ TypeName += utostr(NumMeta);
+ F.getParent()->addTypeName(TypeName, FrameMap->getType());
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage,
+ FrameMap, "__gc_" + F.getName(),
+ F.getParent());
+
+ Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0),
+ ConstantInt::get(Type::Int32Ty, 0) };
+ return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+}
+
+const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
+ // initializeCustomLowering creates the generic version of this type.
+ std::vector<const Type*> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+ Type *Ty = StructType::get(EltTys);
+
+ std::string TypeName("gc_stackentry.");
+ TypeName += F.getName();
+ F.getParent()->addTypeName(TypeName, Ty);
+
+ return Ty;
+}
+
+/// initializeCustomLowering - Set up the abstract frame-map and stack-entry
+/// types and find or create the llvm_gc_root_chain global that links the
+/// shadow stack frames.
+bool ShadowStackGC::initializeCustomLowering(Module &M) {
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // }; + std::vector<const Type*> EltTys; + EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array. + StructType *FrameMapTy = StructType::get(EltTys); + M.addTypeName("gc_map", FrameMapTy); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + OpaqueType *RecursiveTy = OpaqueType::get(); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(RecursiveTy)); + EltTys.push_back(FrameMapPtrTy); + PATypeHolder LinkTyH = StructType::get(EltTys); + + RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); + StackEntryTy = cast<StructType>(LinkTyH.get()); + const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from + // a FunctionPass? + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable(StackEntryPtrTy, false, + GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), + "llvm_gc_root_chain", &M); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGC::IsNullValue(Value *V) { + if (Constant *C = dyn_cast<Constant>(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGC::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. + + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair<CallInst*,AllocaInst*> Pair = std::make_pair( + CI, cast<AllocaInst>(CI->getOperand(1)->stripPointerCasts())); + if (IsNullValue(CI->getOperand(2))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. 
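+ // For example, if only the second of three roots carries metadata, the
+ // order becomes [root2, root1, root3] and GetFrameMap emits NumMeta == 1.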
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, + int Idx, int Idx2, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), + ConstantInt::get(Type::Int32Ty, Idx), + ConstantInt::get(Type::Int32Ty, Idx2) }; + Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), + ConstantInt::get(Type::Int32Ty, Idx) }; + Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGC::performCustomLowering(Function &F) { + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, + "gc_frame"); + + while (isa<AllocaInst>(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. + AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state at + // runtime), but it's nicer not to push the half-initialized entry onto the + // shadow stack. + while (isa<StoreInst>(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Push the entry onto the shadow stack. + Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); + + // For each instruction that escapes... + EscapeEnumerator EE(F, "gc_cleanup"); + while (IRBuilder<> *AtExit = EE.Next()) { + // Pop the entry from the shadow stack. Don't reuse CurrentHead from + // AtEntry, since that would make the value live for the entire function. 
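+ // The pop is simply "Head = StackEntry->Next": reload the caller's link
+ // from our own frame and store it back to the global head.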
+ Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 0000000..e44a138
--- /dev/null
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1141 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+// machine CFG to tightly surround their uses so that execution paths that
+// do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+// loop the spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+// the use info for the CSRs inside the region is propagated outward in the
+// CFG to ensure validity of the spill/restore placements. This decreases
+// the effectiveness of shrink wrapping but does not require edge splitting
+// in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
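+// For example, "llc -shrink-wrap-func=main" shrink wraps 'main' alone,
+// whether or not -shrink-wrap is given; this override is only active in
+// builds with assertions enabled (see initShrinkWrappingInfo).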
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+ cl::desc("Shrink wrap the specified function"),
+ cl::value_desc("funcname"),
+ cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+ None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+ cl::desc("Print shrink wrapping debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(BasicInfo , "print basic DF sets"),
+ clEnumVal(Iterations, "print SR sets for each iteration"),
+ clEnumVal(Details , "print all DF sets"),
+ clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ if (ShrinkWrapping || ShrinkWrapFunc != "") {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+ ReturnBlocks.clear();
+ clearAnticAvailSets();
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ TLLoops.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() {
+ clearAllSets();
+ EntryBlock = 0;
+#ifndef NDEBUG
+ HasFastExitPath = false;
+#endif
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ // DEBUG: enable or disable shrink wrapping for the current function
+ // via --shrink-wrap-func=<funcname>.
+#ifndef NDEBUG
+ if (ShrinkWrapFunc != "") {
+ std::string MFName = MF->getFunction()->getName();
+ ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
+ }
+#endif
+}
+
+
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save/restore code for callee-saved registers, using a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
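+///
+/// The iteration solves, for each block B:
+///   AnticOut[B] = INTERSECT(AnticIn[S]) for S in successors(B)
+///   AnticIn[B]  = CSRUsed[B] | AnticOut[B]
+///   AvailIn[B]  = INTERSECT(AvailOut[P]) for P in predecessors(B)
+///   AvailOut[B] = CSRUsed[B] | AvailIn[B]
+/// Spills go where a CSR becomes anticipated but is not yet available, and
+/// restores where it is available but no longer anticipated.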
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+ DEBUG(MF = &Fn);
+
+ initShrinkWrappingInfo();
+
+ DEBUG(if (ShrinkWrapThisFunction) {
+ DOUT << "Place CSR spills/restores for "
+ << MF->getFunction()->getName() << "\n";
+ });
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calcAnticInOut - calculate the anticipated in/out reg sets
+/// for the given MBB by looking forward in the MCFG at MBB's
+/// successors.
+///
+bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> successors;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ if (SUCC != MBB)
+ successors.push_back(SUCC);
+ }
+
+ unsigned i = 0, e = successors.size();
+ if (i != e) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = successors[i];
+
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++i; i != e; ++i) {
+ SUCC = successors[i];
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+
+ // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ // SparseBitVector's |= returns true when it adds new bits; since these
+ // sets start empty and only grow, this detects any change.
+ if (prevAnticIn |= AnticIn[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calcAvailInOut - calculate the available in/out reg sets
+/// for the given MBB by looking backward in the MCFG at MBB's
+/// predecessors.
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+ bool changed = false;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+ SmallVector<MachineBasicBlock*, 4> predecessors;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+ if (PRED != MBB)
+ predecessors.push_back(PRED);
+ }
+
+ unsigned i = 0, e = predecessors.size();
+ if (i != e) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = predecessors[i];
+
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++i; i != e; ++i) {
+ PRED = predecessors[i];
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+
+ // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ // As above, |= doubles as a changed test because the sets only grow.
+ if (prevAvailOut |= AvailOut[MBB])
+ changed = true;
+ return changed;
+}
+
+/// calculateAnticAvail - iteratively build the sets of anticipated and
+/// available registers in the MCFG of the current function, doing a
+/// combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+ // Initialize data flow sets.
+ clearAnticAvailSets();
+
+ // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ ++iterations;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // Calculate anticipated in, out regs at MBB from
+ // anticipated at successors of MBB.
+ changed |= calcAnticInOut(MBB);
+
+ // Calculate available in, out regs at MBB from
+ // available at predecessors of MBB.
+ changed |= calcAvailInOut(MBB); + } + } + + DEBUG(if (ShrinkWrapDebugging >= Details) { + DOUT << "-----------------------------------------------------------\n"; + DOUT << " Antic/Avail Sets:\n"; + DOUT << "-----------------------------------------------------------\n"; + DOUT << "iterations = " << iterations << "\n"; + DOUT << "-----------------------------------------------------------\n"; + DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; + DOUT << "-----------------------------------------------------------\n"; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets(MBB); + } + DOUT << "-----------------------------------------------------------\n"; + }); +} + +/// propagateUsesAroundLoop - copy used register info from MBB to all blocks +/// of the loop given by LP and its parent loops. This prevents spills/restores +/// from being placed in the bodies of loops. +/// +void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { + if (! MBB || !LP) + return; + + std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); + for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { + MachineBasicBlock* LBB = loopBlocks[i]; + if (LBB == MBB) + continue; + if (CSRUsed[LBB].contains(CSRUsed[MBB])) + continue; + CSRUsed[LBB] |= CSRUsed[MBB]; + } +} + +/// calculateSets - collect the CSRs used in this function, compute +/// the DF sets that describe the initial minimal regions in the +/// Machine CFG around which CSR spills and restores must be placed. +/// +/// Additionally, this function decides if shrink wrapping should +/// be disabled for the current function, checking the following: +/// 1. the current function has more than 500 MBBs: heuristic limit +/// on function size to reduce compile time impact of the current +/// iterative algorithm. +/// 2. all CSRs are used in the entry block. +/// 3. all CSRs are used in all immediate successors of the entry block. +/// 4. all CSRs are used in a subset of blocks, each of which dominates +/// all return blocks. These blocks, taken as a subgraph of the MCFG, +/// are equivalent to the entry block since all execution paths pass +/// through them. +/// +bool PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector<CalleeSavedInfo> CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) { + DEBUG(if (ShrinkWrapThisFunction) + DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": uses no callee-saved registers\n"); + return false; + } + + // Save refs to entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + // Determine if this function has fast exit paths. + DEBUG(if (ShrinkWrapThisFunction) + findFastExitPath()); + + // Limit shrink wrapping via the current iterative bit vector + // implementation to functions with <= 500 MBBs. + if (Fn.size() > 500) { + DEBUG(if (ShrinkWrapThisFunction) + DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": too large (" << Fn.size() << " MBBs)\n"); + ShrinkWrapThisFunction = false; + } + + // Return now if not shrink wrapping. + if (! ShrinkWrapThisFunction) + return false; + + // Collect set of used CSRs. 
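+ // Note that the bit index of each CSR in these sets is its position in
+ // the CSI vector, not its target register number.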
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + UsedCSRegs.set(inx); + } + + // Walk instructions in all MBBs, create CSRUsed[] sets, choose + // whether or not to shrink wrap this function. + MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + bool allCSRUsesInEntryBlock = true; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + unsigned Reg = CSI[inx].getReg(); + // If instruction I reads or modifies Reg, add it to UsedCSRegs, + // CSRUsed map for the current block. + for (unsigned opInx = 0, opEnd = I->getNumOperands(); + opInx != opEnd; ++opInx) { + const MachineOperand &MO = I->getOperand(opInx); + if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MOReg == Reg || + (TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(Reg, MOReg))) { + // CSR Reg is defined/used in block MBB. + CSRUsed[MBB].set(inx); + // Check for uses in EntryBlock. + if (MBB != EntryBlock) + allCSRUsesInEntryBlock = false; + } + } + } + } + + if (CSRUsed[MBB].empty()) + continue; + + // Propagate CSRUsed[MBB] in loops + if (MachineLoop* LP = LI.getLoopFor(MBB)) { + // Add top level loop to work list. + MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); + MachineLoop* PLP = getTopLevelLoopParent(LP); + + if (! HDR) { + HDR = PLP->getHeader(); + assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); + MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); + HDR = *PI; + } + TLLoops[HDR] = PLP; + + // Push uses from inside loop to its parent loops, + // or to all other MBBs in its loop. + if (LP->getLoopDepth() > 1) { + for (MachineLoop* PLP = LP->getParentLoop(); PLP; + PLP = PLP->getParentLoop()) { + propagateUsesAroundLoop(MBB, PLP); + } + } else { + propagateUsesAroundLoop(MBB, LP); + } + } + } + + if (allCSRUsesInEntryBlock) { + DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in EntryBlock\n"); + ShrinkWrapThisFunction = false; + } else { + bool allCSRsUsedInEntryFanout = true; + for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), + SE = EntryBlock->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (CSRUsed[SUCC] != UsedCSRegs) + allCSRsUsedInEntryFanout = false; + } + if (allCSRsUsedInEntryFanout) { + DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in imm successors of EntryBlock\n"); + ShrinkWrapThisFunction = false; + } + } + + if (ShrinkWrapThisFunction) { + // Check if MBB uses CSRs and dominates all exit nodes. + // Such nodes are equiv. to the entry node w.r.t. + // CSR uses: every path through the function must + // pass through this node. If each CSR is used at least + // once by these nodes, shrink wrapping is disabled. + CSRegSet CSRUsedInChokePoints; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) + continue; + bool dominatesExitNodes = true; + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + if (! 
DT.dominates(MBB, ReturnBlocks[ri])) { + dominatesExitNodes = false; + break; + } + if (dominatesExitNodes) { + CSRUsedInChokePoints |= CSRUsed[MBB]; + if (CSRUsedInChokePoints == UsedCSRegs) { + DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in choke point(s) at " + << getBasicBlockName(MBB) << "\n"); + ShrinkWrapThisFunction = false; + break; + } + } + } + } + + // Return now if we have decided not to apply shrink wrapping + // to the current function. + if (! ShrinkWrapThisFunction) + return false; + + DEBUG({ + DOUT << "ENABLED: " << Fn.getFunction()->getName(); + if (HasFastExitPath) + DOUT << " (fast exit path)"; + DOUT << "\n"; + if (ShrinkWrapDebugging >= BasicInfo) { + DOUT << "------------------------------" + << "-----------------------------\n"; + DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + if (ShrinkWrapDebugging >= Details) { + DOUT << "------------------------------" + << "-----------------------------\n"; + dumpAllUsed(); + } + } + }); + + // Build initial DF sets to determine minimal regions in the + // Machine CFG around which CSRs must be spilled and restored. + calculateAnticAvail(Fn); + + return true; +} + +/// addUsesForMEMERegion - add uses of CSRs spilled or restored in +/// multi-entry, multi-exit (MEME) regions so spill and restore +/// placement will not break code that enters or leaves a +/// shrink-wrapped region by inducing spills with no matching +/// restores or restores with no matching spills. A MEME region +/// is a subgraph of the MCFG with multiple entry edges, multiple +/// exit edges, or both. This code propagates use information +/// through the MCFG until all paths requiring spills and restores +/// _outside_ the computed minimal placement regions have been covered. +/// +bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, + SmallVector<MachineBasicBlock*, 4>& blks) { + if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { + bool processThisBlock = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC->pred_size() > 1) { + processThisBlock = true; + break; + } + } + if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED->succ_size() > 1) { + processThisBlock = true; + break; + } + } + } + if (! processThisBlock) + return false; + } + + CSRegSet prop; + if (!CSRSave[MBB].empty()) + prop = CSRSave[MBB]; + else if (!CSRRestore[MBB].empty()) + prop = CSRRestore[MBB]; + else + prop = CSRUsed[MBB]; + if (prop.empty()) + return false; + + // Propagate selected bits to successors, predecessors of MBB. + bool addedUses = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + // Self-loop + if (SUCC == MBB) + continue; + if (! CSRUsed[SUCC].contains(prop)) { + CSRUsed[SUCC] |= prop; + addedUses = true; + blks.push_back(SUCC); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + DOUT << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "successor " << getBasicBlockName(SUCC) << "\n"); + } + } + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + // Self-loop + if (PRED == MBB) + continue; + if (! 
CSRUsed[PRED].contains(prop)) { + CSRUsed[PRED] |= prop; + addedUses = true; + blks.push_back(PRED); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + DOUT << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "predecessor " << getBasicBlockName(PRED) << "\n"); + } + } + return addedUses; +} + +/// addUsesForTopLevelLoops - add uses for CSRs used inside top +/// level loops to the exit blocks of those loops. +/// +bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { + bool addedUses = false; + + // Place restores for top level loops where needed. + for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator + I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { + MachineBasicBlock* MBB = I->first; + MachineLoop* LP = I->second; + MachineBasicBlock* HDR = LP->getHeader(); + SmallVector<MachineBasicBlock*, 4> exitBlocks; + CSRegSet loopSpills; + + loopSpills = CSRSave[MBB]; + if (CSRSave[MBB].empty()) { + loopSpills = CSRUsed[HDR]; + assert(!loopSpills.empty() && "No CSRs used in loop?"); + } else if (CSRRestore[MBB].contains(CSRSave[MBB])) + continue; + + LP->getExitBlocks(exitBlocks); + assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); + for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { + MachineBasicBlock* EXB = exitBlocks[i]; + if (! CSRUsed[EXB].contains(loopSpills)) { + CSRUsed[EXB] |= loopSpills; + addedUses = true; + DEBUG(if (ShrinkWrapDebugging >= Iterations) + DOUT << "LOOP " << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(loopSpills) << ")->" + << getBasicBlockName(EXB) << "\n"); + if (EXB->succ_size() > 1 || EXB->pred_size() > 1) + blks.push_back(EXB); + } + } + } + return addedUses; +} + +/// calcSpillPlacements - determine which CSRs should be spilled +/// in MBB using AnticIn sets of MBB's predecessors, keeping track +/// of changes to spilled reg sets. Add MBB to the set of blocks +/// that need to be processed for propagating use info to cover +/// multi-entry/exit regions. +/// +bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, + SmallVector<MachineBasicBlock*, 4> &blks, + CSRegBlockMap &prevSpills) { + bool placedSpills = false; + // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) + CSRegSet anticInPreds; + SmallVector<MachineBasicBlock*, 4> predecessors; + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED != MBB) + predecessors.push_back(PRED); + } + unsigned i = 0, e = predecessors.size(); + if (i != e) { + MachineBasicBlock* PRED = predecessors[i]; + anticInPreds = UsedCSRegs - AnticIn[PRED]; + for (++i; i != e; ++i) { + PRED = predecessors[i]; + anticInPreds &= (UsedCSRegs - AnticIn[PRED]); + } + } else { + // Handle uses in entry blocks (which have no predecessors). + // This is necessary because the DFA formulation assumes the + // entry and (multiple) exit nodes cannot have CSR uses, which + // is not the case in the real world. + anticInPreds = UsedCSRegs; + } + // Compute spills required at MBB: + CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; + + if (! CSRSave[MBB].empty()) { + if (MBB == EntryBlock) { + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; + } else { + // Reset all regs spilled in MBB that are also spilled in EntryBlock. 
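+      // For example, with hypothetical sets CSRSave[EntryBlock] = [r4,r5]
+      // and CSRSave[MBB] = [r4,r6], only r6 still needs a spill at MBB;
+      // r4 is already saved on every path through EntryBlock.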
+      if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+      }
+    }
+  }
+  placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+  prevSpills[MBB] = CSRSave[MBB];
+  // Remember this block for adding restores to successor
+  // blocks for multi-entry region.
+  if (placedSpills)
+    blks.push_back(MBB);
+
+  DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+          DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+               << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+  return placedSpills;
+}
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
+                                SmallVector<MachineBasicBlock*, 4> &blks,
+                                CSRegBlockMap &prevRestores) {
+  bool placedRestores = false;
+  // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
+  CSRegSet availOutSucc;
+  SmallVector<MachineBasicBlock*, 4> successors;
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock* SUCC = *SI;
+    if (SUCC != MBB)
+      successors.push_back(SUCC);
+  }
+  unsigned i = 0, e = successors.size();
+  if (i != e) {
+    MachineBasicBlock* SUCC = successors[i];
+    availOutSucc = UsedCSRegs - AvailOut[SUCC];
+    for (++i; i != e; ++i) {
+      SUCC = successors[i];
+      availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+    }
+  } else {
+    if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
+      // Handle uses in return blocks (which have no successors).
+      // This is necessary because the DFA formulation assumes the
+      // entry and (multiple) exit nodes cannot have CSR uses, which
+      // is not the case in the real world.
+      availOutSucc = UsedCSRegs;
+    }
+  }
+  // Compute restores required at MBB:
+  CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+  // Postprocess restore placements at MBB.
+  // Remove the CSRs that are restored in the return blocks.
+  // Lest this be confusing, note that:
+  //   CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+  if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
+    if (! CSRSave[EntryBlock].empty())
+      CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+  }
+  placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
+  prevRestores[MBB] = CSRRestore[MBB];
+  // Remember this block for adding saves to predecessor
+  // blocks for multi-entry region.
+  if (placedRestores)
+    blks.push_back(MBB);
+
+  DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+          DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+               << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+  return placedRestores;
+}
+
+/// placeSpillsAndRestores - place spills and restores of CSRs
+/// used in MBBs in minimal regions that contain the uses.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+  CSRegBlockMap prevCSRSave;
+  CSRegBlockMap prevCSRRestore;
+  SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
+  bool changed = true;
+  unsigned iterations = 0;
+
+  // Iterate computation of spill and restore placements in the MCFG until:
+  //   1. CSR use info has been fully propagated around the MCFG, and
+  //   2. computation of CSRSave[], CSRRestore[] reaches fixed points.
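+  // As a minimal sketch, assume a hypothetical diamond MCFG
+  // Entry -> {B1, B2} -> Exit where only B1 uses CSR r4. Then
+  //   AnticIn[B1]  = [r4], AvailIn[B1]  = []  =>  CSRSave[B1]    = [r4]
+  //   AvailOut[B1] = [r4], AnticOut[B1] = []  =>  CSRRestore[B1] = [r4]
+  // so the spill/restore pair is confined to B1 rather than being
+  // placed in EntryBlock and the return block.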
+  while (changed) {
+    changed = false;
+    ++iterations;
+
+    DEBUG(if (ShrinkWrapDebugging >= Iterations)
+            DOUT << "iter " << iterations
+                 << " --------------------------------------------------\n");
+
+    // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
+    // which determines the placements of spills and restores.
+    // Keep track of changes to spills, restores in each iteration to
+    // minimize the total iterations.
+    bool SRChanged = false;
+    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+         MBBI != MBBE; ++MBBI) {
+      MachineBasicBlock* MBB = MBBI;
+
+      // Place spills for CSRs in MBB.
+      SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
+
+      // Place restores for CSRs in MBB.
+      SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
+    }
+
+    // Add uses of CSRs used inside loops where needed.
+    changed |= addUsesForTopLevelLoops(cvBlocks);
+
+    // Add uses for CSRs spilled or restored at branch, join points.
+    if (changed || SRChanged) {
+      while (! cvBlocks.empty()) {
+        MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+        changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+      }
+      if (! ncvBlocks.empty()) {
+        cvBlocks = ncvBlocks;
+        ncvBlocks.clear();
+      }
+    }
+
+    if (changed) {
+      calculateAnticAvail(Fn);
+      CSRSave.clear();
+      CSRRestore.clear();
+    }
+  }
+
+  // Check for effectiveness:
+  //  SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+  //  numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+  // Gives a measure of how many CSR spills have been moved from EntryBlock
+  // to minimal regions enclosing their uses.
+  CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+  unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+  numSRReduced += numSRReducedThisFunc;
+  DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+      DOUT << "-----------------------------------------------------------\n";
+      DOUT << "total iterations = " << iterations << " ( "
+           << Fn.getFunction()->getName()
+           << " " << numSRReducedThisFunc
+           << " " << Fn.size()
+           << " )\n";
+      DOUT << "-----------------------------------------------------------\n";
+      dumpSRSets();
+      DOUT << "-----------------------------------------------------------\n";
+      if (numSRReducedThisFunc)
+        verifySpillRestorePlacement();
+    });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one path from the entry block directly to a
+/// return block, or with a path that has a very small number of edges.
+///
+void PEI::findFastExitPath() {
+  if (! EntryBlock)
+    return;
+  // Find a path from EntryBlock to any return block that does not branch:
+  //        Entry
+  //          |     ...
+  //          v     |
+  //          B1<-----+
+  //          |
+  //          v
+  //       Return
+  for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+         SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock* SUCC = *SI;
+
+    // Assume positive, disprove existence of fast path.
+    HasFastExitPath = true;
+
+    // Check the immediate successors.
+    if (isReturnBlock(SUCC)) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+             << "->" << getBasicBlockName(SUCC) << "\n";
+      break;
+    }
+    // Traverse df from SUCC, look for a branch block.
+    std::string exitPath = getBasicBlockName(SUCC);
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+           BE = df_end(SUCC); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      // Reject paths with branch nodes.
+      if (SBB->succ_size() > 1) {
+        HasFastExitPath = false;
+        break;
+      }
+      exitPath += "->" + getBasicBlockName(SBB);
+    }
+    if (HasFastExitPath) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
+             << "->" << exitPath << "\n";
+      break;
+    }
+  }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+///         all CSRs spilled at MBB are restored on all paths
+///         from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+///           all CSRs restored at MBB are spilled on all paths
+///           reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+  unsigned numReturnBlocks = 0;
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    MachineBasicBlock* MBB = MBBI;
+    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+      ++numReturnBlocks;
+  }
+  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+         BE = CSRSave.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet spilled = BI->second;
+    CSRegSet restored;
+
+    if (spilled.empty())
+      continue;
+
+    DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+         << stringifyCSRegSet(spilled)
+         << " RESTORE[" << getBasicBlockName(MBB) << "] = "
+         << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
+
+    if (CSRRestore[MBB].intersects(spilled)) {
+      restored |= (CSRRestore[MBB] & spilled);
+    }
+
+    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+    // we must find restores for all spills w/no intervening spills on all
+    // paths from MBB to all return blocks.
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+           BE = df_end(MBB); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      if (SBB == MBB)
+        continue;
+      // Stop when we encounter spills of any CSRs spilled at MBB that
+      // have not yet been seen to be restored.
+      if (CSRSave[SBB].intersects(spilled) &&
+          !restored.contains(CSRSave[SBB] & spilled))
+        break;
+      // Collect the CSRs spilled at MBB that are restored
+      // at this DF successor of MBB.
+      if (CSRRestore[SBB].intersects(spilled))
+        restored |= (CSRRestore[SBB] & spilled);
+      // If we are at a return block, check that the restores
+      // we have seen so far exhaust the spills at MBB, then
+      // reset the restores.
+      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+        if (restored != spilled) {
+          CSRegSet notRestored = (spilled - restored);
+          DOUT << MF->getFunction()->getName() << ": "
+               << stringifyCSRegSet(notRestored)
+               << " spilled at " << getBasicBlockName(MBB)
+               << " are never restored on path to return "
+               << getBasicBlockName(SBB) << "\n";
+        }
+        restored.clear();
+      }
+    }
+  }
+
+  // Check restore placements.
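+  // This walk mirrors the spill check above: from each block with
+  // restores, walk the inverse DFS and require that every CSR restored
+  // here was spilled on all paths reaching this block.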
+ for (CSRegBlockMap::iterator BI = CSRRestore.begin(), + BE = CSRRestore.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet restored = BI->second; + CSRegSet spilled; + + if (restored.empty()) + continue; + + DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(restored) << "\n"; + + if (CSRSave[MBB].intersects(restored)) { + spilled |= (CSRSave[MBB] & restored); + } + // Walk inverse depth first from MBB to find spills of all + // CSRs restored at MBB: + for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), + BE = idf_end(MBB); BI != BE; ++BI) { + MachineBasicBlock* PBB = *BI; + if (PBB == MBB) + continue; + // Stop when we encounter restores of any CSRs restored at MBB that + // have not yet been seen to be spilled. + if (CSRRestore[PBB].intersects(restored) && + !spilled.contains(CSRRestore[PBB] & restored)) + break; + // Collect the CSRs restored at MBB that are spilled + // at this DF predecessor of MBB. + if (CSRSave[PBB].intersects(restored)) + spilled |= (CSRSave[PBB] & restored); + } + if (spilled != restored) { + CSRegSet notSpilled = (restored - spilled); + DOUT << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notSpilled) + << " restored at " << getBasicBlockName(MBB) + << " are never spilled\n"; + } + } +} + +// Debugging print methods. +std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { + std::ostringstream name; + if (MBB) { + if (MBB->getBasicBlock()) + name << MBB->getBasicBlock()->getName(); + else + name << "_MBB_" << MBB->getNumber(); + } + return name.str(); +} + +std::string PEI::stringifyCSRegSet(const CSRegSet& s) { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const std::vector<CalleeSavedInfo> CSI = + MF->getFrameInfo()->getCalleeSavedInfo(); + + std::ostringstream srep; + if (CSI.size() == 0) { + srep << "[]"; + return srep.str(); + } + srep << "["; + CSRegSet::iterator I = s.begin(), E = s.end(); + if (I != E) { + unsigned reg = CSI[*I].getReg(); + srep << TRI->getName(reg); + for (++I; I != E; ++I) { + reg = CSI[*I].getReg(); + srep << ","; + srep << TRI->getName(reg); + } + } + srep << "]"; + return srep.str(); +} + +void PEI::dumpSet(const CSRegSet& s) { + DOUT << stringifyCSRegSet(s) << "\n"; +} + +void PEI::dumpUsed(MachineBasicBlock* MBB) { + if (MBB) { + DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; + } +} + +void PEI::dumpAllUsed() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpUsed(MBB); + } +} + +void PEI::dumpSets(MachineBasicBlock* MBB) { + if (MBB) { + DOUT << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << "\n"; + } +} + +void PEI::dumpSets1(MachineBasicBlock* MBB) { + if (MBB) { + DOUT << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << " | " + << stringifyCSRegSet(CSRSave[MBB]) << " | " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } +} + +void PEI::dumpAllSets() { + for 
(MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets1(MBB); + } +} + +void PEI::dumpSRSets() { + for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); + MBB != E; ++MBB) { + if (! CSRSave[MBB].empty()) { + DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]); + if (CSRRestore[MBB].empty()) + DOUT << "\n"; + } + if (! CSRRestore[MBB].empty()) { + if (! CSRSave[MBB].empty()) + DOUT << " "; + DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } + } +} +#endif diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp new file mode 100644 index 0000000..2bc234f --- /dev/null +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -0,0 +1,2827 @@ +//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple register coalescing pass that attempts to +// aggressively coalesce every register copy that it can. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regcoalescing" +#include "SimpleRegisterCoalescing.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Value.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +STATISTIC(numJoins , "Number of interval joins performed"); +STATISTIC(numCrossRCs , "Number of cross class joins performed"); +STATISTIC(numCommutes , "Number of instruction commuting performed"); +STATISTIC(numExtends , "Number of copies extended"); +STATISTIC(NumReMats , "Number of instructions re-materialized"); +STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); +STATISTIC(numAborts , "Number of times interval joining aborted"); +STATISTIC(numDeadValNo, "Number of valno def marked dead"); + +char SimpleRegisterCoalescing::ID = 0; +static cl::opt<bool> +EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true)); + +static cl::opt<bool> +NewHeuristic("new-coalescer-heuristic", + cl::desc("Use new coalescer heuristic"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> +CrossClassJoin("join-cross-class-copies", + cl::desc("Coalesce cross register class copies"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> +PhysJoinTweak("tweak-phys-join-heuristics", + cl::desc("Tweak heuristics for joining phys reg with vr"), + cl::init(false), cl::Hidden); + +static RegisterPass<SimpleRegisterCoalescing> +X("simple-register-coalescing", "Simple Register Coalescing"); + +// Declare that we implement the RegisterCoalescer interface +static 
RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X);
+
+const PassInfo *const llvm::SimpleRegisterCoalescingID = &X;
+
+void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<LiveIntervals>();
+  AU.addPreserved<LiveIntervals>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreservedID(MachineDominatorsID);
+  if (StrongPHIElim)
+    AU.addPreservedID(StrongPHIEliminationID);
+  else
+    AU.addPreservedID(PHIEliminationID);
+  AU.addPreservedID(TwoAddressInstructionPassID);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+///  A3 = B0
+///    ...
+///  B1 = A3      <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
+                                                    LiveInterval &IntB,
+                                                    MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+  // the example above.
+  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+  assert(BLR != IntB.end() && "Live range not found!");
+  VNInfo *BValNo = BLR->valno;
+
+  // Get the location that B is defined at. Two options: either this value has
+  // an unknown definition point or it is defined at CopyIdx. If unknown, we
+  // can't process it.
+  if (!BValNo->copy) return false;
+  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+  assert(ALR != IntA.end() && "Live range not found!");
+  VNInfo *AValNo = ALR->valno;
+  // If it's re-defined by an early clobber somewhere in the live range, then
+  // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+  // See PR3149:
+  //  172     %ECX<def> = MOV32rr %reg1039<kill>
+  //  180     INLINEASM <es:subl $5,$1
+  //          sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, %EAX<kill>,
+  //          36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+  //  188     %EAX<def> = MOV32rr %EAX<kill>
+  //  196     %ECX<def> = MOV32rr %ECX<kill>
+  //  204     %ECX<def> = MOV32rr %ECX<kill>
+  //  212     %EAX<def> = MOV32rr %EAX<kill>
+  //  220     %EAX<def> = MOV32rr %EAX
+  //  228     %reg1039<def> = MOV32rr %ECX<kill>
+  // The early clobber operand ties ECX input to the ECX def.
+  //
+  // The live interval of ECX is represented as this:
+  // %reg20,inf = [46,47:1)[174,230:0)  0@174-(230)  1@46-(47)
+  // The coalescer has no idea there was a def in the middle of [174,230].
+  if (AValNo->redefByEC)
+    return false;
+
+  // If AValNo is defined as a copy from IntB, we can potentially process this.
+  // Get the instruction that defines this value number.
+  unsigned SrcReg = li_->getVNInfoSourceReg(AValNo);
+  if (!SrcReg) return false;  // Not defined by a copy.
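+
+  // In the example above, AValNo is defined by the copy 'A3 = B0', so
+  // SrcReg here is B's register.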
+
+  // If the source register comes from an interval other than IntB, we can't
+  // handle this.
+  if (SrcReg != IntB.reg) return false;
+
+  // Get the LiveRange in IntB that this value number starts with.
+  LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def-1);
+  assert(ValLR != IntB.end() && "Live range not found!");
+
+  // Make sure that the end of the live range is inside the same block as
+  // CopyMI.
+  MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1);
+  if (!ValLREndInst ||
+      ValLREndInst->getParent() != CopyMI->getParent()) return false;
+
+  // Okay, we now know that ValLR ends in the same block that the CopyMI
+  // live-range starts. If there are no intervening live ranges between them in
+  // IntB, we can merge them.
+  if (ValLR+1 != BLR) return false;
+
+  // If a live interval is a physical register, conservatively check if any
+  // of its sub-registers is overlapping the live interval of the virtual
+  // register. If so, do not coalesce.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) &&
+      *tri_->getSubRegisters(IntB.reg)) {
+    for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
+      if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
+        DOUT << "Interfere with sub-register ";
+        DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+        return false;
+      }
+  }
+
+  DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
+
+  unsigned FillerStart = ValLR->end, FillerEnd = BLR->start;
+  // We are about to delete CopyMI, so need to remove it as the 'instruction
+  // that defines this value #'. Update the valnum with the new defining
+  // instruction #.
+  BValNo->def  = FillerStart;
+  BValNo->copy = NULL;
+
+  // Okay, we can merge them. We need to insert a new liverange:
+  // [ValLR.end, BLR.begin) of either value number, then we merge the
+  // two value numbers.
+  IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+  // If the IntB live range is assigned to a physical register, and if that
+  // physreg has sub-registers, update their live intervals as well.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+    for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+      LiveInterval &SRLI = li_->getInterval(*SR);
+      SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+                              SRLI.getNextValue(FillerStart, 0,
+                                                li_->getVNInfoAllocator())));
+    }
+  }
+
+  // Okay, merge "B1" into the same value number as "B0".
+  if (BValNo != ValLR->valno) {
+    IntB.addKills(ValLR->valno, BValNo->kills);
+    IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+  }
+  DOUT << " result = "; IntB.print(DOUT, tri_);
+  DOUT << "\n";
+
+  // If the source instruction was killing the source register before the
+  // merge, unset the isKill marker given the live range has been extended.
+  int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+  if (UIdx != -1) {
+    ValLREndInst->getOperand(UIdx).setIsKill(false);
+    IntB.removeKill(ValLR->valno, FillerStart);
+  }
+
+  ++numExtends;
+  return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValNo val# of IntA.
+bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
+                                                    LiveInterval &IntB,
+                                                    VNInfo *AValNo,
+                                                    VNInfo *BValNo) {
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    LiveInterval::Ranges::iterator BI =
+      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+    if (BI != IntB.ranges.begin())
+      --BI;
+    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+      if (BI->valno == BValNo)
+        continue;
+      if (BI->start <= AI->start && BI->end > AI->start)
+        return true;
+      if (BI->start > AI->start && BI->start < AI->end)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+///  A3 = op A2 B0<kill>
+///    ...
+///  B1 = A3      <- this copy
+///    ...
+///     = op A3   <- more uses
+///
+/// ==>
+///
+///  B2 = op B0 A2<kill>
+///    ...
+///  B1 = B2      <- now an identity copy
+///    ...
+///     = op B2   <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
+                                                        LiveInterval &IntB,
+                                                        MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+  // FIXME: For now, only eliminate the copy by commuting its def when the
+  // source register is a virtual register. We want to guard against cases
+  // where the copy is a back edge copy and commuting the def lengthens the
+  // live interval of the source register to the entire loop.
+  if (TargetRegisterInfo::isPhysicalRegister(IntA.reg))
+    return false;
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+  // the example above.
+  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+  assert(BLR != IntB.end() && "Live range not found!");
+  VNInfo *BValNo = BLR->valno;
+
+  // Get the location that B is defined at. Two options: either this value has
+  // an unknown definition point or it is defined at CopyIdx. If unknown, we
+  // can't process it.
+  if (!BValNo->copy) return false;
+  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1);
+  assert(ALR != IntA.end() && "Live range not found!");
+  VNInfo *AValNo = ALR->valno;
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization.
+  if (AValNo->def == ~0U || AValNo->def == ~1U || AValNo->hasPHIKill)
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  unsigned NewDstIdx;
+  if (!TID.isCommutable() ||
+      !tii_->CommuteChangesDestination(DefMI, NewDstIdx))
+    return false;
+
+  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+  unsigned NewReg = NewDstMO.getReg();
+  if (NewReg != IntB.reg || !NewDstMO.isKill())
+    return false;
+
+  // Make sure there are no other definitions of IntB that would reach the
+  // uses which the new definition can reach.
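+  // e.g. if a second def of B lay between 'A3 = op A2 B0<kill>' and a
+  // later use of A3 in the example above, commuting would wrongly make
+  // that use see the second def's value.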
+  if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+    return false;
+
+  // If some of the uses of IntA.reg are already coalesced away, return false.
+  // It's not possible to determine whether it's safe to perform the coalescing.
+  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+         UE = mri_->use_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    unsigned UseIdx = li_->getInstructionIndex(UseMI);
+    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+    if (ULR == IntA.end())
+      continue;
+    if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+      return false;
+  }
+
+  // At this point we have decided that it is legal to do this
+  // transformation. Start by commuting the instruction.
+  MachineBasicBlock *MBB = DefMI->getParent();
+  MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+  if (!NewMI)
+    return false;
+  if (NewMI != DefMI) {
+    li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+    MBB->insert(DefMI, NewMI);
+    MBB->erase(DefMI);
+  }
+  unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+  NewMI->getOperand(OpIdx).setIsKill();
+
+  bool BHasPHIKill = BValNo->hasPHIKill;
+  SmallVector<VNInfo*, 4> BDeadValNos;
+  SmallVector<unsigned, 4> BKills;
+  std::map<unsigned, unsigned> BExtend;
+
+  // If ALR and BLR overlap and end of BLR extends beyond end of ALR, e.g.
+  // A = or A, B
+  // ...
+  // B = A
+  // ...
+  // C = A<kill>
+  // ...
+  //   = B
+  //
+  // then do not add kills of A to the newly created B interval.
+  bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
+  if (Extended)
+    BExtend[ALR->end] = BLR->end;
+
+  // Update uses of IntA of the specific Val# with IntB.
+  bool BHasSubRegs = false;
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+    BHasSubRegs = *tri_->getSubRegisters(IntB.reg);
+  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+         UE = mri_->use_end(); UI != UE;) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = &*UI;
+    ++UI;
+    if (JoinedCopies.count(UseMI))
+      continue;
+    unsigned UseIdx = li_->getInstructionIndex(UseMI);
+    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+    if (ULR == IntA.end() || ULR->valno != AValNo)
+      continue;
+    UseMO.setReg(NewReg);
+    if (UseMI == CopyMI)
+      continue;
+    if (UseMO.isKill()) {
+      if (Extended)
+        UseMO.setIsKill(false);
+      else
+        BKills.push_back(li_->getUseIndex(UseIdx)+1);
+    }
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+      continue;
+    if (DstReg == IntB.reg) {
+      // This copy will become a noop. If it's defining a new val#,
+      // remove that val# as well. However this live range is being
+      // extended to the end of the existing live range defined by the copy.
+      unsigned DefIdx = li_->getDefIndex(UseIdx);
+      const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
+      BHasPHIKill |= DLR->valno->hasPHIKill;
+      assert(DLR->valno->def == DefIdx);
+      BDeadValNos.push_back(DLR->valno);
+      BExtend[DLR->start] = DLR->end;
+      JoinedCopies.insert(UseMI);
+      // If this is a kill but it's going to be removed, the last use
+      // of the same val# is the new kill.
+      if (UseMO.isKill())
+        BKills.pop_back();
+    }
+  }
+
+  // We need to insert a new liverange: [ALR.start, LastUse). It may be we can
+  // simply extend BLR if CopyMI doesn't end the range.
+  DOUT << "\nExtending: "; IntB.print(DOUT, tri_);
+
+  // Remove val#'s defined by copies that will be coalesced away.
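+  // These are the val#'s collected in BDeadValNos above; their ranges
+  // have been folded into BExtend, so the val#'s themselves can go.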
+  for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
+    VNInfo *DeadVNI = BDeadValNos[i];
+    if (BHasSubRegs) {
+      for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+        LiveInterval &SRLI = li_->getInterval(*SR);
+        const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def);
+        SRLI.removeValNo(SRLR->valno);
+      }
+    }
+    IntB.removeValNo(BDeadValNos[i]);
+  }
+
+  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+  // is updated. Kills are also updated.
+  VNInfo *ValNo = BValNo;
+  ValNo->def = AValNo->def;
+  ValNo->copy = NULL;
+  for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) {
+    unsigned Kill = ValNo->kills[j];
+    if (Kill != BLR->end)
+      BKills.push_back(Kill);
+  }
+  ValNo->kills.clear();
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    unsigned End = AI->end;
+    std::map<unsigned, unsigned>::iterator EI = BExtend.find(End);
+    if (EI != BExtend.end())
+      End = EI->second;
+    IntB.addRange(LiveRange(AI->start, End, ValNo));
+
+    // If the IntB live range is assigned to a physical register, and if that
+    // physreg has sub-registers, update their live intervals as well.
+    if (BHasSubRegs) {
+      for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+        LiveInterval &SRLI = li_->getInterval(*SR);
+        SRLI.MergeInClobberRange(AI->start, End, li_->getVNInfoAllocator());
+      }
+    }
+  }
+  IntB.addKills(ValNo, BKills);
+  ValNo->hasPHIKill = BHasPHIKill;
+
+  DOUT << " result = "; IntB.print(DOUT, tri_);
+  DOUT << "\n";
+
+  DOUT << "\nShortening: "; IntA.print(DOUT, tri_);
+  IntA.removeValNo(AValNo);
+  DOUT << " result = "; IntA.print(DOUT, tri_);
+  DOUT << "\n";
+
+  ++numCommutes;
+  return true;
+}
+
+/// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply
+/// falls through to SuccMBB.
+static bool isSameOrFallThroughBB(MachineBasicBlock *MBB,
+                                  MachineBasicBlock *SuccMBB,
+                                  const TargetInstrInfo *tii_) {
+  if (MBB == SuccMBB)
+    return true;
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB &&
+    MBB->isSuccessor(SuccMBB);
+}
+
+/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range
+/// from a physical register live interval as well as from the live intervals
+/// of its sub-registers.
+static void removeRange(LiveInterval &li, unsigned Start, unsigned End,
+                        LiveIntervals *li_, const TargetRegisterInfo *tri_) {
+  li.removeRange(Start, End, true);
+  if (TargetRegisterInfo::isPhysicalRegister(li.reg)) {
+    for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+      if (!li_->hasInterval(*SR))
+        continue;
+      LiveInterval &sli = li_->getInterval(*SR);
+      unsigned RemoveEnd = Start;
+      while (RemoveEnd != End) {
+        LiveInterval::iterator LR = sli.FindLiveRangeContaining(Start);
+        if (LR == sli.end())
+          break;
+        RemoveEnd = (LR->end < End) ? LR->end : End;
+        sli.removeRange(Start, RemoveEnd, true);
+        Start = RemoveEnd;
+      }
+    }
+  }
+}
+
+/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block
+/// as the copy instruction, trim the live interval to the last use and return
+/// true.
+bool
+SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx,
+                                                    MachineBasicBlock *CopyMBB,
+                                                    LiveInterval &li,
+                                                    const LiveRange *LR) {
+  unsigned MBBStart = li_->getMBBStartIdx(CopyMBB);
+  unsigned LastUseIdx;
+  MachineOperand *LastUse = lastRegisterUse(LR->start, CopyIdx-1, li.reg,
+                                            LastUseIdx);
+  if (LastUse) {
+    MachineInstr *LastUseMI = LastUse->getParent();
+    if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) {
+      // r1024 = op
+      // ...
+      // BB1:
+      //       = r1024
+      //
+      // BB2:
+      // r1025<dead> = r1024<kill>
+      if (MBBStart < LR->end)
+        removeRange(li, MBBStart, LR->end, li_, tri_);
+      return true;
+    }
+
+    // There are uses before the copy, just shorten the live range to the end
+    // of last use.
+    LastUse->setIsKill();
+    removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_);
+    li.addKill(LR->valno, LastUseIdx+1);
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+        DstReg == li.reg) {
+      // Last use is itself an identity copy.
+      int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+      LastUseMI->getOperand(DeadIdx).setIsDead();
+    }
+    return true;
+  }
+
+  // Is it livein?
+  if (LR->start <= MBBStart && LR->end > MBBStart) {
+    if (LR->start == 0) {
+      assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+      // Live-in to the function but dead. Remove it from entry live-in set.
+      mf_->begin()->removeLiveIn(li.reg);
+    }
+    // FIXME: Shorten intervals in BBs that reach this BB.
+  }
+
+  return false;
+}
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+                                                       unsigned DstReg,
+                                                       MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI));
+  LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+  assert(SrcLR != SrcInt.end() && "Live range not found!");
+  VNInfo *ValNo = SrcLR->valno;
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization.
+  if (ValNo->def == ~0U || ValNo->def == ~1U || ValNo->hasPHIKill)
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  if (!TID.isAsCheapAsAMove())
+    return false;
+  if (!DefMI->getDesc().isRematerializable() ||
+      !tii_->isTriviallyReMaterializable(DefMI))
+    return false;
+  bool SawStore = false;
+  if (!DefMI->isSafeToMove(tii_, SawStore))
+    return false;
+
+  unsigned DefIdx = li_->getDefIndex(CopyIdx);
+  const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
+  DLR->valno->copy = NULL;
+  // Don't forget to update sub-register intervals.
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
+      if (!li_->hasInterval(*SR))
+        continue;
+      DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+      if (DLR && DLR->valno->copy == CopyMI)
+        DLR->valno->copy = NULL;
+    }
+  }
+
+  // If copy kills the source register, find the last use and propagate
+  // kill.
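+  // If TrimLiveIntervalToLastUse fails below, there was no other use in
+  // this block and the def may turn out to be dead (see the PR4090 fix
+  // further down).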
+ bool checkForDeadDef = false; + MachineBasicBlock *MBB = CopyMI->getParent(); + if (CopyMI->killsRegister(SrcInt.reg)) + if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { + checkForDeadDef = true; + } + + MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); + CopyMI->removeFromParent(); + tii_->reMaterialize(*MBB, MII, DstReg, DefMI); + MachineInstr *NewMI = prior(MII); + + if (checkForDeadDef) { + // PR4090 fix: Trim interval failed because there was no use of the + // source interval in this MBB. If the def is in this MBB too then we + // should mark it dead: + if (DefMI->getParent() == MBB) { + DefMI->addRegisterDead(SrcInt.reg, tri_); + SrcLR->end = SrcLR->start + 1; + } + + } + + // CopyMI may have implicit operands, transfer them over to the newly + // rematerialized instruction. And update implicit def interval valnos. + for (unsigned i = CopyMI->getDesc().getNumOperands(), + e = CopyMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = CopyMI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + NewMI->addOperand(MO); + if (MO.isDef() && li_->hasInterval(MO.getReg())) { + unsigned Reg = MO.getReg(); + DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx); + if (DLR && DLR->valno->copy == CopyMI) + DLR->valno->copy = NULL; + } + } + + li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); + MBB->getParent()->DeleteMachineInstr(CopyMI); + ReMatCopies.insert(CopyMI); + ReMatDefs.insert(DefMI); + ++NumReMats; + return true; +} + +/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy. +/// +bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI, + unsigned DstReg) const { + MachineBasicBlock *MBB = CopyMI->getParent(); + const MachineLoop *L = loopInfo->getLoopFor(MBB); + if (!L) + return false; + if (MBB != L->getLoopLatch()) + return false; + + LiveInterval &LI = li_->getInterval(DstReg); + unsigned DefIdx = li_->getInstructionIndex(CopyMI); + LiveInterval::const_iterator DstLR = + LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx)); + if (DstLR == LI.end()) + return false; + unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1; + if (DstLR->valno->kills.size() == 1 && + DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill) + return true; + return false; +} + +/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and +/// update the subregister number if it is not zero. If DstReg is a +/// physical register and the existing subregister number of the def / use +/// being updated is not zero, make sure to set it to the correct physical +/// subregister. +void +SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, + unsigned SubIdx) { + bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + if (DstIsPhys && SubIdx) { + // Figure out the real physical register we are updating with. 
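+    // e.g. with the hypothetical x86 numbering used below, DstReg EAX
+    // with SubIdx 1 becomes AL, and defs/uses are rewritten to AL.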
+    DstReg = tri_->getSubReg(DstReg, SubIdx);
+    SubIdx = 0;
+  }
+
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
+         E = mri_->reg_end(); I != E; ) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *UseMI = &*I;
+    ++I;
+    unsigned OldSubIdx = O.getSubReg();
+    if (DstIsPhys) {
+      unsigned UseDstReg = DstReg;
+      if (OldSubIdx)
+        UseDstReg = tri_->getSubReg(DstReg, OldSubIdx);
+
+      unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+      if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+                            CopySrcSubIdx, CopyDstSubIdx) &&
+          CopySrcReg != CopyDstReg &&
+          CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
+        // If the use is a copy and it won't be coalesced away, and its source
+        // is defined by a trivial computation, try to rematerialize it instead.
+        if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,UseMI))
+          continue;
+      }
+
+      O.setReg(UseDstReg);
+      O.setSubReg(0);
+      continue;
+    }
+
+    // Sub-register indexes go from small to large. e.g.
+    // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
+    // EAX: 1 -> AL, 2 -> AX
+    // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
+    // sub-register 2 is also AX.
+    if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
+      assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
+    else if (SubIdx)
+      O.setSubReg(SubIdx);
+    // Remove would-be duplicated kill marker.
+    if (O.isKill() && UseMI->killsRegister(DstReg))
+      O.setIsKill(false);
+    O.setReg(DstReg);
+
+    // After updating the operand, check if the machine instruction has
+    // become a copy. If so, update its val# information.
+    const TargetInstrDesc &TID = UseMI->getDesc();
+    unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
+    if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 &&
+        tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+                          CopySrcSubIdx, CopyDstSubIdx) &&
+        CopySrcReg != CopyDstReg &&
+        (TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
+         allocatableRegs_[CopyDstReg])) {
+      LiveInterval &LI = li_->getInterval(CopyDstReg);
+      unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI));
+      const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx);
+      if (DLR->valno->def == DefIdx)
+        DLR->valno->copy = UseMI;
+    }
+  }
+}
+
+/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining"
+/// registers due to insert_subreg coalescing. e.g.
+/// r1024 = op
+/// r1025 = implicit_def
+/// r1025 = insert_subreg r1025, r1024
+///       = op r1025
+/// =>
+/// r1025 = op
+/// r1025 = implicit_def
+/// r1025 = insert_subreg r1025, r1025
+///       = op r1025
+void
+SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) {
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+         E = mri_->reg_end(); I != E; ) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *DefMI = &*I;
+    ++I;
+    if (!O.isDef())
+      continue;
+    if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
+      continue;
+    if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI)))
+      continue;
+    li_->RemoveMachineInstrFromMaps(DefMI);
+    DefMI->eraseFromParent();
+  }
+}
+
+/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+/// due to live range lengthening as the result of coalescing.
+void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg,
+                                                      LiveInterval &LI) {
+  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+         UE = mri_->use_end(); UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    if (UseMO.isKill()) {
+      MachineInstr *UseMI = UseMO.getParent();
+      unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+      const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
+      if (!LR || !LI.isKill(LR->valno, UseIdx+1))
+        UseMO.setIsKill(false);
+    }
+  }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty, if so remove it and also remove the empty intervals of its
+/// sub-registers. Return true if live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+                                  const TargetRegisterInfo *tri_) {
+  if (li.empty()) {
+    if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+      for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+        if (!li_->hasInterval(*SR))
+          continue;
+        LiveInterval &sli = li_->getInterval(*SR);
+        if (sli.empty())
+          li_->removeInterval(*SR);
+      }
+    li_->removeInterval(li.reg);
+    return true;
+  }
+  return false;
+}
+
+/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+/// Return true if live interval is removed.
+bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li,
+                                                        MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+  LiveInterval::iterator MLR =
+    li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx));
+  if (MLR == li.end())
+    return false;  // Already removed by ShortenDeadCopySrcLiveRange.
+  unsigned RemoveStart = MLR->start;
+  unsigned RemoveEnd = MLR->end;
+  // Remove the liverange that's defined by this.
+  if (RemoveEnd == li_->getDefIndex(CopyIdx)+1) {
+    removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+    return removeIntervalIfEmpty(li, li_, tri_);
+  }
+  return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
+                                             MachineInstr *DefMI) {
+  unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI));
+  LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+  if (DefIdx != MLR->valno->def)
+    return false;
+  li.removeValNo(MLR->valno);
+  return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// PropagateDeadness - Propagate the dead marker to the instruction which
+/// defines the val#.
+static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
+                              unsigned &LRStart, LiveIntervals *li_,
+                              const TargetRegisterInfo* tri_) {
+  MachineInstr *DefMI =
+    li_->getInstructionFromIndex(li_->getDefIndex(LRStart));
+  if (DefMI && DefMI != CopyMI) {
+    int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+    if (DeadIdx != -1) {
+      DefMI->getOperand(DeadIdx).setIsDead();
+      // A dead def should have a single cycle interval.
+      ++LRStart;
+    }
+  }
+}
+
+/// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+/// extended by a dead copy. Mark the last use (if any) of the val# as a kill
+/// and end the live range there. If there isn't another use, then this live
+/// range is dead. Return true if live interval is removed.
+bool
+SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
+                                                      MachineInstr *CopyMI) {
+  unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+  if (CopyIdx == 0) {
+    // FIXME: special case: function live in. It can be a general case if the
+    // first instruction index starts at > 0 value.
+    assert(TargetRegisterInfo::isPhysicalRegister(li.reg));
+    // Live-in to the function but dead. Remove it from entry live-in set.
+    if (mf_->begin()->isLiveIn(li.reg))
+      mf_->begin()->removeLiveIn(li.reg);
+    const LiveRange *LR = li.getLiveRangeContaining(CopyIdx);
+    removeRange(li, LR->start, LR->end, li_, tri_);
+    return removeIntervalIfEmpty(li, li_, tri_);
+  }
+
+  LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx-1);
+  if (LR == li.end())
+    // Livein but defined by a phi.
+    return false;
+
+  unsigned RemoveStart = LR->start;
+  unsigned RemoveEnd = li_->getDefIndex(CopyIdx)+1;
+  if (LR->end > RemoveEnd)
+    // More uses past this copy? Nothing to do.
+    return false;
+
+  // If there is a last use in the same bb, we can't remove the live range.
+  // Shorten the live interval and return.
+  MachineBasicBlock *CopyMBB = CopyMI->getParent();
+  if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR))
+    return false;
+
+  MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart);
+  if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_))
+    // If the live range starts in another mbb and the copy mbb is not a fall
+    // through mbb, then we can only cut the range from the beginning of the
+    // copy mbb.
+    RemoveStart = li_->getMBBStartIdx(CopyMBB) + 1;
+
+  if (LR->valno->def == RemoveStart) {
+    // If the def MI defines the val# and this copy is the only kill of the
+    // val#, then propagate the dead marker.
+    if (li.isOnlyLROfValNo(LR)) {
+      PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
+      ++numDeadValNo;
+    }
+    if (li.isKill(LR->valno, RemoveEnd))
+      li.removeKill(LR->valno, RemoveEnd);
+  }
+
+  removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
+  return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+/// from an implicit def to another register can be coalesced away.
+bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
+                                                     LiveInterval &li,
+                                                     LiveInterval &ImpLi) const{
+  if (!CopyMI->killsRegister(ImpLi.reg))
+    return false;
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+  LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx);
+  if (LR == li.end())
+    return false;
+  if (LR->valno->hasPHIKill)
+    return false;
+  if (LR->valno->def != CopyIdx)
+    return false;
+  // Make sure all of the val#'s uses are copies.
+  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg),
+         UE = mri_->use_end(); UI != UE;) {
+    MachineInstr *UseMI = &*UI;
+    ++UI;
+    if (JoinedCopies.count(UseMI))
+      continue;
+    unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI));
+    LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
+    if (ULR == li.end() || ULR->valno != LR->valno)
+      continue;
+    // If the use is not a copy, then it's not safe to coalesce the move.
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+      if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG &&
+          UseMI->getOperand(1).getReg() == li.reg)
+        continue;
+      return false;
+    }
+  }
+  return true;
+}
+
+
+/// RemoveCopiesFromValNo - The specified value# is defined by an implicit
+/// def and it is being removed.
Turn all copies from this value# into
+/// identity copies so they will be removed.
+void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
+                                                     VNInfo *VNI) {
+  SmallVector<MachineInstr*, 4> ImpDefs;
+  MachineOperand *LastUse = NULL;
+  unsigned LastUseIdx = li_->getUseIndex(VNI->def);
+  for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg),
+         RE = mri_->reg_end(); RI != RE;) {
+    MachineOperand *MO = &RI.getOperand();
+    MachineInstr *MI = &*RI;
+    ++RI;
+    if (MO->isDef()) {
+      if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+        ImpDefs.push_back(MI);
+      }
+      continue;
+    }
+    if (JoinedCopies.count(MI))
+      continue;
+    unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(MI));
+    LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx);
+    if (ULR == li.end() || ULR->valno != VNI)
+      continue;
+    // If the use is a copy, turn it into an identity copy.
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+        SrcReg == li.reg) {
+      // Each use MI may have multiple uses of this register. Change them all.
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.isReg() && MO.getReg() == li.reg)
+          MO.setReg(DstReg);
+      }
+      JoinedCopies.insert(MI);
+    } else if (UseIdx > LastUseIdx) {
+      LastUseIdx = UseIdx;
+      LastUse = MO;
+    }
+  }
+  if (LastUse) {
+    LastUse->setIsKill();
+    li.addKill(VNI, LastUseIdx+1);
+  } else {
+    // Remove dead implicit_def's.
+    while (!ImpDefs.empty()) {
+      MachineInstr *ImpDef = ImpDefs.back();
+      ImpDefs.pop_back();
+      li_->RemoveMachineInstrFromMaps(ImpDef);
+      ImpDef->eraseFromParent();
+    }
+  }
+}
+
+/// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
+/// virtual destination register with a physical source register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+                                                      MachineBasicBlock *CopyMBB,
+                                                      LiveInterval &DstInt,
+                                                      LiveInterval &SrcInt) {
+  // If the virtual register live interval is long but it has low use density,
+  // do not join them, instead mark the physical register as its allocation
+  // preference.
+  const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg);
+  unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+  unsigned Length = li_->getApproximateInstructionCount(DstInt);
+  if (Length > Threshold &&
+      (((float)std::distance(mri_->use_begin(DstInt.reg),
+                             mri_->use_end()) / Length) < (1.0 / Threshold)))
+    return false;
+
+  // If the virtual register live interval extends into a loop, turn down
+  // aggressiveness.
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+  const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
+  if (!L) {
+    // Let's see if the virtual register live interval extends into the loop.
+    LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
+    assert(DLR != DstInt.end() && "Live range not found!");
+    DLR = DstInt.FindLiveRangeContaining(DLR->end+1);
+    if (DLR != DstInt.end()) {
+      CopyMBB = li_->getMBBFromIndex(DLR->start);
+      L = loopInfo->getLoopFor(CopyMBB);
+    }
+  }
+
+  if (!L || Length <= Threshold)
+    return true;
+
+  unsigned UseIdx = li_->getUseIndex(CopyIdx);
+  LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+  MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+  if (loopInfo->getLoopFor(SMBB) != L) {
+    if (!loopInfo->isLoopHeader(CopyMBB))
+      return false;
+    // If vr's live interval extends past the loop header, do not join.
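+    // i.e. reject the join when DstInt is live into any successor of the
+    // loop header other than the header itself, since the physical
+    // register would then stay live across the whole loop body.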
+    for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(),
+           SE = CopyMBB->succ_end(); SI != SE; ++SI) {
+      MachineBasicBlock *SuccMBB = *SI;
+      if (SuccMBB == CopyMBB)
+        continue;
+      if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
+                          li_->getMBBEndIdx(SuccMBB)+1))
+        return false;
+    }
+  }
+  return true;
+}
+
+/// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+/// copy from a virtual source register to a physical destination register.
+bool
+SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+                                                      MachineBasicBlock *CopyMBB,
+                                                      LiveInterval &DstInt,
+                                                      LiveInterval &SrcInt) {
+  // If the virtual register live interval is long but it has low use density,
+  // do not join them, instead mark the physical register as its allocation
+  // preference.
+  const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg);
+  unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+  unsigned Length = li_->getApproximateInstructionCount(SrcInt);
+  if (Length > Threshold &&
+      (((float)std::distance(mri_->use_begin(SrcInt.reg),
+                             mri_->use_end()) / Length) < (1.0 / Threshold)))
+    return false;
+
+  if (SrcInt.empty())
+    // Must be implicit_def.
+    return false;
+
+  // If the virtual register live interval is defined in or crosses a loop,
+  // turn down aggressiveness.
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+  unsigned UseIdx = li_->getUseIndex(CopyIdx);
+  LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
+  assert(SLR != SrcInt.end() && "Live range not found!");
+  SLR = SrcInt.FindLiveRangeContaining(SLR->start-1);
+  if (SLR == SrcInt.end())
+    return true;
+  MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
+  const MachineLoop *L = loopInfo->getLoopFor(SMBB);
+
+  if (!L || Length <= Threshold)
+    return true;
+
+  if (loopInfo->getLoopFor(CopyMBB) != L) {
+    if (SMBB != L->getLoopLatch())
+      return false;
+    // If vr's live interval is extended from before the loop latch, do not
+    // join.
+    for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(),
+           PE = SMBB->pred_end(); PI != PE; ++PI) {
+      MachineBasicBlock *PredMBB = *PI;
+      if (PredMBB == SMBB)
+        continue;
+      if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
+                          li_->getMBBEndIdx(PredMBB)+1))
+        return false;
+    }
+  }
+  return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
+                                                unsigned SmallReg,
+                                                unsigned Threshold) {
+  // Then make sure the intervals are *short*.
+  LiveInterval &LargeInt = li_->getInterval(LargeReg);
+  LiveInterval &SmallInt = li_->getInterval(SmallReg);
+  unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
+  unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
+  if (SmallSize > Threshold || LargeSize > Threshold)
+    if ((float)std::distance(mri_->use_begin(SmallReg),
+                             mri_->use_end()) / SmallSize <
+        (float)std::distance(mri_->use_begin(LargeReg),
+                             mri_->use_end()) / LargeSize)
+      return false;
+  return true;
+}
+
+/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+/// register with a physical register, check if any of the virtual register's
+/// operands is a sub-register use or def. If so, make sure it won't result
+/// in an illegal extract_subreg or insert_subreg instruction. e.g.
+/// vr1024 = extract_subreg vr1025, 1
+/// ...
+/// vr1024 = mov8rr AH
+/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since
+/// AH does not have a super-reg whose sub-register 1 is AH.
+bool
+SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+                                                      unsigned VirtReg,
+                                                      unsigned PhysReg) {
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = &*I;
+    if (MI == CopyMI || JoinedCopies.count(MI))
+      continue;
+    unsigned SubIdx = O.getSubReg();
+    if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx))
+      return true;
+    if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+      SubIdx = MI->getOperand(2).getImm();
+      if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx))
+        return true;
+      if (O.isDef()) {
+        unsigned SrcReg = MI->getOperand(1).getReg();
+        const TargetRegisterClass *RC =
+          TargetRegisterInfo::isPhysicalRegister(SrcReg)
+          ? tri_->getPhysicalRegisterRegClass(SrcReg)
+          : mri_->getRegClass(SrcReg);
+        if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+          return true;
+      }
+    }
+    if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+        MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+      SubIdx = MI->getOperand(3).getImm();
+      if (VirtReg == MI->getOperand(0).getReg()) {
+        if (!tri_->getSubReg(PhysReg, SubIdx))
+          return true;
+      } else {
+        unsigned DstReg = MI->getOperand(0).getReg();
+        const TargetRegisterClass *RC =
+          TargetRegisterInfo::isPhysicalRegister(DstReg)
+          ? tri_->getPhysicalRegisterRegClass(DstReg)
+          : mri_->getRegClass(DstReg);
+        if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+/// an extract_subreg where dst is a physical register, e.g.
+/// cl = EXTRACT_SUBREG reg1024, 1
+bool
+SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
+                                       unsigned SrcReg, unsigned SubIdx,
+                                       unsigned &RealDstReg) {
+  const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
+  RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
+  assert(RealDstReg && "Invalid extract_subreg instruction!");
+
+  // For this type of EXTRACT_SUBREG, conservatively
+  // check if the live interval of the source register interferes with the
+  // actual super physical register we are trying to coalesce with.
+  LiveInterval &RHS = li_->getInterval(SrcReg);
+  if (li_->hasInterval(RealDstReg) &&
+      RHS.overlaps(li_->getInterval(RealDstReg))) {
+    DOUT << "Interfere with register ";
+    DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_));
+    return false; // Not coalescable
+  }
+  for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
+    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+      DOUT << "Interfere with sub-register ";
+      DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+      return false; // Not coalescable
+    }
+  return true;
+}
+
+/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+/// an insert_subreg where src is a physical register, e.g.
+/// reg1024 = INSERT_SUBREG reg1024, c1, 0
+bool
+SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
+                                      unsigned SrcReg, unsigned SubIdx,
+                                      unsigned &RealSrcReg) {
+  const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
+  RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
+  assert(RealSrcReg && "Invalid insert_subreg instruction!");
+
+  LiveInterval &RHS = li_->getInterval(DstReg);
+  if (li_->hasInterval(RealSrcReg) &&
+      RHS.overlaps(li_->getInterval(RealSrcReg))) {
+    DOUT << "Interfere with register ";
+    DEBUG(li_->getInterval(RealSrcReg).print(DOUT, tri_));
+    return false; // Not coalescable
+  }
+  for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
+    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+      DOUT << "Interfere with sub-register ";
+      DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+      return false; // Not coalescable
+    }
+  return true;
+}
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
+  MachineInstr *CopyMI = TheCopy.MI;
+
+  Again = false;
+  if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+    return false; // Already done.
+
+  DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
+
+  unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
+  bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
+  bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG;
+  bool isSubRegToReg = CopyMI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG;
+  unsigned SubIdx = 0;
+  if (isExtSubReg) {
+    DstReg = CopyMI->getOperand(0).getReg();
+    DstSubIdx = CopyMI->getOperand(0).getSubReg();
+    SrcReg = CopyMI->getOperand(1).getReg();
+    SrcSubIdx = CopyMI->getOperand(2).getImm();
+  } else if (isInsSubReg || isSubRegToReg) {
+    if (CopyMI->getOperand(2).getSubReg()) {
+      DOUT << "\tSource of insert_subreg is already coalesced "
+           << "to another register.\n";
+      return false; // Not coalescable.
+    }
+    DstReg = CopyMI->getOperand(0).getReg();
+    DstSubIdx = CopyMI->getOperand(3).getImm();
+    SrcReg = CopyMI->getOperand(2).getReg();
+  } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
+    assert(0 && "Unrecognized copy instruction!");
+    return false;
+  }
+
+  // If they are already joined, we continue.
+  if (SrcReg == DstReg) {
+    DOUT << "\tCopy already coalesced.\n";
+    return false; // Not coalescable.
+  }
+
+  bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+
+  // If they are both physical registers, we cannot join them.
+  if (SrcIsPhys && DstIsPhys) {
+    DOUT << "\tCan not coalesce physregs.\n";
+    return false; // Not coalescable.
+  }
+
+  // We only join virtual registers with allocatable physical registers.
+  if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
+    DOUT << "\tSrc reg is unallocatable physreg.\n";
+    return false; // Not coalescable.
+  }
+  if (DstIsPhys && !allocatableRegs_[DstReg]) {
+    DOUT << "\tDst reg is unallocatable physreg.\n";
+    return false; // Not coalescable.
+  }
+
+  // Check that a physical source register is compatible with dst regclass
+  if (SrcIsPhys) {
+    unsigned SrcSubReg = SrcSubIdx ?
+      tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg;
+    const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+    const TargetRegisterClass *DstSubRC = DstRC;
+    if (DstSubIdx)
+      DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
+    assert(DstSubRC && "Illegal subregister index");
+    if (!DstSubRC->contains(SrcSubReg)) {
+      DOUT << "\tIncompatible destination regclass: "
+           << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName()
+           << ".\n";
+      return false; // Not coalescable.
+    }
+  }
+
+  // Check that a physical dst register is compatible with source regclass
+  if (DstIsPhys) {
+    unsigned DstSubReg = DstSubIdx ?
+      tri_->getSubReg(DstReg, DstSubIdx) : DstReg;
+    const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg);
+    const TargetRegisterClass *SrcSubRC = SrcRC;
+    if (SrcSubIdx)
+      SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
+    assert(SrcSubRC && "Illegal subregister index");
+    if (!SrcSubRC->contains(DstSubReg)) {
+      DOUT << "\tIncompatible source regclass: "
+           << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName()
+           << ".\n";
+      return false; // Not coalescable.
+    }
+  }
+
+  // Should be non-null only when coalescing to a sub-register class.
+  bool CrossRC = false;
+  const TargetRegisterClass *NewRC = NULL;
+  MachineBasicBlock *CopyMBB = CopyMI->getParent();
+  unsigned RealDstReg = 0;
+  unsigned RealSrcReg = 0;
+  if (isExtSubReg || isInsSubReg || isSubRegToReg) {
+    SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm();
+    if (SrcIsPhys && isExtSubReg) {
+      // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
+      // coalesced with AX.
+      unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg();
+      if (DstSubIdx) {
+        // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
+        // coalesced to a larger register so the subreg indices cancel out.
+        if (DstSubIdx != SubIdx) {
+          DOUT << "\t Sub-register indices mismatch.\n";
+          return false; // Not coalescable.
+        }
+      } else
+        SrcReg = tri_->getSubReg(SrcReg, SubIdx);
+      SubIdx = 0;
+    } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) {
+      // EAX = INSERT_SUBREG EAX, r1024, 0
+      unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg();
+      if (SrcSubIdx) {
+        // EAX = INSERT_SUBREG EAX, r1024<2>, 2. Then r1024 has already been
+        // coalesced to a larger register so the subreg indices cancel out.
+        if (SrcSubIdx != SubIdx) {
+          DOUT << "\t Sub-register indices mismatch.\n";
+          return false; // Not coalescable.
+        }
+      } else
+        DstReg = tri_->getSubReg(DstReg, SubIdx);
+      SubIdx = 0;
+    } else if ((DstIsPhys && isExtSubReg) ||
+               (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
+      if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
+        DOUT << "\tSrc of extract_subreg already coalesced with reg"
+             << " of a super-class.\n";
+        return false; // Not coalescable.
+      }
+
+      if (isExtSubReg) {
+        if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
+          return false; // Not coalescable
+      } else {
+        if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+          return false; // Not coalescable
+      }
+      SubIdx = 0;
+    } else {
+      unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg()
+        : CopyMI->getOperand(2).getSubReg();
+      if (OldSubIdx) {
+        if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg))
+          // r1024<2> = EXTRACT_SUBREG r1025, 2. Then r1024 has already been
+          // coalesced to a larger register so the subreg indices cancel out.
+          // Also check if the other larger register is of the same register
+          // class as the would-be resulting register.
+          SubIdx = 0;
+        else {
+          DOUT << "\t Sub-register indices mismatch.\n";
+          return false; // Not coalescable.
+        }
+      }
+      if (SubIdx) {
+        unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
+        unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
+        unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
+        if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
+          Again = true;  // May be possible to coalesce later.
+          return false;
+        }
+      }
+    }
+  } else if (differingRegisterClasses(SrcReg, DstReg)) {
+    if (!CrossClassJoin)
+      return false;
+    CrossRC = true;
+
+    // FIXME: What if the result of an EXTRACT_SUBREG is then coalesced
+    // with another? If it's the resulting destination register, then
+    // the subidx must be propagated to uses (but only those defined
+    // by the EXTRACT_SUBREG). If it's being coalesced into another
+    // register, it should be safe because the register is assumed to have
+    // the register class of the super-register.
+
+    // Process moves where one of the registers has a sub-register index.
+    MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
+    MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
+    SubIdx = DstMO->getSubReg();
+    if (SubIdx) {
+      if (SrcMO->getSubReg())
+        // FIXME: can we handle this?
+        return false;
+      // This is not an insert_subreg but it looks like one.
+      // e.g. %reg1024:4 = MOV32rr %EAX
+      isInsSubReg = true;
+      if (SrcIsPhys) {
+        if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
+          return false; // Not coalescable
+        SubIdx = 0;
+      }
+    } else {
+      SubIdx = SrcMO->getSubReg();
+      if (SubIdx) {
+        // This is not an extract_subreg but it looks like one.
+        // e.g. %cl = MOV16rr %reg1024:1
+        isExtSubReg = true;
+        if (DstIsPhys) {
+          if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
+            return false; // Not coalescable
+          SubIdx = 0;
+        }
+      }
+    }
+
+    const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
+    const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
+    unsigned LargeReg = SrcReg;
+    unsigned SmallReg = DstReg;
+    unsigned Limit = 0;
+
+    // Now determine the register class of the joined register.
+    if (isExtSubReg) {
+      if (SubIdx && DstRC && DstRC->isASubClass()) {
+        // This is a move to a sub-register class. However, the source is a
+        // sub-register of a larger register class. We don't know what the
+        // register class should be. FIXME.
+        Again = true;
+        return false;
+      }
+      Limit = allocatableRCRegs_[DstRC].count();
+    } else if (!SrcIsPhys && !DstIsPhys) {
+      NewRC = getCommonSubClass(SrcRC, DstRC);
+      if (!NewRC) {
+        DOUT << "\tDisjoint regclasses: "
+             << SrcRC->getName() << ", "
+             << DstRC->getName() << ".\n";
+        return false; // Not coalescable.
+      }
+      if (DstRC->getSize() > SrcRC->getSize())
+        std::swap(LargeReg, SmallReg);
+    }
+
+    // If we are joining two virtual registers and the resulting register
+    // class is more restrictive (fewer registers, smaller size), check if
+    // it's worth doing the merge.
+    if (!SrcIsPhys && !DstIsPhys &&
+        (isExtSubReg || DstRC->isASubClass()) &&
+        !isWinToJoinCrossClass(LargeReg, SmallReg,
+                               allocatableRCRegs_[NewRC].count())) {
+      DOUT << "\tSrc/Dest are different register classes.\n";
+      // Allow the coalescer to try again in case either side gets coalesced to
+      // a physical register that's compatible with the other side. e.g.
+      // r1024 = MOV32to32_ r1025
+      // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+      Again = true;  // May be possible to coalesce later.
+      return false;
+    }
+  }
+
+  // Will it create illegal extract_subreg / insert_subreg?
+  if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg))
+    return false;
+  if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg))
+    return false;
+
+  LiveInterval &SrcInt = li_->getInterval(SrcReg);
+  LiveInterval &DstInt = li_->getInterval(DstReg);
+  assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg &&
+         "Register mapping is horribly broken!");
+
+  DOUT << "\t\tInspecting "; SrcInt.print(DOUT, tri_);
+  DOUT << " and "; DstInt.print(DOUT, tri_);
+  DOUT << ": ";
+
+  // Save a copy of the virtual register live interval. We'll manually
+  // merge this into the "real" physical register live interval this is
+  // coalesced with.
+  LiveInterval *SavedLI = 0;
+  if (RealDstReg)
+    SavedLI = li_->dupInterval(&SrcInt);
+  else if (RealSrcReg)
+    SavedLI = li_->dupInterval(&DstInt);
+
+  // Check if it is necessary to propagate the "isDead" property.
+  if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
+    MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
+    bool isDead = mopd->isDead();
+
+    // We need to be careful about coalescing a source physical register with a
+    // virtual register. Once the coalescing is done, it cannot be broken and
+    // these are not spillable! If the destination interval uses are far away,
+    // think twice about coalescing them!
+    if (!isDead && (SrcIsPhys || DstIsPhys)) {
+      // If the copy is in a loop, take care not to coalesce aggressively if the
+      // src is coming in from outside the loop (or the dst is out of the loop).
+      // If it's not in a loop, then determine whether to join them based purely
+      // on the length of the interval.
+      if (PhysJoinTweak) {
+        if (SrcIsPhys) {
+          if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+            DstInt.preference = SrcReg;
+            ++numAborts;
+            DOUT << "\tMay tie down a physical register, abort!\n";
+            Again = true;  // May be possible to coalesce later.
+            return false;
+          }
+        } else {
+          if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
+            SrcInt.preference = DstReg;
+            ++numAborts;
+            DOUT << "\tMay tie down a physical register, abort!\n";
+            Again = true;  // May be possible to coalesce later.
+            return false;
+          }
+        }
+      } else {
+        // If the virtual register live interval is long but it has low use
+        // density, do not join them, instead mark the physical register as
+        // its allocation preference.
+        LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+        unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
+        unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
+        const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
+        unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+        if (TheCopy.isBackEdge)
+          Threshold *= 2; // Favors back edge copies.
+
+        unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+        float Ratio = 1.0 / Threshold;
+        if (Length > Threshold &&
+            (((float)std::distance(mri_->use_begin(JoinVReg),
+                                   mri_->use_end()) / Length) < Ratio)) {
+          JoinVInt.preference = JoinPReg;
+          ++numAborts;
+          DOUT << "\tMay tie down a physical register, abort!\n";
+          Again = true;  // May be possible to coalesce later.
+          return false;
+        }
+      }
+    }
+  }
+
+  // Okay, attempt to join these two intervals. On failure, this returns false.
+  // Otherwise, if one of the intervals being joined is a physreg, this method
+  // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+  // been modified, so we can use this information below to update aliases.
+  bool Swapped = false;
+  // If SrcInt is implicitly defined, it's safe to coalesce.
+  bool isEmpty = SrcInt.empty();
+  if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
+    // Only coalesce an empty interval (defined by implicit_def) with
+    // another interval which has a valno defined by the CopyMI and the CopyMI
+    // is a kill of the implicit def.
+    DOUT << "Not profitable!\n";
+    return false;
+  }
+
+  if (!isEmpty && !JoinIntervals(DstInt, SrcInt, Swapped)) {
+    // Coalescing failed.
+
+    // If definition of source is defined by trivial computation, try
+    // rematerializing it.
+    if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+        ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI))
+      return true;
+
+    // If we can eliminate the copy without merging the live ranges, do so now.
+    if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
+        (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
+         RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
+      JoinedCopies.insert(CopyMI);
+      return true;
+    }
+
+    // Otherwise, we are unable to join the intervals.
+    DOUT << "Interference!\n";
+    Again = true;  // May be possible to coalesce later.
+    return false;
+  }
+
+  LiveInterval *ResSrcInt = &SrcInt;
+  LiveInterval *ResDstInt = &DstInt;
+  if (Swapped) {
+    std::swap(SrcReg, DstReg);
+    std::swap(ResSrcInt, ResDstInt);
+  }
+  assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+         "LiveInterval::join didn't work right!");
+
+  // If we're about to merge live ranges into a physical register live interval,
+  // we have to update any aliased register's live ranges to indicate that they
+  // have clobbered values for this range.
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    // If this is an extract_subreg where dst is a physical register, e.g.
+    // cl = EXTRACT_SUBREG reg1024, 1
+    // then create and update the actual physical register allocated to RHS.
+    if (RealDstReg || RealSrcReg) {
+      LiveInterval &RealInt =
+        li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg);
+      for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
+             E = SavedLI->vni_end(); I != E; ++I) {
+        const VNInfo *ValNo = *I;
+        VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy,
+                                                li_->getVNInfoAllocator());
+        NewValNo->hasPHIKill = ValNo->hasPHIKill;
+        NewValNo->redefByEC = ValNo->redefByEC;
+        RealInt.addKills(NewValNo, ValNo->kills);
+        RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
+      }
+      RealInt.weight += SavedLI->weight;
+      DstReg = RealDstReg ? RealDstReg : RealSrcReg;
+    }
+
+    // Update the liveintervals of sub-registers.
+    for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
+      li_->getOrCreateInterval(*AS).MergeInClobberRanges(*ResSrcInt,
+                                                  li_->getVNInfoAllocator());
+  }
+
+  // If this is an EXTRACT_SUBREG, make sure the result of coalescing is the
+  // larger super-register.
+  if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
+      !SrcIsPhys && !DstIsPhys) {
+    if ((isExtSubReg && !Swapped) ||
+        ((isInsSubReg || isSubRegToReg) && Swapped)) {
+      ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
+      std::swap(SrcReg, DstReg);
+      std::swap(ResSrcInt, ResDstInt);
+    }
+  }
+
+  // Coalescing to a virtual register that is of a sub-register class of the
+  // other. Make sure the resulting register is set to the right register class.
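+  // For example (x86 class names, illustration only): joining a GR32 virtual
+  // register with one constrained to a smaller subclass must leave the merged
+  // register in the common subclass computed above, or the allocator could
+  // later assign it a physical register the subclass cannot hold.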
+  if (CrossRC) {
+    ++numCrossRCs;
+    if (NewRC)
+      mri_->setRegClass(DstReg, NewRC);
+  }
+
+  if (NewHeuristic) {
+    // Add all copies that define val# in the source interval into the queue.
+    for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(),
+           e = ResSrcInt->vni_end(); i != e; ++i) {
+      const VNInfo *vni = *i;
+      if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+        continue;
+      MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+      unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx;
+      if (CopyMI &&
+          JoinedCopies.count(CopyMI) == 0 &&
+          tii_->isMoveInstr(*CopyMI, NewSrcReg, NewDstReg,
+                            NewSrcSubIdx, NewDstSubIdx)) {
+        unsigned LoopDepth = loopInfo->getLoopDepth(CopyMBB);
+        JoinQueue->push(CopyRec(CopyMI, LoopDepth,
+                                isBackEdgeCopy(CopyMI, DstReg)));
+      }
+    }
+  }
+
+  // Remember to delete the copy instruction.
+  JoinedCopies.insert(CopyMI);
+
+  // Some live ranges have been lengthened due to coalescing; eliminate the
+  // unnecessary kills.
+  RemoveUnnecessaryKills(SrcReg, *ResDstInt);
+  if (TargetRegisterInfo::isVirtualRegister(DstReg))
+    RemoveUnnecessaryKills(DstReg, *ResDstInt);
+
+  if (isInsSubReg)
+    // Avoid:
+    // r1024 = op
+    // r1024 = implicit_def
+    // ...
+    //       = r1024
+    RemoveDeadImpDef(DstReg, *ResDstInt);
+  UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
+
+  // SrcReg is guaranteed to be the register whose live interval is being
+  // merged.
+  li_->removeInterval(SrcReg);
+
+  // Manually delete the live interval copy.
+  if (SavedLI) {
+    SavedLI->clear();
+    delete SavedLI;
+  }
+
+  if (isEmpty) {
+    // Now the copy is being coalesced away, the val# previously defined
+    // by the copy is being defined by an IMPLICIT_DEF which defines a zero
+    // length interval. Remove the val#.
+    unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+    const LiveRange *LR = ResDstInt->getLiveRangeContaining(CopyIdx);
+    VNInfo *ImpVal = LR->valno;
+    assert(ImpVal->def == CopyIdx);
+    unsigned NextDef = LR->end;
+    RemoveCopiesFromValNo(*ResDstInt, ImpVal);
+    ResDstInt->removeValNo(ImpVal);
+    LR = ResDstInt->FindLiveRangeContaining(NextDef);
+    if (LR != ResDstInt->end() && LR->valno->def == NextDef) {
+      // Special case: vr1024 = implicit_def
+      //               vr1024 = insert_subreg vr1024, vr1025, c
+      // The insert_subreg becomes a "copy" that defines a val# which can itself
+      // be coalesced away.
+      MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef);
+      if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG)
+        LR->valno->copy = DefMI;
+    }
+  }
+
+  // If the resulting interval has a preference that no longer fits because of
+  // subreg coalescing, just clear the preference.
+  if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
+      TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
+    const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
+    if (!RC->contains(ResDstInt->preference))
+      ResDstInt->preference = 0;
+  }
+
+  DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_);
+  DOUT << "\n";
+
+  ++numJoins;
+  return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the two input
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve. InstDefiningValue
+/// keeps track of the new InstDefiningValue assignment for the result
+/// LiveInterval.
ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that
+/// have already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+                                  SmallVector<VNInfo*, 16> &NewVNInfo,
+                                  DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+                                  DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+                                  SmallVector<int, 16> &ThisValNoAssignments,
+                                  SmallVector<int, 16> &OtherValNoAssignments) {
+  unsigned VN = VNI->id;
+
+  // If the VN has already been computed, just return it.
+  if (ThisValNoAssignments[VN] >= 0)
+    return ThisValNoAssignments[VN];
+//  assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+
+  // If this val is not a copy from the other val, then it must be a new value
+  // number in the destination.
+  DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+  if (I == ThisFromOther.end()) {
+    NewVNInfo.push_back(VNI);
+    return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+  }
+  VNInfo *OtherValNo = I->second;
+
+  // Otherwise, this *is* a copy from the RHS. If the other side has already
+  // been computed, return it.
+  if (OtherValNoAssignments[OtherValNo->id] >= 0)
+    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+  // Mark this value number as currently being computed, then ask what the
+  // ultimate value # of the other value is.
+  ThisValNoAssignments[VN] = -2;
+  unsigned UltimateVN =
+    ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+                      OtherValNoAssignments, ThisValNoAssignments);
+  return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
+  return std::find(V.begin(), V.end(), Val) != V.end();
+}
+
+/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+/// the specified live interval is defined by a copy from the specified
+/// register.
+bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
+                                                           LiveRange *LR,
+                                                           unsigned Reg) {
+  unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
+  if (SrcReg == Reg)
+    return true;
+  if (LR->valno->def == ~0U &&
+      TargetRegisterInfo::isPhysicalRegister(li.reg) &&
+      *tri_->getSuperRegisters(li.reg)) {
+    // It's a sub-register live interval, we may not have precise information.
+    // Re-compute it.
+    MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (DefMI &&
+        tii_->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+        DstReg == li.reg && SrcReg == Reg) {
+      // Cache computed info.
+      LR->valno->def = LR->start;
+      LR->valno->copy = DefMI;
+      return true;
+    }
+  }
+  return false;
+}
+
+/// SimpleJoin - Attempt to join the specified interval into this one. The
+/// caller of this method must guarantee that the RHS only contains a single
+/// value number and that the RHS is not defined by a copy from this
+/// interval. This returns false if the intervals are not joinable, or it
+/// joins them and returns true.
+bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
+  assert(RHS.containsOneValue());
+
+  // Some number (potentially more than one) of the value numbers in the
+  // current interval may be defined as copies from the RHS.
Scan the overlapping
+  // portions of the LHS and RHS, keeping track of this and looking for
+  // overlapping live ranges that are NOT defined as copies. If these exist, we
+  // cannot coalesce.
+
+  LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
+  LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
+
+  if (LHSIt->start < RHSIt->start) {
+    LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
+    if (LHSIt != LHS.begin()) --LHSIt;
+  } else if (RHSIt->start < LHSIt->start) {
+    RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
+    if (RHSIt != RHS.begin()) --RHSIt;
+  }
+
+  SmallVector<VNInfo*, 8> EliminatedLHSVals;
+
+  while (1) {
+    // Determine if these live intervals overlap.
+    bool Overlaps = false;
+    if (LHSIt->start <= RHSIt->start)
+      Overlaps = LHSIt->end > RHSIt->start;
+    else
+      Overlaps = RHSIt->end > LHSIt->start;
+
+    // If the live intervals overlap, there are two interesting cases: if the
+    // LHS interval is defined by a copy from the RHS, it's ok and we record
+    // that the LHS value # is the same as the RHS. If it's not, then we cannot
+    // coalesce these live ranges and we bail out.
+    if (Overlaps) {
+      // If we haven't already recorded that this value # is safe, check it.
+      if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
+        // Copy from the RHS?
+        if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
+          return false;    // Nope, bail out.
+
+        if (LHSIt->contains(RHSIt->valno->def))
+          // Here is an interesting situation:
+          // BB1:
+          //   vr1025 = copy vr1024
+          //   ..
+          // BB2:
+          //   vr1024 = op
+          //          = vr1025
+          // Even though vr1025 is copied from vr1024, it's not safe to
+          // coalesce them since the live range of vr1025 intersects the
+          // def of vr1024. This happens because vr1025 is assigned the
+          // value of the previous iteration of vr1024.
+          return false;
+        EliminatedLHSVals.push_back(LHSIt->valno);
+      }
+
+      // We know this entire LHS live range is okay, so skip it now.
+      if (++LHSIt == LHSEnd) break;
+      continue;
+    }
+
+    if (LHSIt->end < RHSIt->end) {
+      if (++LHSIt == LHSEnd) break;
+    } else {
+      // One interesting case to check here. It's possible that we have
+      // something like "X3 = Y" which defines a new value number in the LHS,
+      // and is the last use of this liverange of the RHS. In this case, we
+      // want to notice this copy (so that it gets coalesced away) even though
+      // the live ranges don't actually overlap.
+      if (LHSIt->start == RHSIt->end) {
+        if (InVector(LHSIt->valno, EliminatedLHSVals)) {
+          // We already know that this value number is going to be merged in
+          // if coalescing succeeds. Just skip the liverange.
+          if (++LHSIt == LHSEnd) break;
+        } else {
+          // Otherwise, if this is a copy from the RHS, mark it as being merged
+          // in.
+          if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
+            if (LHSIt->contains(RHSIt->valno->def))
+              // Here is an interesting situation:
+              // BB1:
+              //   vr1025 = copy vr1024
+              //   ..
+              // BB2:
+              //   vr1024 = op
+              //          = vr1025
+              // Even though vr1025 is copied from vr1024, it's not safe to
+              // coalesce them since the live range of vr1025 intersects the
+              // def of vr1024. This happens because vr1025 is assigned the
+              // value of the previous iteration of vr1024.
+              return false;
+            EliminatedLHSVals.push_back(LHSIt->valno);
+
+            // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break; + } + } + } + + if (++RHSIt == RHSEnd) break; + } + } + + // If we got here, we know that the coalescing will be successful and that + // the value numbers in EliminatedLHSVals will all be merged together. Since + // the most common case is that EliminatedLHSVals has a single number, we + // optimize for it: if there is more than one value, we merge them all into + // the lowest numbered one, then handle the interval as if we were merging + // with one value number. + VNInfo *LHSValNo = NULL; + if (EliminatedLHSVals.size() > 1) { + // Loop through all the equal value numbers merging them into the smallest + // one. + VNInfo *Smallest = EliminatedLHSVals[0]; + for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) { + if (EliminatedLHSVals[i]->id < Smallest->id) { + // Merge the current notion of the smallest into the smaller one. + LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]); + Smallest = EliminatedLHSVals[i]; + } else { + // Merge into the smallest. + LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest); + } + } + LHSValNo = Smallest; + } else if (EliminatedLHSVals.empty()) { + if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && + *tri_->getSuperRegisters(LHS.reg)) + // Imprecise sub-register information. Can't handle it. + return false; + assert(0 && "No copies from the RHS?"); + } else { + LHSValNo = EliminatedLHSVals[0]; + } + + // Okay, now that there is a single LHS value number that we're merging the + // RHS into, update the value number info for the LHS to indicate that the + // value number is defined where the RHS value number was. + const VNInfo *VNI = RHS.getValNumInfo(0); + LHSValNo->def = VNI->def; + LHSValNo->copy = VNI->copy; + + // Okay, the final step is to loop over the RHS live intervals, adding them to + // the LHS. + LHSValNo->hasPHIKill |= VNI->hasPHIKill; + LHS.addKills(LHSValNo, VNI->kills); + LHS.MergeRangesInAsValue(RHS, LHSValNo); + LHS.weight += RHS.weight; + if (RHS.preference && !LHS.preference) + LHS.preference = RHS.preference; + + // Update the liveintervals of sub-registers. + if (TargetRegisterInfo::isPhysicalRegister(LHS.reg)) + for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS) + li_->getOrCreateInterval(*AS).MergeInClobberRanges(LHS, + li_->getVNInfoAllocator()); + + return true; +} + +/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. Otherwise, if one of the intervals being joined is a +/// physreg, this method always canonicalizes LHS to be it. The output +/// "RHS" will not have been modified, so we can use this information +/// below to update aliases. +bool +SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, + bool &Swapped) { + // Compute the final value assignment, assuming that the live ranges can be + // coalesced. + SmallVector<int, 16> LHSValNoAssignments; + SmallVector<int, 16> RHSValNoAssignments; + DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; + DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; + SmallVector<VNInfo*, 16> NewVNInfo; + + // If a live interval is a physical register, conservatively check if any + // of its sub-registers is overlapping the live interval of the virtual + // register. If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && + *tri_->getSubRegisters(LHS.reg)) { + // If it's coalescing a virtual register to a physical register, estimate + // its live interval length. This is the *cost* of scanning an entire live + // interval. 
If the cost is low, we'll do an exhaustive check instead.
+
+    // If this is something like this:
+    // BB1:
+    // v1024 = op
+    // ...
+    // BB2:
+    // ...
+    // RAX   = v1024
+    //
+    // That is, the live interval of v1024 crosses a bb. Then we can't rely on
+    // the less conservative check. It's possible a sub-register is defined
+    // before v1024 (or live in) and live out of BB1.
+    if (RHS.containsOneValue() &&
+        li_->intervalIsInOneMBB(RHS) &&
+        li_->getApproximateInstructionCount(RHS) <= 10) {
+      // Perform a more exhaustive check for some common cases.
+      if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
+        return false;
+    } else {
+      for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
+        if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+          DOUT << "Interfere with sub-register ";
+          DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+          return false;
+        }
+    }
+  } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) &&
+             *tri_->getSubRegisters(RHS.reg)) {
+    if (LHS.containsOneValue() &&
+        li_->getApproximateInstructionCount(LHS) <= 10) {
+      // Perform a more exhaustive check for some common cases.
+      if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
+        return false;
+    } else {
+      for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
+        if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
+          DOUT << "Interfere with sub-register ";
+          DEBUG(li_->getInterval(*SR).print(DOUT, tri_));
+          return false;
+        }
+    }
+  }
+
+  // Compute ultimate value numbers for the LHS and RHS values.
+  if (RHS.containsOneValue()) {
+    // Copies from a liveinterval with a single value are simple to handle and
+    // very common, handle the special case here. This is important, because
+    // often RHS is small and LHS is large (e.g. a physreg).
+
+    // Find out if the RHS is defined as a copy from some value in the LHS.
+    int RHSVal0DefinedFromLHS = -1;
+    int RHSValID = -1;
+    VNInfo *RHSValNoInfo = NULL;
+    VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0);
+    unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0);
+    if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) {
+      // If RHS is not defined as a copy from the LHS, we can use simpler and
+      // faster checks to see if the live ranges are coalescable. This joiner
+      // can't swap the LHS/RHS intervals though.
+      if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+        return SimpleJoin(LHS, RHS);
+      } else {
+        RHSValNoInfo = RHSValNoInfo0;
+      }
+    } else {
+      // It was defined as a copy from the LHS, find out what value # it is.
+      RHSValNoInfo = LHS.getLiveRangeContaining(RHSValNoInfo0->def-1)->valno;
+      RHSValID = RHSValNoInfo->id;
+      RHSVal0DefinedFromLHS = RHSValID;
+    }
+
+    LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+    RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+    NewVNInfo.resize(LHS.getNumValNums(), NULL);
+
+    // Okay, *all* of the values in LHS that are defined as a copy from RHS
+    // should now get updated.
+    for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+         i != e; ++i) {
+      VNInfo *VNI = *i;
+      unsigned VN = VNI->id;
+      if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) {
+        if (LHSSrcReg != RHS.reg) {
+          // If this is not a copy from the RHS, its value number will be
+          // unmodified by the coalescing.
+          NewVNInfo[VN] = VNI;
+          LHSValNoAssignments[VN] = VN;
+        } else if (RHSValID == -1) {
+          // Otherwise, it is a copy from the RHS, and we don't already have a
+          // value# for it. Keep the current value number, but remember it.
+ LHSValNoAssignments[VN] = RHSValID = VN; + NewVNInfo[VN] = RHSValNoInfo; + LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; + } else { + // Otherwise, use the specified value #. + LHSValNoAssignments[VN] = RHSValID; + if (VN == (unsigned)RHSValID) { // Else this val# is dead. + NewVNInfo[VN] = RHSValNoInfo; + LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; + } + } + } else { + NewVNInfo[VN] = VNI; + LHSValNoAssignments[VN] = VN; + } + } + + assert(RHSValID != -1 && "Didn't find value #?"); + RHSValNoAssignments[0] = RHSValID; + if (RHSVal0DefinedFromLHS != -1) { + // This path doesn't go through ComputeUltimateVN so just set + // it to anything. + RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1; + } + } else { + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy? + continue; + + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + if (li_->getVNInfoSourceReg(VNI) != RHS.reg) + continue; + + // Figure out the value # from the RHS. + LHSValsDefinedFromRHS[VNI]=RHS.getLiveRangeContaining(VNI->def-1)->valno; + } + + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy? + continue; + + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. + if (li_->getVNInfoSourceReg(VNI) != LHS.reg) + continue; + + // Figure out the value # from the LHS. + RHSValsDefinedFromLHS[VNI]=LHS.getLiveRangeContaining(VNI->def-1)->valno; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->def == ~1U) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->def == ~1U) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; + } + + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); + } + } + + // Armed with the mappings of LHS/RHS values to ultimate values, walk the + // interval lists to see if these intervals are coalescable. + LiveInterval::const_iterator I = LHS.begin(); + LiveInterval::const_iterator IE = LHS.end(); + LiveInterval::const_iterator J = RHS.begin(); + LiveInterval::const_iterator JE = RHS.end(); + + // Skip ahead until the first place of potential sharing. 
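+  // E.g. (hypothetical indices): if the LHS begins with a range at [4,20) and
+  // the RHS does not start until index 100, upper_bound jumps I straight to
+  // the LHS range nearest index 100 instead of walking every earlier range.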
+  if (I->start < J->start) {
+    I = std::upper_bound(I, IE, J->start);
+    if (I != LHS.begin()) --I;
+  } else if (J->start < I->start) {
+    J = std::upper_bound(J, JE, I->start);
+    if (J != RHS.begin()) --J;
+  }
+
+  while (1) {
+    // Determine if these two live ranges overlap.
+    bool Overlaps;
+    if (I->start < J->start) {
+      Overlaps = I->end > J->start;
+    } else {
+      Overlaps = J->end > I->start;
+    }
+
+    // If so, check value # info to determine if they are really different.
+    if (Overlaps) {
+      // If the live range overlap will map to the same value number in the
+      // result liverange, we can still coalesce them. If not, we can't.
+      if (LHSValNoAssignments[I->valno->id] !=
+          RHSValNoAssignments[J->valno->id])
+        return false;
+    }
+
+    if (I->end < J->end) {
+      ++I;
+      if (I == IE) break;
+    } else {
+      ++J;
+      if (J == JE) break;
+    }
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+         E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned LHSValID = LHSValNoAssignments[VNI->id];
+    LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
+    NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill;
+    RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned RHSValID = RHSValNoAssignments[VNI->id];
+    LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
+    NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill;
+    LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
+  }
+
+  // If we get here, we know that we can coalesce the live ranges. Ask the
+  // intervals to coalesce themselves now.
+  if ((RHS.ranges.size() > LHS.ranges.size() &&
+       TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
+      TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
+    RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo);
+    Swapped = true;
+  } else {
+    LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo);
+    Swapped = false;
+  }
+  return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+  // depth of the basic block (the unsigned), and then on the MBB number.
+  struct DepthMBBCompare {
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+      if (LHS.first > RHS.first) return true;   // Deeper loops first
+      return LHS.first == RHS.first &&
+        LHS.second->getNumber() < RHS.second->getNumber();
+    }
+  };
+}
+
+/// getRepIntervalSize - Returns the size of the interval that represents the
+/// specified register.
+template<class SF>
+unsigned JoinPriorityQueue<SF>::getRepIntervalSize(unsigned Reg) {
+  return Rc->getRepIntervalSize(Reg);
+}
+
+/// CopyRecSort::operator() - Join priority queue sorting function.
+///
+bool CopyRecSort::operator()(CopyRec left, CopyRec right) const {
+  // Inner loops first.
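+  // Note that std::priority_queue pops its *largest* element, so returning
+  // false here ranks 'left' ahead of 'right': deeper-loop copies are joined
+  // first, and back-edge copies win ties at equal depth.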
+  if (left.LoopDepth > right.LoopDepth)
+    return false;
+  else if (left.LoopDepth == right.LoopDepth)
+    if (left.isBackEdge && !right.isBackEdge)
+      return false;
+  return true;
+}
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                                              std::vector<CopyRec> &TryAgain) {
+  DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+  std::vector<CopyRec> VirtCopies;
+  std::vector<CopyRec> PhysCopies;
+  std::vector<CopyRec> ImpDefCopies;
+  unsigned LoopDepth = loopInfo->getLoopDepth(MBB);
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E;) {
+    MachineInstr *Inst = MII++;
+
+    // If this isn't a copy or an extract_subreg, we can't join intervals.
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(1).getReg();
+    } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+               Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(2).getReg();
+    } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+      continue;
+
+    bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+    bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+    if (NewHeuristic) {
+      JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg)));
+    } else {
+      if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+        ImpDefCopies.push_back(CopyRec(Inst, 0, false));
+      else if (SrcIsPhys || DstIsPhys)
+        PhysCopies.push_back(CopyRec(Inst, 0, false));
+      else
+        VirtCopies.push_back(CopyRec(Inst, 0, false));
+    }
+  }
+
+  if (NewHeuristic)
+    return;
+
+  // Try coalescing implicit copies first, followed by copies to / from
+  // physical registers, then finally copies from virtual registers to
+  // virtual registers.
+  for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = ImpDefCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+  for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = PhysCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+  for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = VirtCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+}
+
+void SimpleRegisterCoalescing::joinIntervals() {
+  DOUT << "********** JOINING INTERVALS ***********\n";
+
+  if (NewHeuristic)
+    JoinQueue = new JoinPriorityQueue<CopyRecSort>(this);
+
+  std::vector<CopyRec> TryAgainList;
+  if (loopInfo->empty()) {
+    // If there are no loops in the function, join intervals in function order.
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+         I != E; ++I)
+      CopyCoalesceInMBB(I, TryAgainList);
+  } else {
+    // Otherwise, join intervals in inner loops before other intervals.
+    // Unfortunately we can't just iterate over loop hierarchy here because
+    // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+    // Join intervals in the function prolog first. We want to join physical
+    // registers with virtual registers before the intervals get too long.
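+    // Sketch of the ordering (hypothetical layout): a block at loop depth 2
+    // sorts ahead of one at depth 1 under DepthMBBCompare below, and blocks
+    // at equal depth keep layout order, so innermost loops are visited first.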
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){ + MachineBasicBlock *MBB = I; + MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I)); + } + + // Sort by loop depth. + std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); + + // Finally, join intervals in loop nest order. + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) + CopyCoalesceInMBB(MBBs[i].second, TryAgainList); + } + + // Joining intervals can allow other intervals to be joined. Iteratively join + // until we make no progress. + if (NewHeuristic) { + SmallVector<CopyRec, 16> TryAgain; + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; + while (!JoinQueue->empty()) { + CopyRec R = JoinQueue->pop(); + bool Again = false; + bool Success = JoinCopy(R, Again); + if (Success) + ProgressMade = true; + else if (Again) + TryAgain.push_back(R); + } + + if (ProgressMade) { + while (!TryAgain.empty()) { + JoinQueue->push(TryAgain.back()); + TryAgain.pop_back(); + } + } + } + } else { + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; + + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + CopyRec &TheCopy = TryAgainList[i]; + if (TheCopy.MI) { + bool Again = false; + bool Success = JoinCopy(TheCopy, Again); + if (Success || !Again) { + TheCopy.MI = 0; // Mark this one as done. + ProgressMade = true; + } + } + } + } + } + + if (NewHeuristic) + delete JoinQueue; +} + +/// Return true if the two specified registers belong to different register +/// classes. The registers may be either phys or virt regs. +bool +SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, + unsigned RegB) const { + // Get the register classes for the first reg. + if (TargetRegisterInfo::isPhysicalRegister(RegA)) { + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "Shouldn't consider two physregs!"); + return !mri_->getRegClass(RegB)->contains(RegA); + } + + // Compare against the regclass for the second reg. + const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA); + if (TargetRegisterInfo::isVirtualRegister(RegB)) { + const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB); + return RegClassA != RegClassB; + } + return !RegClassA->contains(RegB); +} + +/// lastRegisterUse - Returns the last use of the specific register between +/// cycles Start and End or NULL if there are no uses. +MachineOperand * +SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, + unsigned Reg, unsigned &UseIdx) const{ + UseIdx = 0; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + MachineOperand *LastUse = NULL; + for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg), + E = mri_->use_end(); I != E; ++I) { + MachineOperand &Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + SrcReg == DstReg) + // Ignore identity copies. 
+        continue;
+      unsigned Idx = li_->getInstructionIndex(UseMI);
+      if (Idx >= Start && Idx < End && Idx >= UseIdx) {
+        LastUse = &Use;
+        UseIdx = li_->getUseIndex(Idx);
+      }
+    }
+    return LastUse;
+  }
+
+  int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM;
+  int s = Start;
+  while (e >= s) {
+    // Skip deleted instructions.
+    MachineInstr *MI = li_->getInstructionFromIndex(e);
+    while ((e - InstrSlots::NUM) >= s && !MI) {
+      e -= InstrSlots::NUM;
+      MI = li_->getInstructionFromIndex(e);
+    }
+    if (e < s || MI == NULL)
+      return NULL;
+
+    // Ignore identity copies.
+    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+          SrcReg == DstReg))
+      for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+        MachineOperand &Use = MI->getOperand(i);
+        if (Use.isReg() && Use.isUse() && Use.getReg() &&
+            tri_->regsOverlap(Use.getReg(), Reg)) {
+          UseIdx = li_->getUseIndex(e);
+          return &Use;
+        }
+      }
+
+    e -= InstrSlots::NUM;
+  }
+
+  return NULL;
+}
+
+
+void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
+  if (TargetRegisterInfo::isPhysicalRegister(reg))
+    cerr << tri_->getName(reg);
+  else
+    cerr << "%reg" << reg;
+}
+
+void SimpleRegisterCoalescing::releaseMemory() {
+  JoinedCopies.clear();
+  ReMatCopies.clear();
+  ReMatDefs.clear();
+}
+
+static bool isZeroLengthInterval(LiveInterval *li) {
+  for (LiveInterval::Ranges::const_iterator
+         i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
+    if (i->end - i->start > LiveInterval::InstrSlots::NUM)
+      return false;
+  return true;
+}
+
+/// TurnCopyIntoImpDef - If the source of the specified copy is an implicit
+/// def, turn the copy into an implicit def.
+bool
+SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
+                                             MachineBasicBlock *MBB,
+                                             unsigned DstReg, unsigned SrcReg) {
+  MachineInstr *CopyMI = &*I;
+  unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+  if (!li_->hasInterval(SrcReg))
+    return false;
+  LiveInterval &SrcInt = li_->getInterval(SrcReg);
+  if (!SrcInt.empty())
+    return false;
+  if (!li_->hasInterval(DstReg))
+    return false;
+  LiveInterval &DstInt = li_->getInterval(DstReg);
+  const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx);
+  DstInt.removeValNo(DstLR->valno);
+  CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+  for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i)
+    CopyMI->RemoveOperand(i);
+  bool NoUse = mri_->use_empty(SrcReg);
+  if (NoUse) {
+    for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
+           E = mri_->reg_end(); I != E; ) {
+      assert(I.getOperand().isDef());
+      MachineInstr *DefMI = &*I;
+      ++I;
+      // The implicit_def source has no other uses, delete it.
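+      // (Every def reaching this point is necessarily an IMPLICIT_DEF of an
+      //  empty interval; the assert below double-checks that before erasing.)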
+      assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF);
+      li_->RemoveMachineInstrFromMaps(DefMI);
+      DefMI->eraseFromParent();
+    }
+  }
+  ++I;
+  return true;
+}
+
+
+bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  mri_ = &fn.getRegInfo();
+  tm_ = &fn.getTarget();
+  tri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  li_ = &getAnalysis<LiveIntervals>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  DOUT << "********** SIMPLE REGISTER COALESCING **********\n"
+       << "********** Function: "
+       << ((Value*)mf_->getFunction())->getName() << '\n';
+
+  allocatableRegs_ = tri_->getAllocatableSet(fn);
+  for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
+         E = tri_->regclass_end(); I != E; ++I)
+    allocatableRCRegs_.insert(std::make_pair(*I,
+                                             tri_->getAllocatableSet(fn, *I)));
+
+  // Join (coalesce) intervals if requested.
+  if (EnableJoining) {
+    joinIntervals();
+    DEBUG({
+        DOUT << "********** INTERVALS POST JOINING **********\n";
+        for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+             I != E; ++I) {
+          I->second->print(DOUT, tri_);
+          DOUT << "\n";
+        }
+      });
+  }
+
+  // Perform a final pass over the instructions and compute spill weights
+  // and remove identity moves.
+  SmallVector<unsigned, 4> DeadDefs;
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    MachineBasicBlock* mbb = mbbi;
+    unsigned loopDepth = loopInfo->getLoopDepth(mbb);
+
+    for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+         mii != mie; ) {
+      MachineInstr *MI = mii;
+      unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+      if (JoinedCopies.count(MI)) {
+        // Delete all coalesced copies.
+        if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+          assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG ||
+                  MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
+                  MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) &&
+                 "Unrecognized copy instruction");
+          DstReg = MI->getOperand(0).getReg();
+        }
+        if (MI->registerDefIsDead(DstReg)) {
+          LiveInterval &li = li_->getInterval(DstReg);
+          if (!ShortenDeadCopySrcLiveRange(li, MI))
+            ShortenDeadCopyLiveRange(li, MI);
+        }
+        li_->RemoveMachineInstrFromMaps(MI);
+        mii = mbbi->erase(mii);
+        ++numPeep;
+        continue;
+      }
+
+      // Now check if this is a remat'ed def instruction which is now dead.
+      if (ReMatDefs.count(MI)) {
+        bool isDead = true;
+        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+          const MachineOperand &MO = MI->getOperand(i);
+          if (!MO.isReg())
+            continue;
+          unsigned Reg = MO.getReg();
+          if (!Reg)
+            continue;
+          if (TargetRegisterInfo::isVirtualRegister(Reg))
+            DeadDefs.push_back(Reg);
+          if (MO.isDead())
+            continue;
+          if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+              !mri_->use_empty(Reg)) {
+            isDead = false;
+            break;
+          }
+        }
+        if (isDead) {
+          while (!DeadDefs.empty()) {
+            unsigned DeadDef = DeadDefs.back();
+            DeadDefs.pop_back();
+            RemoveDeadDef(li_->getInterval(DeadDef), MI);
+          }
+          li_->RemoveMachineInstrFromMaps(mii);
+          mii = mbbi->erase(mii);
+          continue;
+        } else
+          DeadDefs.clear();
+      }
+
+      // If the move will be an identity move, delete it.
+      bool isMove = tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+      if (isMove && SrcReg == DstReg) {
+        if (li_->hasInterval(SrcReg)) {
+          LiveInterval &RegInt = li_->getInterval(SrcReg);
+          // If the def of this move instruction is dead, remove its live range
+          // from the destination register's live interval.
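+          // (For illustration: an identity move such as "%reg1024 = MOV
+          //  %reg1024" with a dead def would otherwise leave a stale range.)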
+          if (MI->registerDefIsDead(DstReg)) {
+            if (!ShortenDeadCopySrcLiveRange(RegInt, MI))
+              ShortenDeadCopyLiveRange(RegInt, MI);
+          }
+        }
+        li_->RemoveMachineInstrFromMaps(MI);
+        mii = mbbi->erase(mii);
+        ++numPeep;
+      } else if (!isMove || !TurnCopyIntoImpDef(mii, mbb, DstReg, SrcReg)) {
+        SmallSet<unsigned, 4> UniqueUses;
+        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+          const MachineOperand &mop = MI->getOperand(i);
+          if (mop.isReg() && mop.getReg() &&
+              TargetRegisterInfo::isVirtualRegister(mop.getReg())) {
+            unsigned reg = mop.getReg();
+            // Multiple uses of reg by the same instruction. It should not
+            // contribute to spill weight again.
+            if (UniqueUses.count(reg) != 0)
+              continue;
+            LiveInterval &RegInt = li_->getInterval(reg);
+            RegInt.weight +=
+              li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth);
+            UniqueUses.insert(reg);
+          }
+        }
+        ++mii;
+      }
+    }
+  }
+
+  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+    LiveInterval &LI = *I->second;
+    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+      // If the live interval length is essentially zero, i.e. in every live
+      // range the use follows def immediately, it doesn't make sense to spill
+      // it and hope it will be easier to allocate for this li.
+      if (isZeroLengthInterval(&LI))
+        LI.weight = HUGE_VALF;
+      else {
+        bool isLoad = false;
+        SmallVector<LiveInterval*, 4> SpillIs;
+        if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
+          // If all of the definitions of the interval are re-materializable,
+          // it is a preferred candidate for spilling. If none of the defs are
+          // loads, then it's potentially very cheap to re-materialize.
+          // FIXME: this gets much more complicated once we support non-trivial
+          // re-materialization.
+          if (isLoad)
+            LI.weight *= 0.9F;
+          else
+            LI.weight *= 0.5F;
+        }
+      }
+
+      // Slightly prefer live interval that has been assigned a preferred reg.
+      if (LI.preference)
+        LI.weight *= 1.01F;
+
+      // Divide the weight of the interval by its size. This encourages
+      // spilling of intervals that are large and have few uses, and
+      // discourages spilling of small intervals with many uses.
+      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
+    }
+  }
+
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const {
+   li_->print(O, m);
+}
+
+RegisterCoalescer* llvm::createSimpleRegisterCoalescer() {
+  return new SimpleRegisterCoalescing();
+}
+
+// Make sure that anything that uses RegisterCoalescer pulls in this file...
+DEFINING_FILE_FOR(SimpleRegisterCoalescing)
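The weight shaping done in the final loop above reads as one small formula. A condensed sketch under the constants used by this pass (0.5/0.9 for rematerializable defs, 1.01 for a register preference, InstrSlots::NUM == 4; the function name is illustrative, not part of the patch):

    #include <cmath>

    float shapeSpillWeight(float weight, bool zeroLength, bool remat,
                           bool rematIsLoad, bool hasPreference,
                           unsigned approxInstrCount) {
      if (zeroLength)
        return HUGE_VALF;                      // never worth spilling
      if (remat)                               // rematerializable defs are cheap
        weight *= rematIsLoad ? 0.9f : 0.5f;
      if (hasPreference)                       // slight bias toward preferred reg
        weight *= 1.01f;
      return weight / (approxInstrCount * 4);  // normalize by interval size
    }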
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
new file mode 100644
index 0000000..a495bfd
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -0,0 +1,313 @@
+//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register copy coalescing phase.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/ADT/BitVector.h"
+#include <queue>
+
+namespace llvm {
+  class SimpleRegisterCoalescing;
+  class LiveVariables;
+  class TargetRegisterInfo;
+  class TargetInstrInfo;
+  class VirtRegMap;
+  class MachineLoopInfo;
+
+  /// CopyRec - Representation for copy instructions in coalescer queue.
+  ///
+  struct CopyRec {
+    MachineInstr *MI;
+    unsigned LoopDepth;
+    bool isBackEdge;
+    CopyRec(MachineInstr *mi, unsigned depth, bool be)
+      : MI(mi), LoopDepth(depth), isBackEdge(be) {}
+  };
+
+  template<class SF> class JoinPriorityQueue;
+
+  /// CopyRecSort - Sorting function for coalescer queue.
+  ///
+  struct CopyRecSort : public std::binary_function<CopyRec,CopyRec,bool> {
+    JoinPriorityQueue<CopyRecSort> *JPQ;
+    explicit CopyRecSort(JoinPriorityQueue<CopyRecSort> *jpq) : JPQ(jpq) {}
+    CopyRecSort(const CopyRecSort &RHS) : JPQ(RHS.JPQ) {}
+    bool operator()(CopyRec left, CopyRec right) const;
+  };
+
+  /// JoinQueue - A priority queue of copy instructions the coalescer is
+  /// going to process.
+  template<class SF>
+  class JoinPriorityQueue {
+    SimpleRegisterCoalescing *Rc;
+    std::priority_queue<CopyRec, std::vector<CopyRec>, SF> Queue;
+
+  public:
+    explicit JoinPriorityQueue(SimpleRegisterCoalescing *rc)
+      : Rc(rc), Queue(SF(this)) {}
+
+    bool empty() const { return Queue.empty(); }
+    void push(CopyRec R) { Queue.push(R); }
+    CopyRec pop() {
+      if (empty()) return CopyRec(0, 0, false);
+      CopyRec R = Queue.top();
+      Queue.pop();
+      return R;
+    }
+
+    // Callbacks to SimpleRegisterCoalescing.
+    unsigned getRepIntervalSize(unsigned Reg);
+  };
+
+  class SimpleRegisterCoalescing : public MachineFunctionPass,
+                                   public RegisterCoalescer {
+    MachineFunction* mf_;
+    MachineRegisterInfo* mri_;
+    const TargetMachine* tm_;
+    const TargetRegisterInfo* tri_;
+    const TargetInstrInfo* tii_;
+    LiveIntervals *li_;
+    const MachineLoopInfo* loopInfo;
+
+    BitVector allocatableRegs_;
+    DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
+
+    /// JoinQueue - A priority queue of copy instructions the coalescer is
+    /// going to process.
+    JoinPriorityQueue<CopyRecSort> *JoinQueue;
+
+    /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+    ///
+    SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+    /// ReMatCopies - Keep track of copies eliminated due to remat.
+    ///
+    SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+    /// ReMatDefs - Keep track of definition instructions which have
+    /// been remat'ed.
+    SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {}
+
+    struct InstrSlots {
+      enum {
+        LOAD  = 0,
+        USE   = 1,
+        DEF   = 2,
+        STORE = 3,
+        NUM   = 4
+      };
+    };
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual void releaseMemory();
+
+    /// runOnMachineFunction - pass entry point
+    virtual bool runOnMachineFunction(MachineFunction&);
+
+    bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
+      // This runs as an independent pass, so don't do anything.
+      return false;
+    }
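CopyRecSort::operator() is only declared at this point; its definition is not shown in this hunk. A plausible ordering consistent with the declaration and the getRepIntervalSize callback is sketched below (illustrative only, not the patch's actual definition; it assumes operand 0 of a queued copy is its destination register):

    // std::priority_queue pops the element that compares *greatest*, so
    // returning true here gives 'right' priority over 'left'.
    bool CopyRecSort::operator()(CopyRec left, CopyRec right) const {
      if (left.LoopDepth != right.LoopDepth)
        return left.LoopDepth < right.LoopDepth; // deeper loops pop first
      if (left.isBackEdge != right.isBackEdge)
        return left.isBackEdge;                  // non-back-edge copies pop first
      // Tie-break: copies with smaller destination intervals pop first.
      return JPQ->getRepIntervalSize(left.MI->getOperand(0).getReg()) >
             JPQ->getRepIntervalSize(right.MI->getOperand(0).getReg());
    }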
+    /// getRepIntervalSize - Called from the join priority queue sorting
+    /// function. It returns the size of the interval that represents the
+    /// given register.
+    unsigned getRepIntervalSize(unsigned Reg) {
+      if (!li_->hasInterval(Reg))
+        return 0;
+      return li_->getApproximateInstructionCount(li_->getInterval(Reg)) *
+             LiveInterval::InstrSlots::NUM;
+    }
+
+    /// print - Implement the dump method.
+    virtual void print(std::ostream &O, const Module* = 0) const;
+    void print(std::ostream *O, const Module* M = 0) const {
+      if (O) print(*O, M);
+    }
+
+  private:
+    /// joinIntervals - join compatible live intervals
+    void joinIntervals();
+
+    /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+    /// copies that cannot yet be coalesced into the "TryAgain" list.
+    void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                           std::vector<CopyRec> &TryAgain);
+
+    /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+    /// which are the src/dst of the copy instruction CopyMI. This returns true
+    /// if the copy was successfully coalesced away. If it is not currently
+    /// possible to coalesce this interval, but it may be possible if other
+    /// things get coalesced, then it returns true by reference in 'Again'.
+    bool JoinCopy(CopyRec &TheCopy, bool &Again);
+
+    /// JoinIntervals - Attempt to join these two intervals. On failure, this
+    /// returns false. Otherwise, if one of the intervals being joined is a
+    /// physreg, this method always canonicalizes DestInt to be it. The output
+    /// "SrcInt" will not have been modified, so we can use this information
+    /// below to update aliases.
+    bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped);
+
+    /// SimpleJoin - Attempt to join the specified interval into this one. The
+    /// caller of this method must guarantee that the RHS only contains a single
+    /// value number and that the RHS is not defined by a copy from this
+    /// interval. This returns false if the intervals are not joinable, or it
+    /// joins them and returns true.
+    bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS);
+
+    /// Return true if the two specified registers belong to different register
+    /// classes. The registers may be either phys or virt regs.
+    bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
+
+    /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+    /// the source value number is defined by a copy from the destination reg,
+    /// see if we can merge these two destination reg valno#s into a single
+    /// value number, eliminating a copy.
+    bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
+                              MachineInstr *CopyMI);
+
+    /// HasOtherReachingDefs - Return true if there are definitions of IntB
+    /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+    bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+                              VNInfo *AValNo, VNInfo *BValNo);
+
+    /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+    /// If the source value number is defined by a commutable instruction and
+    /// its other operand is coalesced to the copy dest register, see if we
+    /// can transform the copy into a noop by commuting the definition.
+    bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB,
+                                  MachineInstr *CopyMI);
+
+    /// TrimLiveIntervalToLastUse - If there is a last use in the same basic
+    /// block as the copy instruction, trim the live interval to the last use
+    /// and return true.
+    bool TrimLiveIntervalToLastUse(unsigned CopyIdx,
+                                   MachineBasicBlock *CopyMBB,
+                                   LiveInterval &li, const LiveRange *LR);
+
+    /// ReMaterializeTrivialDef - If the source of a copy is defined by a
+    /// trivial computation, replace the copy by rematerializing the
+    /// definition.
+    bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
+                                 MachineInstr *CopyMI);
+
+    /// TurnCopyIntoImpDef - If the source of the specified copy is an implicit
+    /// def, turn the copy into an implicit def.
+    bool TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
+                            MachineBasicBlock *MBB,
+                            unsigned DstReg, unsigned SrcReg);
+
+    /// CanCoalesceWithImpDef - Returns true if the specified copy instruction
+    /// from an implicit def to another register can be coalesced away.
+    bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
+                               LiveInterval &li, LiveInterval &ImpLi) const;
+
+    /// RemoveCopiesFromValNo - The specified value# is defined by an implicit
+    /// def and it is being removed. Turn all copies from this value# into
+    /// identity copies so they will be removed.
+    void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI);
+
+    /// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
+    /// virtual destination register with a physical source register.
+    bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
+                                     MachineBasicBlock *CopyMBB,
+                                     LiveInterval &DstInt, LiveInterval &SrcInt);
+
+    /// isWinToJoinVRWithDstPhysReg - Return true if it's worthwhile to join a
+    /// copy from a virtual source register to a physical destination register.
+    bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
+                                     MachineBasicBlock *CopyMBB,
+                                     LiveInterval &DstInt, LiveInterval &SrcInt);
+
+    /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+    /// two virtual registers from different register classes.
+    bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
+                               unsigned Threshold);
+
+    /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
+    /// register with a physical register, check if any of the virtual register
+    /// operands is a sub-register use or def. If so, make sure it won't result
+    /// in an illegal extract_subreg or insert_subreg instruction.
+    bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
+                                     unsigned VirtReg, unsigned PhysReg);
+
+    /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
+    /// an extract_subreg where dst is a physical register, e.g.
+    /// cl = EXTRACT_SUBREG reg1024, 1
+    bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
+                                       unsigned SubIdx, unsigned &RealDstReg);
+
+    /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
+    /// an insert_subreg where src is a physical register, e.g.
+    /// reg1024 = INSERT_SUBREG reg1024, c1, 0
+    bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
+                                      unsigned SubIdx, unsigned &RealDstReg);
+
+    /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
+    /// the specified live interval is defined by a copy from the specified
+    /// register.
+    bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR,
+                                     unsigned Reg);
+
+    /// isBackEdgeCopy - Return true if CopyMI is a back edge copy.
+    ///
+    bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const;
+
+    /// UpdateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and
+    /// update the subregister number if it is not zero.
+    /// If DstReg is a physical register and the existing subregister number
+    /// of the def / use being updated is not zero, make sure to set it to the
+    /// correct physical subregister.
+    void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+    /// RemoveDeadImpDef - Remove implicit_def instructions which are
+    /// "re-defining" registers due to insert_subreg coalescing.
+    void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI);
+
+    /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate
+    /// due to live range lengthening as the result of coalescing.
+    void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI);
+
+    /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+    /// Return true if live interval is removed.
+    bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+    /// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+    /// extended by a dead copy. Mark the last use (if any) of the val# as a
+    /// kill and end the live range there. If there isn't another use, then this
+    /// live range is dead. Return true if live interval is removed.
+    bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+    /// RemoveDeadDef - If a def of a live interval is now determined dead,
+    /// remove the val# it defines. If the live interval becomes empty, remove
+    /// it as well.
+    bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+    /// lastRegisterUse - Returns the last use of the specified register between
+    /// cycles Start and End or NULL if there are no uses.
+    MachineOperand *lastRegisterUse(unsigned Start, unsigned End, unsigned Reg,
+                                    unsigned &LastUseIdx) const;
+
+    void printRegName(unsigned reg) const;
+  };
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
new file mode 100644
index 0000000..ce63121
--- /dev/null
+++ b/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,229 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+
+  MachineFunction *mf;
+  LiveIntervals *lis;
+  LiveStacks *ls;
+  MachineFrameInfo *mfi;
+  MachineRegisterInfo *mri;
+  const TargetInstrInfo *tii;
+  VirtRegMap *vrm;
+
+  /// Construct a spiller base.
+  SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
+              VirtRegMap *vrm) :
+    mf(mf), lis(lis), ls(ls), vrm(vrm)
+  {
+    mfi = mf->getFrameInfo();
+    mri = &mf->getRegInfo();
+    tii = mf->getTarget().getInstrInfo();
+  }
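Both helpers below lean on the four-slot instruction numbering (LOAD/USE/DEF/STORE, NUM == 4) declared earlier. A minimal sketch of the arithmetic as assumed here, mirroring the LiveIntervals base/use/def index accessors (helper names are illustrative):

    enum { LOAD = 0, USE = 1, DEF = 2, STORE = 3, NUM = 4 };

    unsigned baseIndex(unsigned idx) { return idx - idx % NUM; } // owning instr
    unsigned useIndex(unsigned idx)  { return baseIndex(idx) + USE; }
    unsigned defIndex(unsigned idx)  { return baseIndex(idx) + DEF; }

A store placed in the gap right after an instruction at base index B therefore lands at B + NUM, and a load in the gap before it at B - NUM; when no such gap is free, the helpers first double every index (scaleNumbering(2)) to create one.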
+  /// Insert a store of the given vreg to the given stack slot immediately
+  /// after the given instruction. Returns the base index of the inserted
+  /// instruction. The caller is responsible for adding an appropriate
+  /// LiveInterval to the LiveIntervals analysis.
+  unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
+                          unsigned newVReg,
+                          const TargetRegisterClass *trc) {
+    MachineBasicBlock::iterator nextInstItr(mi);
+    ++nextInstItr;
+
+    if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
+      lis->scaleNumbering(2);
+      ls->scaleNumbering(2);
+    }
+
+    unsigned miIdx = lis->getInstructionIndex(mi);
+
+    assert(lis->hasGapAfterInstr(miIdx));
+
+    tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, newVReg,
+                             true, ss, trc);
+    MachineBasicBlock::iterator storeInstItr(mi);
+    ++storeInstItr;
+    MachineInstr *storeInst = &*storeInstItr;
+    unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
+
+    assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
+           "Store inst index already in use.");
+
+    lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
+
+    return storeInstIdx;
+  }
+
+  /// Insert a load of the given vreg from the given stack slot immediately
+  /// before the given instruction. Returns the base index of the inserted
+  /// instruction. The caller is responsible for adding an appropriate
+  /// LiveInterval to the LiveIntervals analysis.
+  unsigned insertLoadFor(MachineInstr *mi, unsigned ss,
+                         unsigned newVReg,
+                         const TargetRegisterClass *trc) {
+    MachineBasicBlock::iterator useInstItr(mi);
+
+    if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
+      lis->scaleNumbering(2);
+      ls->scaleNumbering(2);
+    }
+
+    unsigned miIdx = lis->getInstructionIndex(mi);
+
+    assert(lis->hasGapBeforeInstr(miIdx));
+
+    tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, newVReg, ss, trc);
+    MachineBasicBlock::iterator loadInstItr(mi);
+    --loadInstItr;
+    MachineInstr *loadInst = &*loadInstItr;
+    unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
+
+    assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
+           "Load inst index already in use.");
+
+    lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
+
+    return loadInstIdx;
+  }
+
+  /// Add spill ranges for every use/def of the live interval, inserting loads
+  /// immediately before each use, and stores after each def. No folding is
+  /// attempted.
+ std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) { + DOUT << "Spilling everywhere " << *li << "\n"; + + assert(li->weight != HUGE_VALF && + "Attempting to spill already spilled value."); + + assert(!li->isStackSlot() && + "Trying to spill a stack slot."); + + std::vector<LiveInterval*> added; + + const TargetRegisterClass *trc = mri->getRegClass(li->reg); + unsigned ss = vrm->assignVirt2StackSlot(li->reg); + + for (MachineRegisterInfo::reg_iterator + regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) { + + MachineInstr *mi = &*regItr; + do { + ++regItr; + } while (regItr != mri->reg_end() && (&*regItr == mi)); + + SmallVector<unsigned, 2> indices; + bool hasUse = false; + bool hasDef = false; + + for (unsigned i = 0; i != mi->getNumOperands(); ++i) { + MachineOperand &op = mi->getOperand(i); + + if (!op.isReg() || op.getReg() != li->reg) + continue; + + hasUse |= mi->getOperand(i).isUse(); + hasDef |= mi->getOperand(i).isDef(); + + indices.push_back(i); + } + + unsigned newVReg = mri->createVirtualRegister(trc); + vrm->grow(); + vrm->assignVirt2StackSlot(newVReg, ss); + + LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); + newLI->weight = HUGE_VALF; + + for (unsigned i = 0; i < indices.size(); ++i) { + mi->getOperand(indices[i]).setReg(newVReg); + + if (mi->getOperand(indices[i]).isUse()) { + mi->getOperand(indices[i]).setIsKill(true); + } + } + + assert(hasUse || hasDef); + + if (hasUse) { + unsigned loadInstIdx = insertLoadFor(mi, ss, newVReg, trc); + unsigned start = lis->getDefIndex(loadInstIdx), + end = lis->getUseIndex(lis->getInstructionIndex(mi)); + + VNInfo *vni = + newLI->getNextValue(loadInstIdx, 0, lis->getVNInfoAllocator()); + vni->kills.push_back(lis->getInstructionIndex(mi)); + LiveRange lr(start, end, vni); + + newLI->addRange(lr); + } + + if (hasDef) { + unsigned storeInstIdx = insertStoreFor(mi, ss, newVReg, trc); + unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), + end = lis->getUseIndex(storeInstIdx); + + VNInfo *vni = + newLI->getNextValue(storeInstIdx, 0, lis->getVNInfoAllocator()); + vni->kills.push_back(storeInstIdx); + LiveRange lr(start, end, vni); + + newLI->addRange(lr); + } + + added.push_back(newLI); + } + + + return added; + } + +}; + + +/// Spills any live range using the spill-everywhere method with no attempt at +/// folding. +class TrivialSpiller : public SpillerBase { +public: + TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) : + SpillerBase(mf, lis, ls, vrm) {} + + std::vector<LiveInterval*> spill(LiveInterval *li) { + return trivialSpillEverywhere(li); + } + +}; + +} + +llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, + LiveStacks *ls, VirtRegMap *vrm) { + return new TrivialSpiller(mf, lis, ls, vrm); +} diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h new file mode 100644 index 0000000..cad054d --- /dev/null +++ b/lib/CodeGen/Spiller.h @@ -0,0 +1,37 @@ +//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPILLER_H +#define LLVM_CODEGEN_SPILLER_H + +#include <vector> + +namespace llvm { + class LiveInterval; + class LiveIntervals; + class LiveStacks; + class MachineFunction; + class VirtRegMap; + + /// Spiller interface. 
+ /// + /// Implementations are utility classes which insert spill or remat code on + /// demand. + class Spiller { + public: + virtual ~Spiller() = 0; + virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0; + }; + + /// Create and return a spiller object, as specified on the command line. + Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li, + LiveStacks *ls, VirtRegMap *vrm); +} + +#endif diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp new file mode 100644 index 0000000..c179f1e --- /dev/null +++ b/lib/CodeGen/StackProtector.cpp @@ -0,0 +1,224 @@ +//===-- StackProtector.cpp - Stack Protector Insertion --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass inserts stack protectors into functions which need them. A variable +// with a random value in it is stored onto the stack before the local variables +// are allocated. Upon exiting the block, the stored value is checked. If it's +// changed, then there was some sort of violation and the program aborts. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stack-protector" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Attributes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +// SSPBufferSize - The lower bound for a buffer to be considered for stack +// smashing protection. +static cl::opt<unsigned> +SSPBufferSize("stack-protector-buffer-size", cl::init(8), + cl::desc("Lower bound for a buffer to be considered for " + "stack protection")); + +namespace { + class VISIBILITY_HIDDEN StackProtector : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// target type sizes. + const TargetLowering *TLI; + + Function *F; + Module *M; + + /// InsertStackProtectors - Insert code into the prologue and epilogue of + /// the function. + /// + /// - The prologue code loads and stores the stack guard onto the stack. + /// - The epilogue checks the value stored in the prologue against the + /// original value. It calls __stack_chk_fail if they differ. + bool InsertStackProtectors(); + + /// CreateFailBB - Create a basic block to jump to when the stack protector + /// check fails. + BasicBlock *CreateFailBB(); + + /// RequiresStackProtector - Check whether or not this function needs a + /// stack protector based upon the stack protector level. + bool RequiresStackProtector() const; + public: + static char ID; // Pass identification, replacement for typeid. 
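For orientation, with the 8-byte default of the buffer-size option above, a frame containing a 16-byte character array trips the check in RequiresStackProtector(), while a scalar-only frame does not. An illustrative source-level example (assuming the functions are compiled with stack protection requested, so they carry the StackProtect attribute):

    #include <cstring>

    void vulnerable(const char *s) {
      char buf[16];         // [16 x i8] alloca, >= SSPBufferSize: guard emitted
      std::strcpy(buf, s);
    }

    int harmless(int a, int b) {
      return a + b;         // scalars only: no guard
    }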
+ StackProtector() : FunctionPass(&ID), TLI(0) {} + StackProtector(const TargetLowering *tli) + : FunctionPass(&ID), TLI(tli) {} + + virtual bool runOnFunction(Function &Fn); + }; +} // end anonymous namespace + +char StackProtector::ID = 0; +static RegisterPass<StackProtector> +X("stack-protector", "Insert stack protectors"); + +FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { + return new StackProtector(tli); +} + +bool StackProtector::runOnFunction(Function &Fn) { + F = &Fn; + M = F->getParent(); + + if (!RequiresStackProtector()) return false; + + return InsertStackProtectors(); +} + +/// RequiresStackProtector - Check whether or not this function needs a stack +/// protector based upon the stack protector level. The heuristic we use is to +/// add a guard variable to functions that call alloca, and functions with +/// buffers larger than SSPBufferSize bytes. +bool StackProtector::RequiresStackProtector() const { + if (F->hasFnAttr(Attribute::StackProtectReq)) + return true; + + if (!F->hasFnAttr(Attribute::StackProtect)) + return false; + + const TargetData *TD = TLI->getTargetData(); + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + BasicBlock *BB = I; + + for (BasicBlock::iterator + II = BB->begin(), IE = BB->end(); II != IE; ++II) + if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (AI->isArrayAllocation()) + // This is a call to alloca with a variable size. Emit stack + // protectors. + return true; + + if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) + // If an array has more than SSPBufferSize bytes of allocated space, + // then we emit stack protectors. + if (SSPBufferSize <= TD->getTypeAllocSize(AT)) + return true; + } + } + + return false; +} + +/// InsertStackProtectors - Insert code into the prologue and epilogue of the +/// function. +/// +/// - The prologue code loads and stores the stack guard onto the stack. +/// - The epilogue checks the value stored in the prologue against the original +/// value. It calls __stack_chk_fail if they differ. +bool StackProtector::InsertStackProtectors() { + BasicBlock *FailBB = 0; // The basic block to jump to if check fails. + AllocaInst *AI = 0; // Place on stack that stores the stack guard. + Constant *StackGuardVar = 0; // The stack guard variable. + + for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { + BasicBlock *BB = I++; + + ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RI) continue; + + if (!FailBB) { + // Insert code into the entry block that stores the __stack_chk_guard + // variable onto the stack: + // + // entry: + // StackGuardSlot = alloca i8* + // StackGuard = load __stack_chk_guard + // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) + // + PointerType *PtrTy = PointerType::getUnqual(Type::Int8Ty); + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + + BasicBlock &Entry = F->getEntryBlock(); + Instruction *InsPt = &Entry.front(); + + AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); + LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); + + Value *Args[] = { LI, AI }; + CallInst:: + Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + &Args[0], array_endof(Args), "", InsPt); + + // Create the basic block to jump to when the guard check fails. + FailBB = CreateFailBB(); + } + + // For each block with a return instruction, convert this: + // + // return: + // ... + // ret ... + // + // into this: + // + // return: + // ... 
+ // %1 = load __stack_chk_guard + // %2 = load StackGuardSlot + // %3 = cmp i1 %1, %2 + // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // + // SP_return: + // ret ... + // + // CallStackCheckFailBlk: + // call void @__stack_chk_fail() + // unreachable + + // Split the basic block before the return instruction. + BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + + // Remove default branch instruction to the new BB. + BB->getTerminator()->eraseFromParent(); + + // Move the newly created basic block to the point right after the old basic + // block so that it's in the "fall through" position. + NewBB->moveAfter(BB); + + // Generate the stack protector instructions in the old basic block. + LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); + LoadInst *LI2 = new LoadInst(AI, "", true, BB); + ICmpInst *Cmp = new ICmpInst(CmpInst::ICMP_EQ, LI1, LI2, "", BB); + BranchInst::Create(NewBB, FailBB, Cmp, BB); + } + + // Return if we didn't modify any basic blocks. I.e., there are no return + // statements in the function. + if (!FailBB) return false; + + return true; +} + +/// CreateFailBB - Create a basic block to jump to when the stack protector +/// check fails. +BasicBlock *StackProtector::CreateFailBB() { + BasicBlock *FailBB = BasicBlock::Create("CallStackCheckFailBlk", F); + Constant *StackChkFail = + M->getOrInsertFunction("__stack_chk_fail", Type::VoidTy, NULL); + CallInst::Create(StackChkFail, "", FailBB); + new UnreachableInst(FailBB); + return FailBB; +} diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp new file mode 100644 index 0000000..5824644 --- /dev/null +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -0,0 +1,733 @@ +//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the stack slot coloring pass. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+               cl::init(false), cl::Hidden,
+               cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<bool>
+ColorWithRegsOpt("color-ss-with-regs",
+                 cl::init(false), cl::Hidden,
+                 cl::desc("Color stack slots with free registers"));
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
+STATISTIC(NumLoadElim, "Number of loads eliminated");
+STATISTIC(NumStoreElim, "Number of stores eliminated");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+  class VISIBILITY_HIDDEN StackSlotColoring : public MachineFunctionPass {
+    bool ColorWithRegs;
+    LiveStacks* LS;
+    VirtRegMap* VRM;
+    MachineFrameInfo *MFI;
+    MachineRegisterInfo *MRI;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    const MachineLoopInfo *loopInfo;
+
+    // SSIntervals - Spill slot intervals.
+    std::vector<LiveInterval*> SSIntervals;
+
+    // SSRefs - Keep a list of frame index references for each spill slot.
+    SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+    // OrigAlignments - Alignments of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigAlignments;
+
+    // OrigSizes - Sizes of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigSizes;
+
+    // AllColors - If index is set, it's a spill slot, i.e. color.
+    // FIXME: This assumes PEI locates spill slots with smaller indices
+    // closest to the stack pointer / frame pointer. Therefore, smaller
+    // index == better color.
+    BitVector AllColors;
+
+    // NextColor - Next "color" that's not yet used.
+    int NextColor;
+
+    // UsedColors - "Colors" that have been assigned.
+    BitVector UsedColors;
+
+    // Assignments - Color to intervals mapping.
+    SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+  public:
+    static char ID; // Pass identification
+    StackSlotColoring() :
+      MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {}
+    StackSlotColoring(bool RegColor) :
+      MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveStacks>();
+      AU.addRequired<VirtRegMap>();
+      AU.addPreserved<VirtRegMap>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char* getPassName() const {
+      return "Stack Slot Coloring";
+    }
+
+  private:
+    void InitializeSlots();
+    void ScanForSpillSlotRefs(MachineFunction &MF);
+    bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+    int ColorSlot(LiveInterval *li);
+    bool ColorSlots(MachineFunction &MF);
+    bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+                                SmallVector<SmallVector<int, 4>, 16> &RevMap,
+                                BitVector &SlotIsReg);
+    void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+                            MachineFunction &MF);
+    bool PropagateBackward(MachineBasicBlock::iterator MII,
+                           MachineBasicBlock *MBB,
+                           unsigned OldReg, unsigned NewReg);
+    bool PropagateForward(MachineBasicBlock::iterator MII,
+                          MachineBasicBlock *MBB,
+                          unsigned OldReg, unsigned NewReg);
+    void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+                                     unsigned Reg, const TargetRegisterClass *RC,
+                                     SmallSet<unsigned, 4> &Defs,
+                                     MachineFunction &MF);
+    bool AllMemRefsCanBeUnfolded(int SS);
+    bool RemoveDeadStores(MachineBasicBlock* MBB);
+  };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+static RegisterPass<StackSlotColoring>
+X("stack-slot-coloring", "Stack Slot Coloring");
+
+FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
+  return new StackSlotColoring(RegColor);
+}
+
+namespace {
+  // IntervalSorter - Comparison predicate that sorts live intervals by
+  // their weight.
+  struct IntervalSorter {
+    bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+      return LHS->weight > RHS->weight;
+    }
+  };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+  SSRefs.resize(MFI->getObjectIndexEnd());
+
+  // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = &*MBBI;
+    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+         MII != EE; ++MII) {
+      MachineInstr *MI = &*MII;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isFI())
+          continue;
+        int FI = MO.getIndex();
+        if (FI < 0)
+          continue;
+        if (!LS->hasInterval(FI))
+          continue;
+        LiveInterval &li = LS->getInterval(FI);
+        li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+        SSRefs[FI].push_back(MI);
+      }
+    }
+  }
+}
+
+/// InitializeSlots - Process all spill stack slot live intervals and add them
+/// to a sorted (by weight) list.
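The coloring implemented below is a greedy first-fit: intervals are visited in decreasing-weight order and each takes the lowest color whose current occupants it does not overlap, opening a fresh color only when every existing one clashes. A self-contained sketch of that strategy (types and names are illustrative, not the pass's own):

    #include <vector>

    struct Ival { unsigned begin, end; };            // half-open [begin, end)

    static bool overlaps(const Ival &a, const Ival &b) {
      return a.begin < b.end && b.begin < a.end;
    }

    unsigned firstFitColor(const Ival &li,
                           std::vector<std::vector<Ival> > &byColor) {
      for (unsigned c = 0; c != byColor.size(); ++c) {
        bool clash = false;
        for (unsigned i = 0; i != byColor[c].size(); ++i)
          if (overlaps(li, byColor[c][i])) { clash = true; break; }
        if (!clash) { byColor[c].push_back(li); return c; }
      }
      byColor.push_back(std::vector<Ival>(1, li));   // open a fresh color
      return byColor.size() - 1;
    }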
+void StackSlotColoring::InitializeSlots() {
+  int LastFI = MFI->getObjectIndexEnd();
+  OrigAlignments.resize(LastFI);
+  OrigSizes.resize(LastFI);
+  AllColors.resize(LastFI);
+  UsedColors.resize(LastFI);
+  Assignments.resize(LastFI);
+
+  // Gather all spill slots into a list.
+  DOUT << "Spill slot intervals:\n";
+  for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+    LiveInterval &li = i->second;
+    DEBUG(li.dump());
+    int FI = li.getStackSlotIndex();
+    if (MFI->isDeadObjectIndex(FI))
+      continue;
+    SSIntervals.push_back(&li);
+    OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+    OrigSizes[FI] = MFI->getObjectSize(FI);
+    AllColors.set(FI);
+  }
+  DOUT << '\n';
+
+  // Sort them by weight.
+  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+  // Get first "color".
+  NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+  const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+  for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+    LiveInterval *OtherLI = OtherLIs[i];
+    if (OtherLI->overlaps(*li))
+      return true;
+  }
+  return false;
+}
+
+/// ColorSlotsWithFreeRegs - If there are any free registers available, try
+/// replacing spill slot references with registers instead.
+bool
+StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+                                          SmallVector<SmallVector<int, 4>, 16> &RevMap,
+                                          BitVector &SlotIsReg) {
+  if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
+    return false;
+
+  bool Changed = false;
+  DOUT << "Assigning unused registers to spill slots:\n";
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = li->getStackSlotIndex();
+    if (!UsedColors[SS] || li->weight < 20)
+      // If the weight is < 20, i.e. two references in a loop with depth 1,
+      // don't bother with it.
+      continue;
+
+    // These slots are allowed to share the same registers.
+    bool AllColored = true;
+    SmallVector<unsigned, 4> ColoredRegs;
+    for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
+      int RSS = RevMap[SS][j];
+      const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
+      // If it's not colored to another stack slot, try coloring it
+      // to a "free" register.
+      if (!RC) {
+        AllColored = false;
+        continue;
+      }
+      unsigned Reg = VRM->getFirstUnusedRegister(RC);
+      if (!Reg) {
+        AllColored = false;
+        continue;
+      }
+      if (!AllMemRefsCanBeUnfolded(RSS)) {
+        AllColored = false;
+        continue;
+      } else {
+        DOUT << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n';
+        ColoredRegs.push_back(Reg);
+        SlotMapping[RSS] = Reg;
+        SlotIsReg.set(RSS);
+        Changed = true;
+      }
+    }
+
+    // The register and its sub-registers are no longer free.
+    while (!ColoredRegs.empty()) {
+      unsigned Reg = ColoredRegs.back();
+      ColoredRegs.pop_back();
+      VRM->setRegisterUsed(Reg);
+      // If reg is a callee-saved register, it will have to be spilled in
+      // the prologue.
+      MRI->setPhysRegUsed(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+        VRM->setRegisterUsed(*AS);
+        MRI->setPhysRegUsed(*AS);
+      }
+    }
+    // This spill slot is dead after the rewrites.
+    if (AllColored) {
+      MFI->RemoveStackObject(SS);
+      ++NumEliminated;
+    }
+  }
+  DOUT << '\n';
+
+  return Changed;
+}
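The "weight < 20" cutoff above reads directly off the spill-weight formula used when the references were scanned: assuming LiveIntervals::getSpillWeight computes (isDef + isUse) * 10^loopDepth, a plain use at loop depth 1 contributes 10, so two such references total exactly 20. A sketch of that assumed formula:

    #include <cmath>

    float spillWeight(bool isDef, bool isUse, unsigned loopDepth) {
      return (isDef + isUse) * std::pow(10.0f, float(loopDepth));
    }
    // spillWeight(false, true, 1) == 10; two references in a depth-1 loop: 20.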
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+  int Color = -1;
+  bool Share = false;
+  if (!DisableSharing) {
+    // Check if it's possible to reuse any of the used colors.
+    Color = UsedColors.find_first();
+    while (Color != -1) {
+      if (!OverlapWithAssignments(li, Color)) {
+        Share = true;
+        ++NumEliminated;
+        break;
+      }
+      Color = UsedColors.find_next(Color);
+    }
+  }
+
+  // Assign it to the first available color (assumed to be the best) if it's
+  // not possible to share a used color with other objects.
+  if (!Share) {
+    assert(NextColor != -1 && "No more spill slots?");
+    Color = NextColor;
+    UsedColors.set(Color);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  // Record the assignment.
+  Assignments[Color].push_back(li);
+  int FI = li->getStackSlotIndex();
+  DOUT << "Assigning fi#" << FI << " to fi#" << Color << "\n";
+
+  // Change size and alignment of the allocated slot. If there are multiple
+  // objects sharing the same slot, then make sure the size and alignment
+  // are large enough for all.
+  unsigned Align = OrigAlignments[FI];
+  if (!Share || Align > MFI->getObjectAlignment(Color))
+    MFI->setObjectAlignment(Color, Align);
+  int64_t Size = OrigSizes[FI];
+  if (!Share || Size > MFI->getObjectSize(Color))
+    MFI->setObjectSize(Color, Size);
+  return Color;
+}
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+  unsigned NumObjs = MFI->getObjectIndexEnd();
+  SmallVector<int, 16> SlotMapping(NumObjs, -1);
+  SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+  SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+  BitVector SlotIsReg(NumObjs);
+  BitVector UsedColors(NumObjs);
+
+  DOUT << "Color spill slot intervals:\n";
+  bool Changed = false;
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = li->getStackSlotIndex();
+    int NewSS = ColorSlot(li);
+    assert(NewSS >= 0 && "Stack coloring failed?");
+    SlotMapping[SS] = NewSS;
+    RevMap[NewSS].push_back(SS);
+    SlotWeights[NewSS] += li->weight;
+    UsedColors.set(NewSS);
+    Changed |= (SS != NewSS);
+  }
+
+  DOUT << "\nSpill slots after coloring:\n";
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = li->getStackSlotIndex();
+    li->weight = SlotWeights[SS];
+  }
+  // Sort them by new weight.
+  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+    DEBUG(SSIntervals[i]->dump());
+  DOUT << '\n';
+#endif
+
+  // Can we "color" a stack slot with an unused register?
+  Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+  if (!Changed)
+    return false;
+
+  // Rewrite all MO_FrameIndex operands.
+  SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+  for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+    bool isReg = SlotIsReg[SS];
+    int NewFI = SlotMapping[SS];
+    if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+      continue;
+
+    const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+    SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+    for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+      if (!isReg)
+        RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+      else {
+        // Rewrite to use a register instead.
+        unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+        SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+        UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+      }
+  }
+
+  // Delete unused stack slots.
+  while (NextColor != -1) {
+    DOUT << "Removing unused stack object fi#" << NextColor << "\n";
+    MFI->RemoveStackObject(NextColor);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+  SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+  for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+    MachineInstr *MI = RefMIs[i];
+    if (TII->isLoadFromStackSlot(MI, SS) ||
+        TII->isStoreToStackSlot(MI, SS))
+      // Restore and spill will become copies.
+      return true;
+    if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+      return false;
+    for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+      MachineOperand &MO = MI->getOperand(j);
+      if (MO.isFI() && MO.getIndex() != SS)
+        // If it uses another frameindex, we can't currently unfold it.
+        return false;
+    }
+  }
+  return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to the old frame index with the new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+                                           int NewFI, MachineFunction &MF) {
+  for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isFI())
+      continue;
+    int FI = MO.getIndex();
+    if (FI != OldFI)
+      continue;
+    MO.setIndex(NewFI);
+  }
+
+  // Update the MachineMemOperand for the new memory location.
+  // FIXME: We need a better method of managing these too.
+  SmallVector<MachineMemOperand, 2> MMOs(MI->memoperands_begin(),
+                                         MI->memoperands_end());
+  MI->clearMemOperands(MF);
+  const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+  for (unsigned i = 0, ee = MMOs.size(); i != ee; ++i) {
+    if (MMOs[i].getValue() != OldSV)
+      MI->addMemOperand(MF, MMOs[i]);
+    else {
+      MachineMemOperand MMO(PseudoSourceValue::getFixedStack(NewFI),
+                            MMOs[i].getFlags(), MMOs[i].getOffset(),
+                            MMOs[i].getSize(), MMOs[i].getAlignment());
+      MI->addMemOperand(MF, MMO);
+    }
+  }
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+                                          MachineBasicBlock *MBB,
+                                          unsigned OldReg, unsigned NewReg) {
+  if (MII == MBB->begin())
+    return false;
+
+  SmallVector<MachineOperand*, 4> Uses;
+  SmallVector<MachineOperand*, 4> Refs;
+  while (--MII != MBB->begin()) {
+    bool FoundDef = false;  // Not counting two-address defs.
+ + Uses.clear(); + const TargetInstrDesc &TID = MII->getDesc(); + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (Reg == OldReg) { + if (MO.isImplicit()) + return false; + const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i); + if (RC && !RC->contains(NewReg)) + return false; + + if (MO.isUse()) { + Uses.push_back(&MO); + } else { + Refs.push_back(&MO); + if (!MII->isRegTiedToUseOperand(i)) + FoundDef = true; + } + } else if (TRI->regsOverlap(Reg, NewReg)) { + return false; + } else if (TRI->regsOverlap(Reg, OldReg)) { + if (!MO.isUse() || !MO.isKill()) + return false; + } + } + + if (FoundDef) { + // Found non-two-address def. Stop here. + for (unsigned i = 0, e = Refs.size(); i != e; ++i) + Refs[i]->setReg(NewReg); + return true; + } + + // Two-address uses must be updated as well. + for (unsigned i = 0, e = Uses.size(); i != e; ++i) + Refs.push_back(Uses[i]); + } + return false; +} + +/// PropagateForward - Traverse forward and look for the kill of OldReg. If +/// it can successfully update all of the uses with NewReg, do so and +/// return true. +bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, + MachineBasicBlock *MBB, + unsigned OldReg, unsigned NewReg) { + if (MII == MBB->end()) + return false; + + SmallVector<MachineOperand*, 4> Uses; + while (++MII != MBB->end()) { + bool FoundUse = false; + bool FoundKill = false; + const TargetInstrDesc &TID = MII->getDesc(); + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (Reg == OldReg) { + if (MO.isDef() || MO.isImplicit()) + return false; + + const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i); + if (RC && !RC->contains(NewReg)) + return false; + FoundUse = true; + if (MO.isKill()) + FoundKill = true; + Uses.push_back(&MO); + } else if (TRI->regsOverlap(Reg, NewReg) || + TRI->regsOverlap(Reg, OldReg)) + return false; + } + if (FoundKill) { + for (unsigned i = 0, e = Uses.size(); i != e; ++i) + Uses[i]->setReg(NewReg); + return true; + } + } + return false; +} + +/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding +/// folded memory references and replacing those references with register +/// references instead. +void +StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, + unsigned Reg, + const TargetRegisterClass *RC, + SmallSet<unsigned, 4> &Defs, + MachineFunction &MF) { + MachineBasicBlock *MBB = MI->getParent(); + if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { + if (PropagateForward(MI, MBB, DstReg, Reg)) { + DOUT << "Eliminated load: "; + DEBUG(MI->dump()); + ++NumLoadElim; + } else { + TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC); + ++NumRegRepl; + } + + if (!Defs.count(Reg)) { + // If this is the first use of Reg in this MBB and it wasn't previously + // defined in MBB, add it to livein. + MBB->addLiveIn(Reg); + Defs.insert(Reg); + } + } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { + if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { + DOUT << "Eliminated store: "; + DEBUG(MI->dump()); + ++NumStoreElim; + } else { + TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC); + ++NumRegRepl; + } + + // Remember reg has been defined in MBB. 
+ Defs.insert(Reg); + } else { + SmallVector<MachineInstr*, 4> NewMIs; + bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); + Success = Success; // Silence compiler warning. + assert(Success && "Failed to unfold!"); + MachineInstr *NewMI = NewMIs[0]; + MBB->insert(MI, NewMI); + ++NumRegRepl; + + if (NewMI->readsRegister(Reg)) { + if (!Defs.count(Reg)) + // If this is the first use of Reg in this MBB and it wasn't previously + // defined in MBB, add it to livein. + MBB->addLiveIn(Reg); + Defs.insert(Reg); + } + } + MBB->erase(MI); +} + +/// RemoveDeadStores - Scan through a basic block and look for loads followed +/// by stores. If they're both using the same stack slot, then the store is +/// definitely dead. This could obviously be much more aggressive (consider +/// pairs with instructions between them), but such extensions might have a +/// considerable compile time impact. +bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { + // FIXME: This could be much more aggressive, but we need to investigate + // the compile time impact of doing so. + bool changed = false; + + SmallVector<MachineInstr*, 4> toErase; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (DCELimit != -1 && (int)NumDead >= DCELimit) + break; + + MachineBasicBlock::iterator NextMI = next(I); + if (NextMI == MBB->end()) continue; + + int FirstSS, SecondSS; + unsigned LoadReg = 0; + unsigned StoreReg = 0; + if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; + if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue; + if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; + + ++NumDead; + changed = true; + + if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) { + ++NumDead; + toErase.push_back(I); + } + + toErase.push_back(NextMI); + ++I; + } + + for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(), + E = toErase.end(); I != E; ++I) + (*I)->eraseFromParent(); + + return changed; +} + + +bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { + DOUT << "********** Stack Slot Coloring **********\n"; + + MFI = MF.getFrameInfo(); + MRI = &MF.getRegInfo(); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + LS = &getAnalysis<LiveStacks>(); + VRM = &getAnalysis<VirtRegMap>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + + bool Changed = false; + + unsigned NumSlots = LS->getNumIntervals(); + if (NumSlots < 2) { + if (NumSlots == 0 || !VRM->HasUnusedRegisters()) + // Nothing to do! 
+      return false;
+  }
+
+  // Gather spill slot references.
+  ScanForSpillSlotRefs(MF);
+  InitializeSlots();
+  Changed = ColorSlots(MF);
+
+  NextColor = -1;
+  SSIntervals.clear();
+  for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+    SSRefs[i].clear();
+  SSRefs.clear();
+  OrigAlignments.clear();
+  OrigSizes.clear();
+  AllColors.clear();
+  UsedColors.clear();
+  for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+    Assignments[i].clear();
+  Assignments.clear();
+
+  if (Changed) {
+    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+      Changed |= RemoveDeadStores(I);
+  }
+
+  return Changed;
+}
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 0000000..a2c1255
--- /dev/null
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,1053 @@
+//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions, using an intelligent copy-folding technique based on
+// dominator information. This technique is derived from:
+//
+// Budimlic, et al. Fast copy coalescing and live-range identification.
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+// DOI= http://doi.acm.org/10.1145/512529.512534
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+  struct VISIBILITY_HIDDEN StrongPHIElimination : public MachineFunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    StrongPHIElimination() : MachineFunctionPass(&ID) {}
+
+    // Waiting stores, for each MBB, the set of copies that need to
+    // be inserted into that MBB.
+    DenseMap<MachineBasicBlock*,
+             std::multimap<unsigned, unsigned> > Waiting;
+
+    // Stacks holds the renaming stack for each register.
+    std::map<unsigned, std::vector<unsigned> > Stacks;
+
+    // Registers in UsedByAnother are PHI nodes that are themselves
+    // used as operands to another PHI node.
+    std::set<unsigned> UsedByAnother;
+
+    // RenameSets is a map from a PHI-defined register to the input
+    // registers to be coalesced along with the predecessor block for
+    // those input registers.
+    std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
+
+    // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
+    // eliminating, indexed by the register defined by that phi.
+    std::map<unsigned, unsigned> PhiValueNumber;
+
+    // Store the DFS-in number of each block.
+    DenseMap<MachineBasicBlock*, unsigned> preorder;
+
+    // Store the DFS-out number of each block.
+    DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<LiveIntervals>();
+
+      // TODO: Actually make this true.
+      AU.addPreserved<LiveIntervals>();
+      AU.addPreserved<RegisterCoalescer>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      preorder.clear();
+      maxpreorder.clear();
+
+      Waiting.clear();
+      Stacks.clear();
+      UsedByAnother.clear();
+      RenameSets.clear();
+    }
+
+  private:
+
+    /// DomForestNode - Represents a node in the "dominator forest". This is
+    /// a forest in which the nodes represent registers and the edges
+    /// represent a dominance relation in the block defining those registers.
+    struct DomForestNode {
+    private:
+      // Store references to our children
+      std::vector<DomForestNode*> children;
+      // The register we represent
+      unsigned reg;
+
+      // Add another node as our child
+      void addChild(DomForestNode* DFN) { children.push_back(DFN); }
+
+    public:
+      typedef std::vector<DomForestNode*>::iterator iterator;
+
+      // Create a DomForestNode by providing the register it represents, and
+      // the node to be its parent. The virtual root node has register 0
+      // and a null parent.
+      DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
+        if (parent)
+          parent->addChild(this);
+      }
+
+      ~DomForestNode() {
+        for (iterator I = begin(), E = end(); I != E; ++I)
+          delete *I;
+      }
+
+      /// getReg - Return the register that this node represents
+      inline unsigned getReg() { return reg; }
+
+      // Provide iterator access to our children
+      inline DomForestNode::iterator begin() { return children.begin(); }
+      inline DomForestNode::iterator end() { return children.end(); }
+    };
+
+    void computeDFS(MachineFunction& MF);
+    void processBlock(MachineBasicBlock* MBB);
+
+    std::vector<DomForestNode*> computeDomForest(
+                                  std::map<unsigned, MachineBasicBlock*>& instrs,
+                                  MachineRegisterInfo& MRI);
+    void processPHIUnion(MachineInstr* Inst,
+                         std::map<unsigned, MachineBasicBlock*>& PHIUnion,
+                         std::vector<StrongPHIElimination::DomForestNode*>& DF,
+                         std::vector<std::pair<unsigned, unsigned> >& locals);
+    void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
+    void InsertCopies(MachineDomTreeNode* MBB,
+                      SmallPtrSet<MachineBasicBlock*, 16>& v);
+    bool mergeLiveIntervals(unsigned primary, unsigned secondary);
+  };
+}
+
+char StrongPHIElimination::ID = 0;
+static RegisterPass<StrongPHIElimination>
+X("strong-phi-node-elimination",
+  "Eliminate PHI nodes for register allocation, intelligently");
+
+const PassInfo *const llvm::StrongPHIEliminationID = &X;
+
+/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
+/// of the given MachineFunction. These numbers are then used in other parts
+/// of the PHI elimination process.
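The payoff of these two numbers is a constant-time dominance test on the dominator tree: A dominates B exactly when B's DFS-in number falls inside A's [in, out] window. A minimal sketch of the invariant assumed by computeDomForest (not code from this patch):

    // preorder = DFS-in; maxpreorder = largest DFS-in in the subtree (DFS-out).
    bool treeDominates(unsigned inA, unsigned outA,
                       unsigned inB, unsigned outB) {
      return inA <= inB && outB <= outA;
    }

computeDomForest applies the contrapositive: while a register's DFS-in number exceeds the candidate parent's DFS-out number, that parent cannot dominate the register's defining block, so it is popped off the stack.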
+void StrongPHIElimination::computeDFS(MachineFunction& MF) { + SmallPtrSet<MachineDomTreeNode*, 8> frontier; + SmallPtrSet<MachineDomTreeNode*, 8> visited; + + unsigned time = 0; + + MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>(); + + MachineDomTreeNode* node = DT.getRootNode(); + + std::vector<MachineDomTreeNode*> worklist; + worklist.push_back(node); + + while (!worklist.empty()) { + MachineDomTreeNode* currNode = worklist.back(); + + if (!frontier.count(currNode)) { + frontier.insert(currNode); + ++time; + preorder.insert(std::make_pair(currNode->getBlock(), time)); + } + + bool inserted = false; + for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end(); + I != E; ++I) + if (!frontier.count(*I) && !visited.count(*I)) { + worklist.push_back(*I); + inserted = true; + break; + } + + if (!inserted) { + frontier.erase(currNode); + visited.insert(currNode); + maxpreorder.insert(std::make_pair(currNode->getBlock(), time)); + + worklist.pop_back(); + } + } +} + +namespace { + +/// PreorderSorter - a helper class that is used to sort registers +/// according to the preorder number of their defining blocks +class PreorderSorter { +private: + DenseMap<MachineBasicBlock*, unsigned>& preorder; + MachineRegisterInfo& MRI; + +public: + PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p, + MachineRegisterInfo& M) : preorder(p), MRI(M) { } + + bool operator()(unsigned A, unsigned B) { + if (A == B) + return false; + + MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent(); + MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent(); + + if (preorder[ABlock] < preorder[BBlock]) + return true; + else if (preorder[ABlock] > preorder[BBlock]) + return false; + + return false; + } +}; + +} + +/// computeDomForest - compute the subforest of the DomTree corresponding +/// to the defining blocks of the registers in question +std::vector<StrongPHIElimination::DomForestNode*> +StrongPHIElimination::computeDomForest( + std::map<unsigned, MachineBasicBlock*>& regs, + MachineRegisterInfo& MRI) { + // Begin by creating a virtual root node, since the actual results + // may well be a forest. Assume this node has maximum DFS-out number. + DomForestNode* VirtualRoot = new DomForestNode(0, 0); + maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL)); + + // Populate a worklist with the registers + std::vector<unsigned> worklist; + worklist.reserve(regs.size()); + for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(), + E = regs.end(); I != E; ++I) + worklist.push_back(I->first); + + // Sort the registers by the DFS-in number of their defining block + PreorderSorter PS(preorder, MRI); + std::sort(worklist.begin(), worklist.end(), PS); + + // Create a "current parent" stack, and put the virtual root on top of it + DomForestNode* CurrentParent = VirtualRoot; + std::vector<DomForestNode*> stack; + stack.push_back(VirtualRoot); + + // Iterate over all the registers in the previously computed order + for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end(); + I != E; ++I) { + unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()]; + MachineBasicBlock* parentBlock = CurrentParent->getReg() ? + MRI.getVRegDef(CurrentParent->getReg())->getParent() : + 0; + + // If the DFS-in number of the register is greater than the DFS-out number + // of the current parent, repeatedly pop the parent stack until it isn't. 
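+ // (Hypothetical illustration: if the current parent's defining block has
+ // maxpreorder 3 and the next register's defining block has preorder 4,
+ // that block lies outside the parent's dominator subtree, so the parent
+ // is popped and the register attaches further up the stack.)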
+ while (pre > maxpreorder[parentBlock]) {
+ stack.pop_back();
+ CurrentParent = stack.back();
+
+ parentBlock = CurrentParent->getReg() ?
+ MRI.getVRegDef(CurrentParent->getReg())->getParent() :
+ 0;
+ }
+
+ // Now that we've found the appropriate parent, create a DomForestNode for
+ // this register and attach it to the forest
+ DomForestNode* child = new DomForestNode(*I, CurrentParent);
+
+ // Push this new node on the "current parent" stack
+ stack.push_back(child);
+ CurrentParent = child;
+ }
+
+ // Return a vector containing the children of the virtual root node
+ std::vector<DomForestNode*> ret;
+ ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
+ return ret;
+}
+
+/// isLiveIn - helper method that determines, from a regno, if a register
+/// is live into a block
+static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
+ LiveIntervals& LI) {
+ LiveInterval& I = LI.getOrCreateInterval(r);
+ unsigned idx = LI.getMBBStartIdx(MBB);
+ return I.liveAt(idx);
+}
+
+/// isLiveOut - helper method that determines, from a regno, if a register is
+/// live out of a block.
+static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
+ LiveIntervals& LI) {
+ for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
+ E = MBB->succ_end(); PI != E; ++PI)
+ if (isLiveIn(r, *PI, LI))
+ return true;
+
+ return false;
+}
+
+/// interferes - checks for local interferences by scanning a block. The only
+/// tricky parameter is 'mode' which tells it the relationship of the two
+/// registers. 0 - defined in the same block, 1 - first properly dominates
+/// second, 2 - second properly dominates first
+static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
+ LiveIntervals& LV, unsigned mode) {
+ MachineInstr* def = 0;
+ MachineInstr* kill = 0;
+
+ // The code is still in SSA form at this point, so there is only one
+ // definition per VReg. Thus we can safely use MRI->getVRegDef().
+ const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
+
+ bool interference = false;
+
+ // Walk the block, checking for interferences
+ for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
+ MBI != MBE; ++MBI) {
+ MachineInstr* curr = MBI;
+
+ // Same defining block...
+ if (mode == 0) {
+ if (curr == MRI->getVRegDef(a)) {
+ // If we find our first definition, save it
+ if (!def) {
+ def = curr;
+ // If there's already an unkilled definition, then
+ // this is an interference
+ } else if (!kill) {
+ interference = true;
+ break;
+ // If there's a definition followed by a KillInst, then
+ // they can't interfere
+ } else {
+ interference = false;
+ break;
+ }
+ // Symmetric with the above
+ } else if (curr == MRI->getVRegDef(b)) {
+ if (!def) {
+ def = curr;
+ } else if (!kill) {
+ interference = true;
+ break;
+ } else {
+ interference = false;
+ break;
+ }
+ // Store KillInsts if they match up with the definition
+ } else if (curr->killsRegister(a)) {
+ if (def == MRI->getVRegDef(a)) {
+ kill = curr;
+ }
+ } else if (curr->killsRegister(b)) {
+ if (def == MRI->getVRegDef(b)) {
+ kill = curr;
+ }
+ }
+ // First properly dominates second...
+ } else if (mode == 1) {
+ if (curr == MRI->getVRegDef(b)) {
+ // Definition of second without kill of first is an interference
+ if (!kill) {
+ interference = true;
+ break;
+ // Definition after a kill is a non-interference
+ } else {
+ interference = false;
+ break;
+ }
+ // Save KillInsts of First
+ } else if (curr->killsRegister(a)) {
+ kill = curr;
+ }
+ // Symmetric with the above
+ } else if (mode == 2) {
+ if (curr == MRI->getVRegDef(a)) {
+ if (!kill) {
+ interference = true;
+ break;
+ } else {
+ interference = false;
+ break;
+ }
+ } else if (curr->killsRegister(b)) {
+ kill = curr;
+ }
+ }
+ }
+
+ return interference;
+}
+
+/// processBlock - Determine how to break up PHIs in the current block. Each
+/// PHI is broken up by some combination of renaming its operands and inserting
+/// copies. This method is responsible for determining which operands receive
+/// which treatment.
+void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+
+ // Holds names that have been added to a set in any PHI within this block
+ // before the current one.
+ std::set<unsigned> ProcessedNames;
+
+ // Iterate over all the PHI nodes in this block
+ MachineBasicBlock::iterator P = MBB->begin();
+ while (P != MBB->end() && P->getOpcode() == TargetInstrInfo::PHI) {
+ unsigned DestReg = P->getOperand(0).getReg();
+
+ // Don't bother doing PHI elimination for dead PHIs.
+ if (P->registerDefIsDead(DestReg)) {
+ ++P;
+ continue;
+ }
+
+ LiveInterval& PI = LI.getOrCreateInterval(DestReg);
+ unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P));
+ VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
+ PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
+
+ // PHIUnion is the set of incoming registers to the PHI node that
+ // are going to be renamed rather than having copies inserted. This set
+ // is refined over the course of this function. UnionedBlocks is the set
+ // of corresponding MBBs.
+ std::map<unsigned, MachineBasicBlock*> PHIUnion;
+ SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
+
+ // Iterate over the operands of the PHI node
+ for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
+ unsigned SrcReg = P->getOperand(i-1).getReg();
+
+ // Don't need to try to coalesce a register with itself.
+ if (SrcReg == DestReg) {
+ ProcessedNames.insert(SrcReg);
+ continue;
+ }
+
+ // We don't need to insert copies for implicit_defs.
+ MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
+ if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ ProcessedNames.insert(SrcReg);
+
+ // Check for trivial interferences via liveness information, allowing us
+ // to avoid extra work later. Any registers that interfere cannot both
+ // be in the renaming set, so choose one and add copies for it instead.
+ // The conditions are: + // 1) if the operand is live into the PHI node's block OR + // 2) if the PHI node is live out of the operand's defining block OR + // 3) if the operand is itself a PHI node and the original PHI is + // live into the operand's defining block OR + // 4) if the operand is already being renamed for another PHI node + // in this block OR + // 5) if any two operands are defined in the same block, insert copies + // for one of them + if (isLiveIn(SrcReg, P->getParent(), LI) || + isLiveOut(P->getOperand(0).getReg(), + MRI.getVRegDef(SrcReg)->getParent(), LI) || + ( MRI.getVRegDef(SrcReg)->getOpcode() == TargetInstrInfo::PHI && + isLiveIn(P->getOperand(0).getReg(), + MRI.getVRegDef(SrcReg)->getParent(), LI) ) || + ProcessedNames.count(SrcReg) || + UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) { + + // Add a copy for the selected register + MachineBasicBlock* From = P->getOperand(i).getMBB(); + Waiting[From].insert(std::make_pair(SrcReg, DestReg)); + UsedByAnother.insert(SrcReg); + } else { + // Otherwise, add it to the renaming set + PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB())); + UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent()); + } + } + + // Compute the dominator forest for the renaming set. This is a forest + // where the nodes are the registers and the edges represent dominance + // relations between the defining blocks of the registers + std::vector<StrongPHIElimination::DomForestNode*> DF = + computeDomForest(PHIUnion, MRI); + + // Walk DomForest to resolve interferences at an inter-block level. This + // will remove registers from the renaming set (and insert copies for them) + // if interferences are found. + std::vector<std::pair<unsigned, unsigned> > localInterferences; + processPHIUnion(P, PHIUnion, DF, localInterferences); + + // If one of the inputs is defined in the same block as the current PHI + // then we need to check for a local interference between that input and + // the PHI. + for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), + E = PHIUnion.end(); I != E; ++I) + if (MRI.getVRegDef(I->first)->getParent() == P->getParent()) + localInterferences.push_back(std::make_pair(I->first, + P->getOperand(0).getReg())); + + // The dominator forest walk may have returned some register pairs whose + // interference cannot be determined from dominator analysis. We now + // examine these pairs for local interferences. 
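+ // (Hypothetical illustration: a pair like (%reg1024, %reg1025) reaches
+ // this loop when one value is live into the block defining the other, or
+ // the two share a defining block, so only a scan of that block can decide
+ // whether the values actually overlap.)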
+ for (std::vector<std::pair<unsigned, unsigned> >::iterator I = + localInterferences.begin(), E = localInterferences.end(); I != E; ++I) { + std::pair<unsigned, unsigned> p = *I; + + MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); + + // Determine the block we need to scan and the relationship between + // the two registers + MachineBasicBlock* scan = 0; + unsigned mode = 0; + if (MRI.getVRegDef(p.first)->getParent() == + MRI.getVRegDef(p.second)->getParent()) { + scan = MRI.getVRegDef(p.first)->getParent(); + mode = 0; // Same block + } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(), + MRI.getVRegDef(p.second)->getParent())) { + scan = MRI.getVRegDef(p.second)->getParent(); + mode = 1; // First dominates second + } else { + scan = MRI.getVRegDef(p.first)->getParent(); + mode = 2; // Second dominates first + } + + // If there's an interference, we need to insert copies + if (interferes(p.first, p.second, scan, LI, mode)) { + // Insert copies for First + for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { + if (P->getOperand(i-1).getReg() == p.first) { + unsigned SrcReg = p.first; + MachineBasicBlock* From = P->getOperand(i).getMBB(); + + Waiting[From].insert(std::make_pair(SrcReg, + P->getOperand(0).getReg())); + UsedByAnother.insert(SrcReg); + + PHIUnion.erase(SrcReg); + } + } + } + } + + // Add the renaming set for this PHI node to our overall renaming information + for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(), + QE = PHIUnion.end(); QI != QE; ++QI) { + DOUT << "Adding Renaming: " << QI->first << " -> " + << P->getOperand(0).getReg() << "\n"; + } + + RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); + + // Remember which registers are already renamed, so that we don't try to + // rename them for another PHI node in this block + for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), + E = PHIUnion.end(); I != E; ++I) + ProcessedNames.insert(I->first); + + ++P; + } +} + +/// processPHIUnion - Take a set of candidate registers to be coalesced when +/// decomposing the PHI instruction. Use the DominanceForest to remove the ones +/// that are known to interfere, and flag others that need to be checked for +/// local interferences. +void StrongPHIElimination::processPHIUnion(MachineInstr* Inst, + std::map<unsigned, MachineBasicBlock*>& PHIUnion, + std::vector<StrongPHIElimination::DomForestNode*>& DF, + std::vector<std::pair<unsigned, unsigned> >& locals) { + + std::vector<DomForestNode*> worklist(DF.begin(), DF.end()); + SmallPtrSet<DomForestNode*, 4> visited; + + // Code is still in SSA form, so we can use MRI::getVRegDef() + MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo(); + + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + unsigned DestReg = Inst->getOperand(0).getReg(); + + // DF walk on the DomForest + while (!worklist.empty()) { + DomForestNode* DFNode = worklist.back(); + + visited.insert(DFNode); + + bool inserted = false; + for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end(); + CI != CE; ++CI) { + DomForestNode* child = *CI; + + // If the current node is live-out of the defining block of one of its + // children, insert a copy for it. NOTE: The paper actually calls for + // a more elaborate heuristic for determining whether to insert copies + // for the child or the parent. In the interest of simplicity, we're + // just always choosing the parent. 
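+ // (Hypothetical illustration of the case below: if parent %reg1024 is
+ // live-out of the block defining its child %reg1025, the two values
+ // overlap and cannot share one name, so %reg1024 is removed from the
+ // renaming set and a copy is scheduled for it instead.)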
+ if (isLiveOut(DFNode->getReg(),
+ MRI.getVRegDef(child->getReg())->getParent(), LI)) {
+ // Insert copies for parent
+ for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
+ if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
+ unsigned SrcReg = DFNode->getReg();
+ MachineBasicBlock* From = Inst->getOperand(i).getMBB();
+
+ Waiting[From].insert(std::make_pair(SrcReg, DestReg));
+ UsedByAnother.insert(SrcReg);
+
+ PHIUnion.erase(SrcReg);
+ }
+ }
+
+ // If a node is live-in to the defining block of one of its children, but
+ // not live-out, then we need to scan that block for local interferences.
+ } else if (isLiveIn(DFNode->getReg(),
+ MRI.getVRegDef(child->getReg())->getParent(), LI) ||
+ MRI.getVRegDef(DFNode->getReg())->getParent() ==
+ MRI.getVRegDef(child->getReg())->getParent()) {
+ // Add (p, c) to possible local interferences
+ locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
+ }
+
+ if (!visited.count(child)) {
+ worklist.push_back(child);
+ inserted = true;
+ }
+ }
+
+ if (!inserted) worklist.pop_back();
+ }
+}
+
+/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
+/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
+/// problems.
+///
+/// Based on "Practical Improvements to the Construction and Destruction
+/// of Static Single Assignment Form" by Briggs, et al.
+void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
+ std::set<unsigned>& pushed) {
+ // FIXME: This function needs to update LiveIntervals
+ std::multimap<unsigned, unsigned>& copy_set = Waiting[MBB];
+
+ std::multimap<unsigned, unsigned> worklist;
+ std::map<unsigned, unsigned> map;
+
+ // Set up the worklist of initial copies
+ for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+ E = copy_set.end(); I != E; ) {
+ map.insert(std::make_pair(I->first, I->first));
+ map.insert(std::make_pair(I->second, I->second));
+
+ if (!UsedByAnother.count(I->second)) {
+ worklist.insert(*I);
+
+ // Avoid iterator invalidation
+ std::multimap<unsigned, unsigned>::iterator OI = I;
+ ++I;
+ copy_set.erase(OI);
+ } else {
+ ++I;
+ }
+ }
+
+ LiveIntervals& LI = getAnalysis<LiveIntervals>();
+ MachineFunction* MF = MBB->getParent();
+ MachineRegisterInfo& MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
+
+ // Iterate over the worklist, inserting copies
+ while (!worklist.empty() || !copy_set.empty()) {
+ while (!worklist.empty()) {
+ std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
+ std::pair<unsigned, unsigned> curr = *WI;
+ worklist.erase(WI);
+
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
+
+ if (isLiveOut(curr.second, MBB, LI)) {
+ // Create a temporary
+ unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+
+ // Insert copy from curr.second to a temporary at
+ // the Phi defining curr.second
+ MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
+ TII->copyRegToReg(*PI->getParent(), PI, t,
+ curr.second, RC, RC);
+
+ DOUT << "Inserted copy from " << curr.second << " to " << t << "\n";
+
+ // Push temporary on Stacks
+ Stacks[curr.second].push_back(t);
+
+ // Insert curr.second in pushed
+ pushed.insert(curr.second);
+
+ // Create a live interval for this temporary
+ InsertedPHIDests.push_back(std::make_pair(t, --PI));
+ }
+
+ // Insert copy from map[curr.first] to curr.second
+ TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
+ map[curr.first],
+ RC, RC);
+ map[curr.first] = curr.second;
+ DOUT << "Inserted copy from " << curr.first << " to "
+ << curr.second << "\n";
+
+ // Push this copy onto InsertedPHIDests so we can
+ // update LiveIntervals with it.
+ MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
+ InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
+
+ // If curr.first is a destination in copy_set...
+ for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
+ E = copy_set.end(); I != E; )
+ if (curr.first == I->second) {
+ std::pair<unsigned, unsigned> temp = *I;
+ worklist.insert(temp);
+
+ // Avoid iterator invalidation
+ std::multimap<unsigned, unsigned>::iterator OI = I;
+ ++I;
+ copy_set.erase(OI);
+
+ break;
+ } else {
+ ++I;
+ }
+ }
+
+ if (!copy_set.empty()) {
+ std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
+ std::pair<unsigned, unsigned> curr = *CI;
+ worklist.insert(curr);
+ copy_set.erase(CI);
+
+ LiveInterval& I = LI.getInterval(curr.second);
+ MachineBasicBlock::iterator term = MBB->getFirstTerminator();
+ unsigned endIdx = 0;
+ if (term != MBB->end())
+ endIdx = LI.getInstructionIndex(term);
+ else
+ endIdx = LI.getMBBEndIdx(MBB);
+
+ if (I.liveAt(endIdx)) {
+ const TargetRegisterClass *RC =
+ MF->getRegInfo().getRegClass(curr.first);
+
+ // Insert a copy from dest to a new temporary t at the end of b
+ unsigned t = MF->getRegInfo().createVirtualRegister(RC);
+ TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
+ curr.second, RC, RC);
+ map[curr.second] = t;
+
+ MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
+ InsertedPHIDests.push_back(std::make_pair(t, --TI));
+ }
+ }
+ }
+
+ // Renumber the instructions so that we can perform the index computations
+ // needed to create new live intervals.
+ LI.computeNumbering();
+
+ // For copies that we inserted at the ends of predecessors, we construct
+ // live intervals. This is pretty easy, since we know that the destination
+ // register cannot have been live at that point previously. We just have
+ // to make sure that, for registers that serve as inputs to more than one
+ // PHI, we don't create multiple overlapping live intervals.
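+ // (Illustration: if the same destination register received copies in
+ // several predecessor blocks, only its first entry below creates the new
+ // interval; the RegHandled set skips later duplicates so no overlapping
+ // interval is built for the same vreg.)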
+ std::set<unsigned> RegHandled; + for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I = + InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) { + if (RegHandled.insert(I->first).second) { + LiveInterval& Int = LI.getOrCreateInterval(I->first); + unsigned instrIdx = LI.getInstructionIndex(I->second); + if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx))) + Int.removeRange(LiveIntervals::getDefIndex(instrIdx), + LI.getMBBEndIdx(I->second->getParent())+1, + true); + + LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second); + R.valno->copy = I->second; + R.valno->def = + LiveIntervals::getDefIndex(LI.getInstructionIndex(I->second)); + } + } +} + +/// InsertCopies - insert copies into MBB and all of its successors +void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, + SmallPtrSet<MachineBasicBlock*, 16>& visited) { + MachineBasicBlock* MBB = MDTN->getBlock(); + visited.insert(MBB); + + std::set<unsigned> pushed; + + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + // Rewrite register uses from Stacks + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->getOpcode() == TargetInstrInfo::PHI) + continue; + + for (unsigned i = 0; i < I->getNumOperands(); ++i) + if (I->getOperand(i).isReg() && + Stacks[I->getOperand(i).getReg()].size()) { + // Remove the live range for the old vreg. + LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg()); + LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining( + LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); + if (OldLR != OldInt.end()) + OldInt.removeRange(*OldLR, true); + + // Change the register + I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back()); + + // Add a live range for the new vreg + LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); + VNInfo* FirstVN = *Int.vni_begin(); + FirstVN->hasPHIKill = false; + if (I->getOperand(i).isKill()) + FirstVN->kills.push_back( + LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); + + LiveRange LR (LI.getMBBStartIdx(I->getParent()), + LiveIntervals::getUseIndex(LI.getInstructionIndex(I))+1, + FirstVN); + + Int.addRange(LR); + } + } + + // Schedule the copies for this block + ScheduleCopies(MBB, pushed); + + // Recur down the dominator tree. 
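+ // (Note on the stack discipline: a temporary pushed above for some vreg,
+ // say a hypothetical %reg2000 for %reg1026, stays on top of
+ // Stacks[%reg1026] throughout the recursion below, and the pop loop that
+ // follows removes it on exit so sibling subtrees never see it.)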
+ for (MachineDomTreeNode::iterator I = MDTN->begin(), + E = MDTN->end(); I != E; ++I) + if (!visited.count((*I)->getBlock())) + InsertCopies(*I, visited); + + // As we exit this block, pop the names we pushed while processing it + for (std::set<unsigned>::iterator I = pushed.begin(), + E = pushed.end(); I != E; ++I) + Stacks[*I].pop_back(); +} + +bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, + unsigned secondary) { + + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + LiveInterval& LHS = LI.getOrCreateInterval(primary); + LiveInterval& RHS = LI.getOrCreateInterval(secondary); + + LI.computeNumbering(); + + DenseMap<VNInfo*, VNInfo*> VNMap; + for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { + LiveRange R = *I; + + unsigned Start = R.start; + unsigned End = R.end; + if (LHS.getLiveRangeContaining(Start)) + return false; + + if (LHS.getLiveRangeContaining(End)) + return false; + + LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R); + if (RI != LHS.end() && RI->start < End) + return false; + } + + for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { + LiveRange R = *I; + VNInfo* OldVN = R.valno; + VNInfo*& NewVN = VNMap[OldVN]; + if (!NewVN) { + NewVN = LHS.getNextValue(OldVN->def, + OldVN->copy, + LI.getVNInfoAllocator()); + NewVN->kills = OldVN->kills; + } + + LiveRange LR (R.start, R.end, NewVN); + LHS.addRange(LR); + } + + LI.removeInterval(RHS.reg); + + return true; +} + +bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { + LiveIntervals& LI = getAnalysis<LiveIntervals>(); + + // Compute DFS numbers of each block + computeDFS(Fn); + + // Determine which phi node operands need copies + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + if (!I->empty() && + I->begin()->getOpcode() == TargetInstrInfo::PHI) + processBlock(I); + + // Break interferences where two different phis want to coalesce + // in the same register. + std::set<unsigned> seen; + typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > + RenameSetType; + for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); + I != E; ++I) { + for (std::map<unsigned, MachineBasicBlock*>::iterator + OI = I->second.begin(), OE = I->second.end(); OI != OE; ) { + if (!seen.count(OI->first)) { + seen.insert(OI->first); + ++OI; + } else { + Waiting[OI->second].insert(std::make_pair(OI->first, I->first)); + unsigned reg = OI->first; + ++OI; + I->second.erase(reg); + DOUT << "Removing Renaming: " << reg << " -> " << I->first << "\n"; + } + } + } + + // Insert copies + // FIXME: This process should probably preserve LiveIntervals + SmallPtrSet<MachineBasicBlock*, 16> visited; + MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); + InsertCopies(MDT.getRootNode(), visited); + + // Perform renaming + for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); + I != E; ++I) + while (I->second.size()) { + std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin(); + + DOUT << "Renaming: " << SI->first << " -> " << I->first << "\n"; + + if (SI->first != I->first) { + if (mergeLiveIntervals(I->first, SI->first)) { + Fn.getRegInfo().replaceRegWith(SI->first, I->first); + + if (RenameSets.count(SI->first)) { + I->second.insert(RenameSets[SI->first].begin(), + RenameSets[SI->first].end()); + RenameSets.erase(SI->first); + } + } else { + // Insert a last-minute copy if a conflict was detected. 
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first); + TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(), + I->first, SI->first, RC, RC); + + LI.computeNumbering(); + + LiveInterval& Int = LI.getOrCreateInterval(I->first); + unsigned instrIdx = + LI.getInstructionIndex(--SI->second->getFirstTerminator()); + if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx))) + Int.removeRange(LiveIntervals::getDefIndex(instrIdx), + LI.getMBBEndIdx(SI->second)+1, true); + + LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, + --SI->second->getFirstTerminator()); + R.valno->copy = --SI->second->getFirstTerminator(); + R.valno->def = LiveIntervals::getDefIndex(instrIdx); + + DOUT << "Renaming failed: " << SI->first << " -> " + << I->first << "\n"; + } + } + + LiveInterval& Int = LI.getOrCreateInterval(I->first); + const LiveRange* LR = + Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second)); + LR->valno->hasPHIKill = true; + + I->second.erase(SI->first); + } + + // Remove PHIs + std::vector<MachineInstr*> phis; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end(); + BI != BE; ++BI) + if (BI->getOpcode() == TargetInstrInfo::PHI) + phis.push_back(BI); + } + + for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end(); + I != E; ) { + MachineInstr* PInstr = *(I++); + + // If this is a dead PHI node, then remove it from LiveIntervals. + unsigned DestReg = PInstr->getOperand(0).getReg(); + LiveInterval& PI = LI.getInterval(DestReg); + if (PInstr->registerDefIsDead(DestReg)) { + if (PI.containsOneValue()) { + LI.removeInterval(DestReg); + } else { + unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); + PI.removeRange(*PI.getLiveRangeContaining(idx), true); + } + } else { + // Trim live intervals of input registers. They are no longer live into + // this block if they died after the PHI. If they lived after it, don't + // trim them because they might have other legitimate uses. + for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) { + unsigned reg = PInstr->getOperand(i).getReg(); + + MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB(); + LiveInterval& InputI = LI.getInterval(reg); + if (MBB != PInstr->getParent() && + InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) && + InputI.expiredAt(LI.getInstructionIndex(PInstr) + + LiveInterval::InstrSlots::NUM)) + InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), + LI.getInstructionIndex(PInstr), + true); + } + + // If the PHI is not dead, then the valno defined by the PHI + // now has an unknown def. + unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); + const LiveRange* PLR = PI.getLiveRangeContaining(idx); + PLR->valno->def = ~0U; + LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), + PLR->start, PLR->valno); + PI.addRange(R); + } + + LI.RemoveMachineInstrFromMaps(PInstr); + PInstr->eraseFromParent(); + } + + LI.computeNumbering(); + + return true; +} diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp new file mode 100644 index 0000000..a5e1ee4 --- /dev/null +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -0,0 +1,194 @@ +//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class; it just provides default
+// implementations of various methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+using namespace llvm;
+
+// commuteInstruction - The default implementation of this method just
+// exchanges operands 1 and 2.
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ if (MI->getOperand(0).getReg() == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ MachineFunction &MF = *MI->getParent()->getParent();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill));
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+ return MI;
+}
+
+/// CommuteChangesDestination - Return true if commuting the specified
+/// instruction will also change the destination operand. Also return the
+/// current operand index of the would-be new destination register by
+/// reference. This can happen when the commutable instruction is also a
+/// two-address instruction.
+bool TargetInstrInfoImpl::CommuteChangesDestination(MachineInstr *MI,
+ unsigned &OpIdx) const{
+ assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+ "This only knows how to commute register operands so far");
+ if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && + "Expecting a two-address instruction!"); + OpIdx = 2; + return true; + } + return false; +} + + +bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + bool MadeChange = false; + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isPredicable()) + return false; + + for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (TID.OpInfo[i].isPredicate()) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + MO.setReg(Pred[j].getReg()); + MadeChange = true; + } else if (MO.isImm()) { + MO.setImm(Pred[j].getImm()); + MadeChange = true; + } else if (MO.isMBB()) { + MO.setMBB(Pred[j].getMBB()); + MadeChange = true; + } + ++j; + } + } + return MadeChange; +} + +void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + const MachineInstr *Orig) const { + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); +} + +unsigned +TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { + unsigned FnSize = 0; + for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + const MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); + I != E; ++I) + FnSize += GetInstSizeInBytes(I); + } + return FnSize; +} + +/// foldMemoryOperand - Attempt to fold a load or store of the specified stack +/// slot into the specified machine instruction for the specified operand(s). +/// If this is possible, a new instruction is returned with the specified +/// operand folded, otherwise NULL is returned. The client is responsible for +/// removing the old instruction and adding the new one in the instruction +/// stream. +MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const { + unsigned Flags = 0; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (MI->getOperand(Ops[i]).isDef()) + Flags |= MachineMemOperand::MOStore; + else + Flags |= MachineMemOperand::MOLoad; + + // Ask the target to do the actual folding. + MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); + if (!NewMI) return 0; + + assert((!(Flags & MachineMemOperand::MOStore) || + NewMI->getDesc().mayStore()) && + "Folded a def to a non-store!"); + assert((!(Flags & MachineMemOperand::MOLoad) || + NewMI->getDesc().mayLoad()) && + "Folded a use to a non-load!"); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FrameIndex) != -1); + MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FrameIndex), + Flags, + MFI.getObjectOffset(FrameIndex), + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); + NewMI->addMemOperand(MF, MMO); + + return NewMI; +} + +/// foldMemoryOperand - Same as the previous version except it allows folding +/// of any load and store from / to any address, not just from a specific +/// stack slot. 
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ // Copy the memoperands from the load to the folded instruction.
+ for (std::list<MachineMemOperand>::iterator I = LoadMI->memoperands_begin(),
+ E = LoadMI->memoperands_end(); I != E; ++I)
+ NewMI->addMemOperand(MF, *I);
+
+ return NewMI;
+} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp new file mode 100644 index 0000000..3c40404 --- /dev/null +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -0,0 +1,997 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to
+// be capable of handling the non-SSA nature of these rewritten
+// virtual registers.
+//
+// It is also worth noting that the duplicate operand of the two-address
+// instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class VISIBILITY_HIDDEN TwoAddressInstructionPass
+ : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+
+ // DistanceMap - Keep track of the distance of an MI from the start of the
+ // current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // SrcRegMap - A map from virtual registers to the physical registers they
+ // are likely to be coalesced to, due to copies from physical registers to
+ // virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+ // DstRegMap - A map from virtual registers to the physical registers they
+ // are likely to be coalesced to, due to copies to physical registers from
+ // virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
+
+ bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
+ unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc);
+
+ bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef);
+
+ MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist);
+
+ bool CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA);
+
+ bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned Dist);
+
+ void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ if (StrongPHIElim)
+ AU.addPreservedID(StrongPHIEliminationID);
+ else
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+ };
+}
+
+char TwoAddressInstructionPass::ID = 0;
+static RegisterPass<TwoAddressInstructionPass>
+X("twoaddressinstruction", "Two-Address instruction pass");
+
+const PassInfo *const llvm::TwoAddressInstructionPassID = &X;
+
+/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above-mentioned register to reduce
+/// register pressure.
+bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
+ MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(TII, SeenStore))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = NULL;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = NULL;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill()) {
+ if (OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// isTwoAddrUse - Return true if the specified MI is using the specified
+/// register as a two-address operand.
+static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == Reg &&
+ (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
+ // Earlier use is a two-address one.
+ return true;
+ }
+ return false;
+}
+
+/// isProfitableToReMat - Return true if the heuristic determines it is likely
+/// to be profitable to re-materialize the definition of Reg rather than copy
+/// the register.
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+ const TargetRegisterClass *RC,
+ MachineInstr *MI, MachineInstr *DefMI,
+ MachineBasicBlock *MBB, unsigned Loc) {
+ bool OtherUse = false;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = UseMO.getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end() && DI->second == Loc)
+ continue; // Current use.
+ OtherUse = true;
+ // There is at least one other use in the MBB that will clobber the
+ // register.
+ if (isTwoAddrUse(UseMI, Reg))
+ return true;
+ }
+ }
+
+ // If other uses in MBB are not two-address uses, then don't remat.
+ if (OtherUse)
+ return false;
+
+ // No other uses in the same block, remat if it's defined in the same
+ // block so it does not unnecessarily extend the live range.
+ return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed. It also returns the last
+/// def location by reference.
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+ MachineBasicBlock *MBB, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+ MachineBasicBlock *MBB,
+ unsigned Dist) {
+ unsigned LastUseDist = 0;
+ MachineInstr *LastUse = 0;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+ E = MRI->reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (DI->second >= Dist)
+ continue;
+
+ if (MO.isUse() && DI->second > LastUseDist) {
+ LastUse = DI->first;
+ LastUseDist = DI->second;
+ }
+ }
+ return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or
+/// an extract_subreg, insert_subreg, or subreg_to_reg instruction. It also
+/// returns the source and destination registers and whether they are physical
+/// registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ unsigned SrcSubIdx, DstSubIdx;
+ if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ if (MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else if (MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ }
+ }
+
+ if (DstReg) {
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+ }
+ return false;
+}
+
+/// isKilled - Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ if (!DefMI->killsRegister(Reg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (next(Begin) != MRI->def_end())
+ return true;
+ DefMI = &*Begin;
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// isTwoAddrUse - Return true if the specified MI uses the specified register
+/// as a two-address use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned NumOps = (MI.getOpcode() == TargetInstrInfo::INLINEASM)
+ ? MI.getNumOperands() : TID.getNumOperands();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// findOnlyInterestingUse - Given a register, if it has a single in-basic-block
+/// use, return the use instruction if it's a copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg);
+ if (UI == MRI->use_end())
+ return 0;
+ MachineInstr &UseMI = *UI;
+ if (++UI != MRI->use_end())
+ // More than one use.
+ return 0;
+ if (UseMI.getParent() != MBB)
+ return 0;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to
+/// commute the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+ MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Dist) {
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!MI->killsRegister(regC))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ unsigned ToRegB = getMappedReg(regB, DstRegMap);
+ unsigned ToRegC = getMappedReg(regC, DstRegMap);
+ if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
+ (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ regsAreCompatible(FromRegC, ToRegB, TRI)))
+ return true;
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+ return true;
+
+ // Since there are no intervening uses of either register, commute if the
+ // def of regC is closer; its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool
+TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
+ MachineInstr *MI = mi;
+ DOUT << "2addr: COMMUTING : " << *MI;
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+ if (NewMI == 0) {
+ DOUT << "2addr: COMMUTING FAILED!\n";
+ return false;
+ }
+
+ DOUT << "2addr: COMMUTED TO: " << *NewMI;
+ // If commuting created a new instruction, update live variables.
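+ // (Targets may commute in place and return MI itself, or build a fresh
+ // instruction; only the latter case needs the insert/erase and
+ // DistanceMap update below.)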
+ if (NewMI != MI) {
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(RegC, MI, NewMI);
+
+ mbbi->insert(mi, NewMI); // Insert the new inst
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = NewMI;
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ }
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ MachineFunction::iterator &mbbi,
+ unsigned RegB, unsigned Dist) {
+ MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+ if (NewMI) {
+ DOUT << "2addr: CONVERTING 2-ADDR: " << *mi;
+ DOUT << "2addr: TO 3-ADDR: " << *NewMI;
+ bool Sunk = false;
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+ // FIXME: Temporary workaround. If the new instruction doesn't
+ // use RegB, convertToThreeAddress must have created more
+ // than one instruction.
+ Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+ mbbi->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = next(mi);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
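+/// (A note on the example above, which uses hypothetical registers:
+/// SrcRegMap ends up chaining v1026 -> v1024 -> r0 through the tied operand,
+/// while DstRegMap maps v1026 -> r1; getMappedReg resolves such chains, and
+/// the resulting r0/r1 mismatch is exactly what isProfitableToCommute and
+/// isProfitableToConv3Addr look for.)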
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+                                     MachineBasicBlock *MBB,
+                                     SmallPtrSet<MachineInstr*, 8> &Processed) {
+  if (Processed.count(MI))
+    return;
+
+  bool IsSrcPhys, IsDstPhys;
+  unsigned SrcReg, DstReg;
+  if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+    return;
+
+  if (IsDstPhys && !IsSrcPhys)
+    DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+  else if (!IsDstPhys && IsSrcPhys) {
+    bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+    if (!isNew)
+      assert(SrcRegMap[DstReg] == SrcReg &&
+             "Can't map to two src physical registers!");
+
+    SmallVector<unsigned, 4> VirtRegPairs;
+    bool IsCopy = false;
+    unsigned NewReg = 0;
+    while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI, TII,
+                                                   IsCopy, NewReg, IsDstPhys)) {
+      if (IsCopy) {
+        if (!Processed.insert(UseMI))
+          break;
+      }
+
+      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+      if (DI != DistanceMap.end())
+        // Earlier in the same MBB. Reached via a back edge.
+        break;
+
+      if (IsDstPhys) {
+        VirtRegPairs.push_back(NewReg);
+        break;
+      }
+      bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
+      if (!isNew)
+        assert(SrcRegMap[NewReg] == DstReg &&
+               "Can't map to two src physical registers!");
+      VirtRegPairs.push_back(NewReg);
+      DstReg = NewReg;
+    }
+
+    if (!VirtRegPairs.empty()) {
+      unsigned ToReg = VirtRegPairs.back();
+      VirtRegPairs.pop_back();
+      while (!VirtRegPairs.empty()) {
+        unsigned FromReg = VirtRegPairs.back();
+        VirtRegPairs.pop_back();
+        bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+        if (!isNew)
+          assert(DstRegMap[FromReg] == ToReg &&
+                 "Can't map to two dst physical registers!");
+        ToReg = FromReg;
+      }
+    }
+  }
+
+  Processed.insert(MI);
+}
+
+/// isSafeToDelete - If the specified instruction does not produce any side
+/// effects and all of its defs are dead, then it's safe to delete.
+static bool isSafeToDelete(MachineInstr *MI, unsigned Reg,
+                           const TargetInstrInfo *TII,
+                           SmallVector<unsigned, 4> &Kills) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (TID.mayStore() || TID.isCall())
+    return false;
+  if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+    return false;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    if (MO.isDef() && !MO.isDead())
+      return false;
+    if (MO.isUse() && MO.getReg() != Reg && MO.isKill())
+      Kills.push_back(MO.getReg());
+  }
+
+  return true;
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+  DOUT << "Machine Function\n";
+  const TargetMachine &TM = MF.getTarget();
+  MRI = &MF.getRegInfo();
+  TII = TM.getInstrInfo();
+  TRI = TM.getRegisterInfo();
+  LV = getAnalysisIfAvailable<LiveVariables>();
+
+  bool MadeChange = false;
+
+  DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n";
+  DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+
+  // ReMatRegs - Keep track of the registers whose defs are remat'ed.
+  BitVector ReMatRegs;
+  ReMatRegs.resize(MRI->getLastVirtReg()+1);
+
+  SmallPtrSet<MachineInstr*, 8> Processed;
+  for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+       mbbi != mbbe; ++mbbi) {
+    unsigned Dist = 0;
+    DistanceMap.clear();
+    SrcRegMap.clear();
+    DstRegMap.clear();
+    Processed.clear();
+    for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+         mi != me; ) {
+      MachineBasicBlock::iterator nmi = next(mi);
+      const TargetInstrDesc &TID = mi->getDesc();
+      bool FirstTied = true;
+
+      DistanceMap.insert(std::make_pair(mi, ++Dist));
+
+      ProcessCopy(&*mi, &*mbbi, Processed);
+
+      unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
+        ? mi->getNumOperands() : TID.getNumOperands();
+      for (unsigned si = 0; si < NumOps; ++si) {
+        unsigned ti = 0;
+        if (!mi->isRegTiedToDefOperand(si, &ti))
+          continue;
+
+        if (FirstTied) {
+          ++NumTwoAddressInstrs;
+          DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM));
+        }
+
+        FirstTied = false;
+
+        assert(mi->getOperand(si).isReg() && mi->getOperand(si).getReg() &&
+               mi->getOperand(si).isUse() && "two address instruction invalid");
+
+        // If the two operands are the same, we just remove the use
+        // and mark the def as def&use; otherwise we have to insert a copy.
+        if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) {
+          // Rewrite:
+          //     a = b op c
+          // to:
+          //     a = b
+          //     a = a op c
+          unsigned regA = mi->getOperand(ti).getReg();
+          unsigned regB = mi->getOperand(si).getReg();
+
+          assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+                 "cannot update physical register live information");
+
+#ifndef NDEBUG
+          // First, verify that we don't have a use of a in the instruction
+          // (a = b + a for example) because our transformation will not work.
+          // This should never occur because we are in SSA form.
+          for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+            assert(i == ti ||
+                   !mi->getOperand(i).isReg() ||
+                   mi->getOperand(i).getReg() != regA);
+#endif
+
+          // If this instruction is not the killing user of B, see if we can
+          // rearrange the code to make it so. Making it the killing user will
+          // allow us to coalesce A and B together, eliminating the copy we are
+          // about to insert.
+          if (!isKilled(*mi, regB, MRI, TII)) {
+            // If regA is dead and the instruction can be deleted, just delete
+            // it so it doesn't clobber regB.
+            SmallVector<unsigned, 4> Kills;
+            if (mi->getOperand(ti).isDead() &&
+                isSafeToDelete(mi, regB, TII, Kills)) {
+              SmallVector<std::pair<std::pair<unsigned, bool>
+                  ,MachineInstr*>, 4> NewKills;
+              bool ReallySafe = true;
+              // If this instruction kills some virtual registers, we need
+              // to update the kill information. If it's not possible to do
+              // so, then bail out.
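+              // Hedged sketch of the bookkeeping below (names illustrative):
+              // if the dying instruction kills %v7 and the last use of %v7
+              // earlier in this block is instruction I, record (%v7, isModRef)
+              // together with I in NewKills so I can be re-marked as the kill
+              // once the deletion happens. A physreg kill, or a kill with no
+              // earlier use in the block, makes the deletion unsafe instead.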
+              while (!Kills.empty()) {
+                unsigned Kill = Kills.back();
+                Kills.pop_back();
+                if (TargetRegisterInfo::isPhysicalRegister(Kill)) {
+                  ReallySafe = false;
+                  break;
+                }
+                MachineInstr *LastKill = FindLastUseInMBB(Kill, &*mbbi, Dist);
+                if (LastKill) {
+                  bool isModRef = LastKill->modifiesRegister(Kill);
+                  NewKills.push_back(std::make_pair(std::make_pair(Kill,isModRef),
+                                                    LastKill));
+                } else {
+                  ReallySafe = false;
+                  break;
+                }
+              }
+
+              if (ReallySafe) {
+                if (LV) {
+                  while (!NewKills.empty()) {
+                    MachineInstr *NewKill = NewKills.back().second;
+                    unsigned Kill = NewKills.back().first.first;
+                    bool isDead = NewKills.back().first.second;
+                    NewKills.pop_back();
+                    if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+                      if (isDead)
+                        LV->addVirtualRegisterDead(Kill, NewKill);
+                      else
+                        LV->addVirtualRegisterKilled(Kill, NewKill);
+                    }
+                  }
+
+                  // We're really going to nuke the old inst. If regB was
+                  // marked as a kill we need to update its Kills list. Note
+                  // this must stay guarded by the LV check above.
+                  if (mi->getOperand(si).isKill())
+                    LV->removeVirtualRegisterKilled(regB, mi);
+                }
+
+                mbbi->erase(mi); // Nuke the old inst.
+                mi = nmi;
+                ++NumDeletes;
+                break; // Done with this instruction.
+              }
+            }
+
+            // If this instruction is commutative, check to see if C dies. If
+            // so, swap the B and C operands. This makes the live ranges of A
+            // and C joinable.
+            // FIXME: This code also works for A := B op C instructions.
+            if (TID.isCommutable() && mi->getNumOperands() >= 3) {
+              assert(mi->getOperand(3-si).isReg() &&
+                     "Not a proper commutative instruction!");
+              unsigned regC = mi->getOperand(3-si).getReg();
+              if (isKilled(*mi, regC, MRI, TII)) {
+                if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+                  ++NumCommuted;
+                  regB = regC;
+                  goto InstructionRearranged;
+                }
+              }
+            }
+
+            // If this instruction is potentially convertible to a true
+            // three-address instruction, try to convert it.
+            if (TID.isConvertibleTo3Addr()) {
+              // FIXME: This assumes there are no more operands which are tied
+              // to another register.
+#ifndef NDEBUG
+              for (unsigned i = si + 1, e = TID.getNumOperands(); i < e; ++i)
+                assert(TID.getOperandConstraint(i, TOI::TIED_TO) == -1);
+#endif
+
+              if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+                ++NumConvertedTo3Addr;
+                break; // Done with this instruction.
+              }
+            }
+          }
+
+          // If it's profitable to commute the instruction, do so.
+          if (TID.isCommutable() && mi->getNumOperands() >= 3) {
+            unsigned regC = mi->getOperand(3-si).getReg();
+            if (isProfitableToCommute(regB, regC, mi, mbbi, Dist))
+              if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+                ++NumAggrCommuted;
+                ++NumCommuted;
+                regB = regC;
+                goto InstructionRearranged;
+              }
+          }
+
+          // If it's profitable to convert the 2-address instruction to a
+          // 3-address one, do so.
+          if (TID.isConvertibleTo3Addr() && isProfitableToConv3Addr(regA)) {
+            if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+              ++NumConvertedTo3Addr;
+              break; // Done with this instruction.
+            }
+          }
+
+        InstructionRearranged:
+          const TargetRegisterClass* rc = MRI->getRegClass(regB);
+          MachineInstr *DefMI = MRI->getVRegDef(regB);
+          // If it's safe and profitable, remat the definition instead of
+          // copying it.
+          if (DefMI &&
+              DefMI->getDesc().isAsCheapAsAMove() &&
+              DefMI->isSafeToReMat(TII, regB) &&
+              isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)) {
+            DEBUG(cerr << "2addr: REMATTING : " << *DefMI << "\n");
+            TII->reMaterialize(*mbbi, mi, regA, DefMI);
+            ReMatRegs.set(regB);
+            ++NumReMats;
+          } else {
+            bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
+            (void)Emitted;
+            assert(Emitted && "Unable to issue a copy instruction!\n");
+          }
+
+          MachineBasicBlock::iterator prevMI = prior(mi);
+          // Update DistanceMap.
+          DistanceMap.insert(std::make_pair(prevMI, Dist));
+          DistanceMap[mi] = ++Dist;
+
+          // Update live variables for regB.
+          if (LV) {
+            if (LV->removeVirtualRegisterKilled(regB, mi))
+              LV->addVirtualRegisterKilled(regB, prevMI);
+
+            if (LV->removeVirtualRegisterDead(regB, mi))
+              LV->addVirtualRegisterDead(regB, prevMI);
+          }
+
+          DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM));
+
+          // Replace all occurrences of regB with regA.
+          for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+            if (mi->getOperand(i).isReg() &&
+                mi->getOperand(i).getReg() == regB)
+              mi->getOperand(i).setReg(regA);
+          }
+        }
+
+        assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse());
+        mi->getOperand(ti).setReg(mi->getOperand(si).getReg());
+        MadeChange = true;
+
+        DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM));
+      }
+
+      mi = nmi;
+    }
+  }
+
+  // Some remat'ed instructions are dead.
+  int VReg = ReMatRegs.find_first();
+  while (VReg != -1) {
+    if (MRI->use_empty(VReg)) {
+      MachineInstr *DefMI = MRI->getVRegDef(VReg);
+      DefMI->eraseFromParent();
+    }
+    VReg = ReMatRegs.find_next(VReg);
+  }
+
+  return MadeChange;
+}
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..c3b213c
--- /dev/null
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,199 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN UnreachableBlockElim : public FunctionPass { + virtual bool runOnFunction(Function &F); + public: + static char ID; // Pass identification, replacement for typeid + UnreachableBlockElim() : FunctionPass(&ID) {} + }; +} +char UnreachableBlockElim::ID = 0; +static RegisterPass<UnreachableBlockElim> +X("unreachableblockelim", "Remove unreachable blocks from the CFG"); + +FunctionPass *llvm::createUnreachableBlockEliminationPass() { + return new UnreachableBlockElim(); +} + +bool UnreachableBlockElim::runOnFunction(Function &F) { + SmallPtrSet<BasicBlock*, 8> Reachable; + + // Mark all reachable blocks. + for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I = + df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I) + /* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector<BasicBlock*> DeadBlocks; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (!Reachable.count(I)) { + BasicBlock *BB = I; + DeadBlocks.push_back(BB); + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); + BB->getInstList().pop_front(); + } + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); + } + + // Actually remove the blocks now. + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + DeadBlocks[i]->eraseFromParent(); + + return DeadBlocks.size(); +} + + +namespace { + class VISIBILITY_HIDDEN UnreachableMachineBlockElim : + public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &F); + MachineModuleInfo *MMI; + public: + static char ID; // Pass identification, replacement for typeid + UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {} + }; +} +char UnreachableMachineBlockElim::ID = 0; + +static RegisterPass<UnreachableMachineBlockElim> +Y("unreachable-mbb-elimination", + "Remove unreachable machine basic blocks"); + +const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y; + +bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + + // Mark all reachable blocks. + for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > + I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); + I != E; ++I) + /* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector<MachineBasicBlock*> DeadBlocks; + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + + // Test for deadness. 
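+    // Hedged sketch of the PHI surgery below (operand numbers illustrative):
+    // if BB is dead and a successor contains
+    //   %v2<def> = PHI %v0, <BB#0>, %v1, <BB>
+    // the %v1/<BB> pair is dropped, which is why operands i and i-1 are
+    // removed together (each MBB operand follows its value operand).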
+    if (!Reachable.count(BB)) {
+      DeadBlocks.push_back(BB);
+
+      while (BB->succ_begin() != BB->succ_end()) {
+        MachineBasicBlock* succ = *BB->succ_begin();
+
+        MachineBasicBlock::iterator start = succ->begin();
+        while (start != succ->end() &&
+               start->getOpcode() == TargetInstrInfo::PHI) {
+          for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+            if (start->getOperand(i).isMBB() &&
+                start->getOperand(i).getMBB() == BB) {
+              start->RemoveOperand(i);
+              start->RemoveOperand(i-1);
+            }
+
+          ++start;
+        }
+
+        BB->removeSuccessor(BB->succ_begin());
+      }
+    }
+  }
+
+  // Actually remove the blocks now.
+  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = DeadBlocks[i];
+    // If there are any labels in the basic block, unregister them from
+    // MachineModuleInfo.
+    if (MMI && !MBB->empty()) {
+      for (MachineBasicBlock::iterator I = MBB->begin(),
+             E = MBB->end(); I != E; ++I) {
+        if (I->isLabel())
+          // The label ID # is always operand #0, an immediate.
+          MMI->InvalidateLabel(I->getOperand(0).getImm());
+      }
+    }
+    MBB->eraseFromParent();
+  }
+
+  // Cleanup PHI nodes.
+  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    MachineBasicBlock *BB = I;
+    // Prune unneeded PHI entries.
+    SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+                                             BB->pred_end());
+    MachineBasicBlock::iterator phi = BB->begin();
+    while (phi != BB->end() &&
+           phi->getOpcode() == TargetInstrInfo::PHI) {
+      for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+        if (!preds.count(phi->getOperand(i).getMBB())) {
+          phi->RemoveOperand(i);
+          phi->RemoveOperand(i-1);
+        }
+
+      if (phi->getNumOperands() == 3) {
+        unsigned Input = phi->getOperand(1).getReg();
+        unsigned Output = phi->getOperand(0).getReg();
+
+        MachineInstr* temp = phi;
+        ++phi;
+        temp->eraseFromParent();
+
+        if (Input != Output)
+          F.getRegInfo().replaceRegWith(Output, Input);
+
+        continue;
+      }
+
+      ++phi;
+    }
+  }
+
+  F.RenumberBlocks();
+
+  return !DeadBlocks.empty();
+}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 0000000..29637b9
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,269 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "virtregmap" +#include "VirtRegMap.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumSpills , "Number of register spills"); + +//===----------------------------------------------------------------------===// +// VirtRegMap implementation +//===----------------------------------------------------------------------===// + +char VirtRegMap::ID = 0; + +static RegisterPass<VirtRegMap> +X("virtregmap", "Virtual Register Map"); + +bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { + TII = mf.getTarget().getInstrInfo(); + TRI = mf.getTarget().getRegisterInfo(); + MF = &mf; + + ReMatId = MAX_STACK_SLOT+1; + LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; + + Virt2PhysMap.clear(); + Virt2StackSlotMap.clear(); + Virt2ReMatIdMap.clear(); + Virt2SplitMap.clear(); + Virt2SplitKillMap.clear(); + ReMatMap.clear(); + ImplicitDefed.clear(); + SpillSlotToUsesMap.clear(); + MI2VirtMap.clear(); + SpillPt2VirtMap.clear(); + RestorePt2VirtMap.clear(); + EmergencySpillMap.clear(); + EmergencySpillSlots.clear(); + + SpillSlotToUsesMap.resize(8); + ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1- + TargetRegisterInfo::FirstVirtualRegister); + + allocatableRCRegs.clear(); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + allocatableRCRegs.insert(std::make_pair(*I, + TRI->getAllocatableSet(mf, *I))); + + grow(); + + return false; +} + +void VirtRegMap::grow() { + unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); + Virt2PhysMap.grow(LastVirtReg); + Virt2StackSlotMap.grow(LastVirtReg); + Virt2ReMatIdMap.grow(LastVirtReg); + Virt2SplitMap.grow(LastVirtReg); + Virt2SplitKillMap.grow(LastVirtReg); + ReMatMap.grow(LastVirtReg); + ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1); +} + +int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); + int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment()); + if (LowSpillSlot == NO_STACK_SLOT) + LowSpillSlot = SS; + if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) + HighSpillSlot = SS; + unsigned Idx = SS-LowSpillSlot; + while (Idx >= SpillSlotToUsesMap.size()) + SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); + Virt2StackSlotMap[virtReg] = SS; + ++NumSpills; + return SS; +} + +void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign 
stack slot to already spilled register"); + assert((SS >= 0 || + (SS >= MF->getFrameInfo()->getObjectIndexBegin())) && + "illegal fixed frame index"); + Virt2StackSlotMap[virtReg] = SS; +} + +int VirtRegMap::assignVirtReMatId(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && + "attempt to assign re-mat id to already spilled register"); + Virt2ReMatIdMap[virtReg] = ReMatId; + return ReMatId++; +} + +void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && + "attempt to assign re-mat id to already spilled register"); + Virt2ReMatIdMap[virtReg] = id; +} + +int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { + std::map<const TargetRegisterClass*, int>::iterator I = + EmergencySpillSlots.find(RC); + if (I != EmergencySpillSlots.end()) + return I->second; + int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment()); + if (LowSpillSlot == NO_STACK_SLOT) + LowSpillSlot = SS; + if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) + HighSpillSlot = SS; + EmergencySpillSlots[RC] = SS; + return SS; +} + +void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { + if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { + // If FI < LowSpillSlot, this stack reference was produced by + // instruction selection and is not a spill + if (FI >= LowSpillSlot) { + assert(FI >= 0 && "Spill slot index should not be negative!"); + assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() + && "Invalid spill slot"); + SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI); + } + } +} + +void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, + MachineInstr *NewMI, ModRef MRInfo) { + // Move previous memory references folded to new instruction. + MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); + for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), + E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { + MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); + MI2VirtMap.erase(I++); + } + + // add new memory reference + MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); +} + +void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) { + MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI); + MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo))); +} + +void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isFI()) + continue; + int FI = MO.getIndex(); + if (MF->getFrameInfo()->isFixedObjectIndex(FI)) + continue; + // This stack reference was produced by instruction selection and + // is not a spill + if (FI < LowSpillSlot) + continue; + assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() + && "Invalid spill slot"); + SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); + } + MI2VirtMap.erase(MI); + SpillPt2VirtMap.erase(MI); + RestorePt2VirtMap.erase(MI); + EmergencySpillMap.erase(MI); +} + +/// FindUnusedRegisters - Gather a list of allocatable registers that +/// have not been allocated to any virtual register. 
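+/// A hedged usage sketch (the caller context is hypothetical): after
+/// allocation one might do
+///   if (VRM.FindUnusedRegisters(TRI, LIs))
+///     if (unsigned Reg = VRM.getFirstUnusedRegister(RC))
+///       ; // Reg is free to serve as a scratch register in class RC.
+/// getFirstUnusedRegister and the related Unused* helpers are declared in
+/// VirtRegMap.h below.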
+bool VirtRegMap::FindUnusedRegisters(const TargetRegisterInfo *TRI, + LiveIntervals* LIs) { + unsigned NumRegs = TRI->getNumRegs(); + UnusedRegs.reset(); + UnusedRegs.resize(NumRegs); + + BitVector Used(NumRegs); + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) + if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) + Used.set(Virt2PhysMap[i]); + + BitVector Allocatable = TRI->getAllocatableSet(*MF); + bool AnyUnused = false; + for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { + if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { + bool ReallyUnused = true; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + if (Used[*AS] || LIs->hasInterval(*AS)) { + ReallyUnused = false; + break; + } + } + if (ReallyUnused) { + AnyUnused = true; + UnusedRegs.set(Reg); + } + } + } + + return AnyUnused; +} + +void VirtRegMap::print(std::ostream &OS, const Module* M) const { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + + OS << "********** REGISTER MAP **********\n"; + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) { + if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) + OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i]) + << "]\n"; + } + + for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, + e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) + if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT) + OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n"; + OS << '\n'; +} + +void VirtRegMap::dump() const { + print(cerr); +} diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h new file mode 100644 index 0000000..507557d --- /dev/null +++ b/lib/CodeGen/VirtRegMap.h @@ -0,0 +1,495 @@ +//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a virtual register map. This maps virtual registers to +// physical registers and virtual registers to stack slots. It is created and +// updated by a register allocator and then used by a machine code rewriter that +// adds spill code and rewrites virtual into physical register references. 
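+//
+// A hedged sketch of the intended flow (register names illustrative, API
+// names as declared below): the allocator fills the map, e.g.
+//   VRM.assignVirt2Phys(v1024, R0);   // v1024 lives in R0
+//   VRM.assignVirt2StackSlot(v1025);  // v1025 spills to a fresh stack slot
+// and the rewriter later queries getPhys() / getStackSlot() while rewriting
+// each operand.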
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Streams.h"
+#include <map>
+
+namespace llvm {
+  class LiveIntervals;
+  class MachineInstr;
+  class MachineFunction;
+  class TargetInstrInfo;
+  class TargetRegisterInfo;
+
+  class VirtRegMap : public MachineFunctionPass {
+  public:
+    enum {
+      NO_PHYS_REG = 0,
+      NO_STACK_SLOT = (1L << 30)-1,
+      MAX_STACK_SLOT = (1L << 18)-1
+    };
+
+    enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+    typedef std::multimap<MachineInstr*,
+                          std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+  private:
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineFunction *MF;
+
+    DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
+
+    /// Virt2PhysMap - This is a virtual to physical register
+    /// mapping. Each virtual register is required to have an entry in
+    /// it; even spilled virtual registers (the register mapped to a
+    /// spilled register is the temporary used to load it from the
+    /// stack).
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+
+    /// Virt2StackSlotMap - This is virtual register to stack slot
+    /// mapping. Each spilled virtual register has an entry in it
+    /// which corresponds to the stack slot this register is spilled
+    /// at.
+    IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+
+    /// Virt2ReMatIdMap - This is virtual register to rematerialization id
+    /// mapping. Each spilled virtual register that should be remat'd has an
+    /// entry in it which corresponds to the remat id.
+    IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
+
+    /// Virt2SplitMap - This is virtual register to split virtual register
+    /// mapping.
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+
+    /// Virt2SplitKillMap - This is split virtual register to its last use
+    /// (kill) index mapping.
+    IndexedMap<unsigned> Virt2SplitKillMap;
+
+    /// ReMatMap - This is virtual register to re-materialized instruction
+    /// mapping. Each virtual register whose definition is going to be
+    /// re-materialized has an entry in it.
+    IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
+
+    /// MI2VirtMap - This is MachineInstr to virtual register
+    /// mapping. In the case of memory spill code being folded into
+    /// instructions, we need to know which virtual register was
+    /// read/written by this instruction.
+    MI2VirtMapTy MI2VirtMap;
+
+    /// SpillPt2VirtMap - This records the virtual registers which should
+    /// be spilled right after the MachineInstr due to live interval
+    /// splitting.
+    std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
+    SpillPt2VirtMap;
+
+    /// RestorePt2VirtMap - This records the virtual registers which should
+    /// be restored right before the MachineInstr due to live interval
+    /// splitting.
+    std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
+
+    /// EmergencySpillMap - This records the physical registers that should
+    /// be spilled / restored around the MachineInstr since the register
+    /// allocator has run out of registers.
+    std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
+
+    /// EmergencySpillSlots - This records emergency spill slots used to
+    /// spill physical registers when the register allocator runs out of
+    /// registers. Ideally only one stack slot is used per function per
+    /// register class.
+    std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
+
+    /// ReMatId - Instead of assigning a stack slot to a to-be-rematerialized
+    /// virtual register, a unique id is assigned. This keeps track of
+    /// the highest id used so far. Note, this starts at (1<<18) to avoid
+    /// conflicts with stack slot numbers.
+    int ReMatId;
+
+    /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
+    int LowSpillSlot, HighSpillSlot;
+
+    /// SpillSlotToUsesMap - Records uses for each register spill slot.
+    SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
+
+    /// ImplicitDefed - One bit for each virtual register. If set it indicates
+    /// the register is implicitly defined.
+    BitVector ImplicitDefed;
+
+    /// UnusedRegs - A list of physical registers that have not been used.
+    BitVector UnusedRegs;
+
+    VirtRegMap(const VirtRegMap&);     // DO NOT IMPLEMENT
+    void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+  public:
+    static char ID;
+    VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
+                   Virt2StackSlotMap(NO_STACK_SLOT),
+                   Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
+                   Virt2SplitKillMap(0), ReMatMap(NULL),
+                   ReMatId(MAX_STACK_SLOT+1),
+                   LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    void grow();
+
+    /// @brief returns true if the specified virtual register is
+    /// mapped to a physical register
+    bool hasPhys(unsigned virtReg) const {
+      return getPhys(virtReg) != NO_PHYS_REG;
+    }
+
+    /// @brief returns the physical register mapped to the specified
+    /// virtual register
+    unsigned getPhys(unsigned virtReg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2PhysMap[virtReg];
+    }
+
+    /// @brief creates a mapping for the specified virtual register to
+    /// the specified physical register
+    void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+             TargetRegisterInfo::isPhysicalRegister(physReg));
+      assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+             "attempt to assign physical register to already mapped "
+             "virtual register");
+      Virt2PhysMap[virtReg] = physReg;
+    }
+
+    /// @brief clears the specified virtual register's physical
+    /// register mapping
+    void clearVirt(unsigned virtReg) {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+             "attempt to clear an unassigned virtual register");
+      Virt2PhysMap[virtReg] = NO_PHYS_REG;
+    }
+
+    /// @brief clears all virtual to physical register mappings
+    void clearAllVirt() {
+      Virt2PhysMap.clear();
+      grow();
+    }
+
+    /// @brief records virtReg is a split live interval from SReg.
+    void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
+      Virt2SplitMap[virtReg] = SReg;
+    }
+
+    /// @brief returns the live interval virtReg is split from.
+    unsigned getPreSplitReg(unsigned virtReg) {
+      return Virt2SplitMap[virtReg];
+    }
+
+    /// @brief returns true if the specified virtual register is not
+    /// mapped to a stack slot or rematerialized.
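+    /// (A hedged sketch of the check below: both Virt2StackSlotMap[v] and
+    /// Virt2ReMatIdMap[v] still hold the NO_STACK_SLOT sentinel, or v is a
+    /// split interval that also carries a physical register.)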
+    bool isAssignedReg(unsigned virtReg) const {
+      if (getStackSlot(virtReg) == NO_STACK_SLOT &&
+          getReMatId(virtReg) == NO_STACK_SLOT)
+        return true;
+      // Split register can be assigned a physical register as well as a
+      // stack slot or remat id.
+      return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+    }
+
+    /// @brief returns the stack slot mapped to the specified virtual
+    /// register
+    int getStackSlot(unsigned virtReg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2StackSlotMap[virtReg];
+    }
+
+    /// @brief returns the rematerialization id mapped to the specified virtual
+    /// register
+    int getReMatId(unsigned virtReg) const {
+      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2ReMatIdMap[virtReg];
+    }
+
+    /// @brief create a mapping for the specified virtual register to
+    /// the next available stack slot
+    int assignVirt2StackSlot(unsigned virtReg);
+    /// @brief create a mapping for the specified virtual register to
+    /// the specified stack slot
+    void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+    /// @brief assign a unique re-materialization id to the specified
+    /// virtual register.
+    int assignVirtReMatId(unsigned virtReg);
+    /// @brief assign a unique re-materialization id to the specified
+    /// virtual register.
+    void assignVirtReMatId(unsigned virtReg, int id);
+
+    /// @brief returns true if the specified virtual register is being
+    /// re-materialized.
+    bool isReMaterialized(unsigned virtReg) const {
+      return ReMatMap[virtReg] != NULL;
+    }
+
+    /// @brief returns the original machine instruction being re-issued
+    /// to re-materialize the specified virtual register.
+    MachineInstr *getReMaterializedMI(unsigned virtReg) const {
+      return ReMatMap[virtReg];
+    }
+
+    /// @brief records that the specified virtual register will be
+    /// re-materialized and the original instruction which will be re-issued
+    /// for this purpose.
+    void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+      ReMatMap[virtReg] = def;
+    }
+
+    /// @brief record the last use (kill) of a split virtual register.
+    void addKillPoint(unsigned virtReg, unsigned index) {
+      Virt2SplitKillMap[virtReg] = index;
+    }
+
+    unsigned getKillPoint(unsigned virtReg) const {
+      return Virt2SplitKillMap[virtReg];
+    }
+
+    /// @brief remove the last use (kill) of a split virtual register.
+    void removeKillPoint(unsigned virtReg) {
+      Virt2SplitKillMap[virtReg] = 0;
+    }
+
+    /// @brief returns true if the specified MachineInstr is a spill point.
+    bool isSpillPt(MachineInstr *Pt) const {
+      return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
+    }
+
+    /// @brief returns the virtual registers that should be spilled due to
+    /// splitting right after the specified MachineInstr.
+    std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
+      return SpillPt2VirtMap[Pt];
+    }
+
+    /// @brief records the specified MachineInstr as a spill point for virtReg.
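+    /// (Hedged example, names illustrative: after addSpillPoint(v5, true, MI)
+    /// and addSpillPoint(v9, false, MI), SpillPt2VirtMap[MI] holds
+    /// { (v5, true), (v9, false) }, letting the rewriter emit a store for
+    /// each vreg right after MI with the bool as the kill flag.)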
+    void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
+      std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+        I = SpillPt2VirtMap.find(Pt);
+      if (I != SpillPt2VirtMap.end())
+        I->second.push_back(std::make_pair(virtReg, isKill));
+      else {
+        std::vector<std::pair<unsigned,bool> > Virts;
+        Virts.push_back(std::make_pair(virtReg, isKill));
+        SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
+      }
+    }
+
+    /// @brief - transfer spill point information from one instruction to
+    /// another.
+    void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
+        I = SpillPt2VirtMap.find(Old);
+      if (I == SpillPt2VirtMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back().first;
+        bool isKill = I->second.back().second;
+        I->second.pop_back();
+        addSpillPoint(virtReg, isKill, New);
+      }
+      SpillPt2VirtMap.erase(I);
+    }
+
+    /// @brief returns true if the specified MachineInstr is a restore point.
+    bool isRestorePt(MachineInstr *Pt) const {
+      return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
+    }
+
+    /// @brief returns the virtual registers that should be restored due to
+    /// splitting right before the specified MachineInstr.
+    std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
+      return RestorePt2VirtMap[Pt];
+    }
+
+    /// @brief records the specified MachineInstr as a restore point for virtReg.
+    void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
+      std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+        RestorePt2VirtMap.find(Pt);
+      if (I != RestorePt2VirtMap.end())
+        I->second.push_back(virtReg);
+      else {
+        std::vector<unsigned> Virts;
+        Virts.push_back(virtReg);
+        RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
+      }
+    }
+
+    /// @brief - transfer restore point information from one instruction to
+    /// another.
+    void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
+        RestorePt2VirtMap.find(Old);
+      if (I == RestorePt2VirtMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back();
+        I->second.pop_back();
+        addRestorePoint(virtReg, New);
+      }
+      RestorePt2VirtMap.erase(I);
+    }
+
+    /// @brief records that the specified physical register must be spilled
+    /// around the specified machine instr.
+    void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
+      if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
+        EmergencySpillMap[MI].push_back(PhysReg);
+      else {
+        std::vector<unsigned> PhysRegs;
+        PhysRegs.push_back(PhysReg);
+        EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
+      }
+    }
+
+    /// @brief returns true if one or more physical registers must be spilled
+    /// around the specified instruction.
+    bool hasEmergencySpills(MachineInstr *MI) const {
+      return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
+    }
+
+    /// @brief returns the physical registers to be spilled and restored around
+    /// the instruction.
+    std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
+      return EmergencySpillMap[MI];
+    }
+
+    /// @brief - transfer emergency spill information from one instruction to
+    /// another.
+    void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
+      std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
+        EmergencySpillMap.find(Old);
+      if (I == EmergencySpillMap.end())
+        return;
+      while (!I->second.empty()) {
+        unsigned virtReg = I->second.back();
+        I->second.pop_back();
+        addEmergencySpill(virtReg, New);
+      }
+      EmergencySpillMap.erase(I);
+    }
+
+    /// @brief return or get an emergency spill slot for the register class.
+    int getEmergencySpillSlot(const TargetRegisterClass *RC);
+
+    /// @brief Return lowest spill slot index.
+    int getLowSpillSlot() const {
+      return LowSpillSlot;
+    }
+
+    /// @brief Return highest spill slot index.
+    int getHighSpillSlot() const {
+      return HighSpillSlot;
+    }
+
+    /// @brief Records a spill slot use.
+    void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
+
+    /// @brief Returns true if spill slot has been used.
+    bool isSpillSlotUsed(int FrameIndex) const {
+      assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
+      return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
+    }
+
+    /// @brief Mark the specified register as being implicitly defined.
+    void setIsImplicitlyDefined(unsigned VirtReg) {
+      ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+    }
+
+    /// @brief Returns true if the virtual register is implicitly defined.
+    bool isImplicitlyDefined(unsigned VirtReg) const {
+      return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+    }
+
+    /// @brief Updates information about the specified virtual register's value
+    /// folded into newMI machine instruction.
+    void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
+                    ModRef MRInfo);
+
+    /// @brief Updates information about the specified virtual register's value
+    /// folded into the specified machine instruction.
+    void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
+
+    /// @brief returns the virtual registers' values folded in memory
+    /// operands of this instruction
+    std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+    getFoldedVirts(MachineInstr* MI) const {
+      return MI2VirtMap.equal_range(MI);
+    }
+
+    /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
+    /// folded instruction map and spill point map.
+    void RemoveMachineInstrFromMaps(MachineInstr *MI);
+
+    /// FindUnusedRegisters - Gather a list of allocatable registers that
+    /// have not been allocated to any virtual register.
+    bool FindUnusedRegisters(const TargetRegisterInfo *TRI,
+                             LiveIntervals* LIs);
+
+    /// HasUnusedRegisters - Return true if there are any allocatable registers
+    /// that have not been allocated to any virtual register.
+    bool HasUnusedRegisters() const {
+      return !UnusedRegs.none();
+    }
+
+    /// setRegisterUsed - Remember the physical register is now used.
+    void setRegisterUsed(unsigned Reg) {
+      UnusedRegs.reset(Reg);
+    }
+
+    /// isRegisterUnused - Return true if the physical register has not been
+    /// used.
+    bool isRegisterUnused(unsigned Reg) const {
+      return UnusedRegs[Reg];
+    }
+
+    /// getFirstUnusedRegister - Return the first physical register that has not
+    /// been used.
+    unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
+      int Reg = UnusedRegs.find_first();
+      while (Reg != -1) {
+        if (allocatableRCRegs[RC][Reg])
+          return (unsigned)Reg;
+        Reg = UnusedRegs.find_next(Reg);
+      }
+      return 0;
+    }
+
+    void print(std::ostream &OS, const Module* M = 0) const;
+    void print(std::ostream *OS) const { if (OS) print(*OS); }
+    void dump() const;
+  };
+
+  inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+  inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 0000000..b4c8bc1
--- /dev/null
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2225 @@
+//===-- llvm/CodeGen/VirtRegRewriter.cpp - Rewriter -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDSE     , "Number of dead stores elided");
+STATISTIC(NumDSS     , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM     , "Number of re-materializable defs elided");
+STATISTIC(NumStores  , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats  , "Number of re-materializations");
+STATISTIC(NumLoads   , "Number of loads added");
+STATISTIC(NumReused  , "Number of values reused");
+STATISTIC(NumDCE     , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+  enum RewriterName { simple, local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+            cl::desc("Rewriter to use: (default: local)"),
+            cl::Prefix,
+            cl::values(clEnumVal(simple, "simple rewriter"),
+                       clEnumVal(local,  "local rewriter"),
+                       clEnumVal(trivial, "trivial rewriter"),
+                       clEnumValEnd),
+            cl::init(local));
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+
+// ****************************** //
+// Simple Rewriter Implementation //
+// ****************************** //
+
+struct VISIBILITY_HIDDEN SimpleRewriter : public VirtRegRewriter {
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs) {
+    DOUT << "********** REWRITE MACHINE CODE **********\n";
+    DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+    const TargetMachine &TM = MF.getTarget();
+    const TargetInstrInfo &TII = *TM.getInstrInfo();
+    const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+
+    // LoadedRegs - Keep track of which vregs are loaded, so that we only load
+    // each vreg once (in the case where a spilled vreg is used by multiple
+    // operands). This is always smaller than the number of operands to the
+    // current machine instr, so it should be small.
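+    // Hedged example of the intended effect (slot and vreg numbers are
+    // illustrative): if %v100 is spilled to SS#4 and one instruction uses it
+    // in two operands, only the first operand triggers loadRegFromStackSlot;
+    // the second finds %v100 already in LoadedRegs and is simply rewritten
+    // to the assigned physreg.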
+    std::vector<unsigned> LoadedRegs;
+
+    for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+         MBBI != E; ++MBBI) {
+      DOUT << MBBI->getBasicBlock()->getName() << ":\n";
+      MachineBasicBlock &MBB = *MBBI;
+      for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+           MII != E; ++MII) {
+        MachineInstr &MI = *MII;
+        for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+          MachineOperand &MO = MI.getOperand(i);
+          if (MO.isReg() && MO.getReg()) {
+            if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+              unsigned VirtReg = MO.getReg();
+              unsigned SubIdx = MO.getSubReg();
+              unsigned PhysReg = VRM.getPhys(VirtReg);
+              unsigned RReg = SubIdx ? TRI.getSubReg(PhysReg, SubIdx) : PhysReg;
+              if (!VRM.isAssignedReg(VirtReg)) {
+                int StackSlot = VRM.getStackSlot(VirtReg);
+                const TargetRegisterClass* RC =
+                  MF.getRegInfo().getRegClass(VirtReg);
+
+                if (MO.isUse() &&
+                    std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg)
+                    == LoadedRegs.end()) {
+                  TII.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+                  MachineInstr *LoadMI = prior(MII);
+                  VRM.addSpillSlotUse(StackSlot, LoadMI);
+                  LoadedRegs.push_back(VirtReg);
+                  ++NumLoads;
+                  DOUT << '\t' << *LoadMI;
+                }
+
+                if (MO.isDef()) {
+                  TII.storeRegToStackSlot(MBB, next(MII), PhysReg, true,
+                                          StackSlot, RC);
+                  MachineInstr *StoreMI = next(MII);
+                  VRM.addSpillSlotUse(StackSlot, StoreMI);
+                  ++NumStores;
+                }
+              }
+              MF.getRegInfo().setPhysRegUsed(RReg);
+              MI.getOperand(i).setReg(RReg);
+              MI.getOperand(i).setSubReg(0);
+            } else {
+              MF.getRegInfo().setPhysRegUsed(MO.getReg());
+            }
+          }
+        }
+
+        DOUT << '\t' << MI;
+        LoadedRegs.clear();
+      }
+    }
+    return true;
+  }
+
+};
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter {
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs) {
+    DOUT << "********** REWRITE MACHINE CODE **********\n";
+    DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+    MachineRegisterInfo *mri = &MF.getRegInfo();
+
+    bool changed = false;
+
+    for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+         liItr != liEnd; ++liItr) {
+
+      if (TargetRegisterInfo::isVirtualRegister(liItr->first)) {
+        if (VRM.hasPhys(liItr->first)) {
+          unsigned preg = VRM.getPhys(liItr->first);
+          mri->replaceRegWith(liItr->first, preg);
+          mri->setPhysRegUsed(preg);
+          changed = true;
+        }
+      }
+      else {
+        if (!liItr->second->empty()) {
+          mri->setPhysRegUsed(liItr->first);
+        }
+      }
+    }
+
+    return changed;
+  }
+
+};
+
+// ************************************************************************ //
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slots or remat are available in
+/// each register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs are values that the register allocated program is using
+/// that we cannot CHANGE, but we can read if we like. We keep track of this
+/// on a per-stack-slot / remat id basis as the low bit in the value of the
+/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
+/// this bit and addAvailable sets it when the reloaded value may be clobbered.
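+/// A small worked sketch of the encoding (values illustrative): if stack
+/// slot 3 is available in physreg R with the clobber bit set, then
+/// SpillSlotsOrReMatsAvailable[3] == (R << 1) | 1, and
+/// getSpillSlotOrReMatPhysReg(3) recovers R by shifting the bit back out.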
+class VISIBILITY_HIDDEN AvailableSpills {
+  const TargetRegisterInfo *TRI;
+  const TargetInstrInfo *TII;
+
+  // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+  // or remat'ed virtual register values that are still available, due to
+  // being loaded or stored to, but not invalidated yet.
+  std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+  // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+  // indicating which stack slot values are currently held by a physreg. This
+  // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+  // physreg is modified.
+  std::multimap<unsigned, int> PhysRegsAvailable;
+
+  void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+  void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+  AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+    : TRI(tri), TII(tii) {
+  }
+
+  /// clear - Reset the state.
+  void clear() {
+    SpillSlotsOrReMatsAvailable.clear();
+    PhysRegsAvailable.clear();
+  }
+
+  const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+  /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+  /// available in a physical register, return that PhysReg, otherwise
+  /// return 0.
+  unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+    std::map<int, unsigned>::const_iterator I =
+      SpillSlotsOrReMatsAvailable.find(Slot);
+    if (I != SpillSlotsOrReMatsAvailable.end()) {
+      return I->second >> 1;  // Remove the CanClobber bit.
+    }
+    return 0;
+  }
+
+  /// addAvailable - Mark that the specified stack slot / remat is available
+  /// in the specified physreg. If CanClobber is true, the physreg can be
+  /// modified at any time without changing the semantics of the program.
+  void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+    // If this stack slot is thought to be available in some other physreg,
+    // remove its record.
+    ModifyStackSlotOrReMat(SlotOrReMat);
+
+    PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+    SpillSlotsOrReMatsAvailable[SlotOrReMat] = (Reg << 1) |
+                                               (unsigned)CanClobber;
+
+    if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+      DOUT << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1;
+    else
+      DOUT << "Remembering SS#" << SlotOrReMat;
+    DOUT << " in physreg " << TRI->getName(Reg) << "\n";
+  }
+
+  /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+  /// the value of the specified stackslot register if it desires. The
+  /// specified stack slot must be available in a physreg for this query to
+  /// make sense.
+  bool canClobberPhysRegForSS(int SlotOrReMat) const {
+    assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+           "Value not available!");
+    return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+  }
+
+  /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+  /// physical register where values for some stack slot(s) might be
+  /// available.
+  bool canClobberPhysReg(unsigned PhysReg) const {
+    std::multimap<unsigned, int>::const_iterator I =
+      PhysRegsAvailable.lower_bound(PhysReg);
+    while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+      int SlotOrReMat = I->second;
+      ++I;
+      if (!canClobberPhysRegForSS(SlotOrReMat))
+        return false;
+    }
+    return true;
+  }
+
+  /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+  /// stackslot register. The register is still available but is no longer
+  /// allowed to be modified.
+  void disallowClobberPhysReg(unsigned PhysReg);
+
+  /// ClobberPhysReg - This is called when the specified physreg changes
+  /// value. We use this to invalidate any info about stuff that lives in
+  /// it and any of its aliases.
+  void ClobberPhysReg(unsigned PhysReg);
+
+  /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+  /// slot changes. This removes information about which register the
+  /// previous value for this slot lives in (as the previous value is dead
+  /// now).
+  void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+  /// AddAvailableRegsToLiveIn - Availability information is being kept coming
+  /// into the specified MBB. Add available physical registers as potential
+  /// live-in's. If they are reused in the MBB, they will be added to the
+  /// live-in set to keep the register scavenger and the post-allocation
+  /// scheduler consistent.
+  void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+                                std::vector<MachineOperand*> &KillOps);
+};
+
+// ************************************************************************ //
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to roll back upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+  // The MachineInstr operand that reused an available value.
+  unsigned Operand;
+
+  // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+  unsigned StackSlotOrReMat;
+
+  // PhysRegReused - The physical register the value was available in.
+  unsigned PhysRegReused;
+
+  // AssignedPhysReg - The physreg that was assigned for use by the reload.
+  unsigned AssignedPhysReg;
+
+  // VirtReg - The virtual register itself.
+  unsigned VirtReg;
+
+  ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+           unsigned vreg)
+    : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
+      AssignedPhysReg(apr), VirtReg(vreg) {}
+};
+
+/// ReuseInfo - This maintains a collection of ReusedOps for each operand that
+/// is reused instead of reloaded.
+class VISIBILITY_HIDDEN ReuseInfo {
+  MachineInstr &MI;
+  std::vector<ReusedOp> Reuses;
+  BitVector PhysRegsClobbered;
+public:
+  ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
+    PhysRegsClobbered.resize(tri->getNumRegs());
+  }
+
+  bool hasReuses() const {
+    return !Reuses.empty();
+  }
+
+  /// addReuse - If we choose to reuse a virtual register that is already
+  /// available instead of reloading it, remember that we did so.
+  void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
+                unsigned PhysRegReused, unsigned AssignedPhysReg,
+                unsigned VirtReg) {
+    // If the reload is to the assigned register anyway, no undo will be
+    // required.
+    if (PhysRegReused == AssignedPhysReg) return;
+
+    // Otherwise, remember this.
+    Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
+                              AssignedPhysReg, VirtReg));
+  }
+
+  void markClobbered(unsigned PhysReg) {
+    PhysRegsClobbered.set(PhysReg);
+  }
+
+  bool isClobbered(unsigned PhysReg) const {
+    return PhysRegsClobbered.test(PhysReg);
+  }
+
+  /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+  /// is some other operand that is using the specified register, either pick
+  /// a new register to use, or evict the previous reload and use this reg.
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM);
+
+ /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+ /// 'Rejected' set to remember which registers have been considered and
+ /// rejected for the reload. This avoids infinite looping in cases like
+ /// this:
+ /// t1 := op t2, t3
+ /// t2 <- assigned r0 for use by the reload but ended up reusing r1
+ /// t3 <- assigned r1 for use by the reload but ended up reusing r0
+ /// t1 <- desires r1
+ /// sees r1 is taken by t2, tries t2's reload register r0
+ /// sees r0 is taken by t3, tries t3's reload register r1
+ /// sees r1 is taken by t2, tries t2's reload register r0 ...
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallSet<unsigned, 8> Rejected;
+ return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps, VRM);
+ }
+};
+
+
+// ****************** //
+// Utility Functions //
+// ****************** //
+
+/// findSinglePredSuccessor - Return via reference a vector of machine basic
+/// blocks each of which is a successor of the specified BB and has no other
+/// predecessor.
+static void findSinglePredSuccessor(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &Succs) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->pred_size() == 1)
+ Succs.push_back(SuccMBB);
+ }
+}
+
+/// InvalidateKill - Invalidate register kill information for a specific
+/// register. This also unsets the kill marker on the last kill operand.
+static void InvalidateKill(unsigned Reg,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg]) {
+ KillOps[Reg]->setIsKill(false);
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ KillOps[*SR]->setIsKill(false);
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+}
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI,
+ const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ SmallVector<unsigned, 2> *KillRegs = NULL) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (KillRegs)
+ KillRegs->push_back(Reg);
+ assert(Reg < KillOps.size());
+ if (KillOps[Reg] == &MO) {
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ if (RegKills[*SR]) {
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+ }
+ }
+ }
+}
+
+/// InvalidateRegDef - If the def operand of the specified def MI is now dead
+/// (since its spill instruction is removed), mark it isDead. Also checks if
+/// the def MI has other definition operands that are not dead. Returns that
+/// information by reference in HasLiveDef.
+static bool InvalidateRegDef(MachineBasicBlock::iterator I,
+ MachineInstr &NewDef, unsigned Reg,
+ bool &HasLiveDef) {
+ // Due to remat, it's possible this reg isn't being reused. That is,
+ // the def of this reg (by prev MI) is now dead.
+ MachineInstr *DefMI = I;
+ MachineOperand *DefOp = NULL;
+ for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = DefMI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ if (MO.getReg() == Reg)
+ DefOp = &MO;
+ else if (!MO.isDead())
+ HasLiveDef = true;
+ }
+ }
+ if (!DefOp)
+ return false;
+
+ bool FoundUse = false, Done = false;
+ MachineBasicBlock::iterator E = &NewDef;
+ ++I; ++E;
+ for (; !Done && I != E; ++I) {
+ MachineInstr *NMI = I;
+ for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
+ MachineOperand &MO = NMI->getOperand(j);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (MO.isUse())
+ FoundUse = true;
+ Done = true; // Stop after scanning all the operands of this MI.
+ }
+ }
+ if (!FoundUse) {
+ // Def is dead!
+ DefOp->setIsDead();
+ return true;
+ }
+ return false;
+}
+
+/// UpdateKills - Track and update kill info. If a MI reads a register that is
+/// marked kill, then it must be due to register reuse. Transfer the kill info
+/// over.
+static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ // That can't be right. Register is killed but not re-defined and it's
+ // being reused. Let's fix that.
+ KillOps[Reg]->setIsKill(false);
+ KillOps[Reg] = NULL;
+ RegKills.reset(Reg);
+ if (!MI.isRegTiedToDefOperand(i))
+ // Unless it's a two-address operand, this is the new kill.
+ MO.setIsKill();
+ }
+ if (MO.isKill()) {
+ RegKills.set(Reg);
+ KillOps[Reg] = &MO;
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.set(*SR);
+ KillOps[*SR] = &MO;
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ RegKills.reset(Reg);
+ KillOps[Reg] = NULL;
+ // It also defines (or partially defines) aliases.
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
+ }
+}
+
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
+///
+static void ReMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ unsigned DestReg, unsigned Reg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ TII->reMaterialize(MBB, MII, DestReg, VRM.getReMaterializedMI(Reg));
+ MachineInstr *NewMI = prior(MII);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
+ continue;
+ assert(MO.isUse());
+ unsigned SubIdx = MO.getSubReg();
+ unsigned Phys = VRM.getPhys(VirtReg);
+ assert(Phys);
+ unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
+ MO.setReg(RReg);
+ MO.setSubReg(0);
+ }
+ ++NumReMats;
+}
+
+/// findSuperReg - Find the register of the given register class whose SubIdx
+/// sub-register is SubReg.
+static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
+ unsigned SubIdx, const TargetRegisterInfo *TRI) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ unsigned Reg = *I;
+ if (TRI->getSubReg(Reg, SubIdx) == SubReg)
+ return Reg;
+ }
+ return 0;
+}
+
+// ******************************** //
+// Available Spills Implementation //
+// ******************************** //
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+/// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ I++;
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+ DOUT << "PhysReg " << TRI->getName(PhysReg)
+ << " copied, it is available for use but can no longer be modified\n";
+ }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ disallowClobberPhysRegOnly(*AS);
+ disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int SlotOrReMat = I->second;
+ PhysRegsAvailable.erase(I++);
+ assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+ DOUT << "PhysReg " << TRI->getName(PhysReg)
+ << " clobbered, invalidating ";
+ if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 << "\n";
+ else
+ DOUT << "SS#" << SlotOrReMat << "\n";
+ }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+ ClobberPhysRegOnly(*AS);
+ ClobberPhysRegOnly(PhysReg);
+}
+
+/// AddAvailableRegsToLiveIn - Availability information is being kept coming
+/// into the specified MBB. Add available physical registers as potential
+/// live-in's. If they are reused in the MBB, they will be added to the
+/// live-in set to make the register scavenger and post-allocation
+/// scheduler happy.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps) { + std::set<unsigned> NotAvailable; + for (std::multimap<unsigned, int>::iterator + I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); + I != E; ++I) { + unsigned Reg = I->first; + const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg); + // FIXME: A temporary workaround. We can't reuse available value if it's + // not safe to move the def of the virtual register's class. e.g. + // X86::RFP* register classes. Do not add it as a live-in. + if (!TII->isSafeToMoveRegClassDefs(RC)) + // This is no longer available. + NotAvailable.insert(Reg); + else { + MBB.addLiveIn(Reg); + InvalidateKill(Reg, TRI, RegKills, KillOps); + } + + // Skip over the same register. + std::multimap<unsigned, int>::iterator NI = next(I); + while (NI != E && NI->first == Reg) { + ++I; + ++NI; + } + } + + for (std::set<unsigned>::iterator I = NotAvailable.begin(), + E = NotAvailable.end(); I != E; ++I) { + ClobberPhysReg(*I); + for (const unsigned *SubRegs = TRI->getSubRegisters(*I); + *SubRegs; ++SubRegs) + ClobberPhysReg(*SubRegs); + } +} + +/// ModifyStackSlotOrReMat - This method is called when the value in a stack +/// slot changes. This removes information about which register the previous +/// value for this slot lives in (as the previous value is dead now). +void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { + std::map<int, unsigned>::iterator It = + SpillSlotsOrReMatsAvailable.find(SlotOrReMat); + if (It == SpillSlotsOrReMatsAvailable.end()) return; + unsigned Reg = It->second >> 1; + SpillSlotsOrReMatsAvailable.erase(It); + + // This register may hold the value of multiple stack slots, only remove this + // stack slot from the set of values the register contains. + std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); + for (; ; ++I) { + assert(I != PhysRegsAvailable.end() && I->first == Reg && + "Map inverse broken!"); + if (I->second == SlotOrReMat) break; + } + PhysRegsAvailable.erase(I); +} + +// ************************** // +// Reuse Info Implementation // +// ************************** // + +/// GetRegForReload - We are about to emit a reload into PhysReg. If there +/// is some other operand that is using the specified register, either pick +/// a new register to use, or evict the previous reload and use this reg. +unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, + AvailableSpills &Spills, + std::vector<MachineInstr*> &MaybeDeadStores, + SmallSet<unsigned, 8> &Rejected, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps, + VirtRegMap &VRM) { + const TargetInstrInfo* TII = MI->getParent()->getParent()->getTarget() + .getInstrInfo(); + + if (Reuses.empty()) return PhysReg; // This is most often empty. + + for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) { + ReusedOp &Op = Reuses[ro]; + // If we find some other reuse that was supposed to use this register + // exactly for its reload, we can change this reload to use ITS reload + // register. That is, unless its reload register has already been + // considered and subsequently rejected because it has also been reused + // by another operand. + if (Op.PhysRegReused == PhysReg && + Rejected.count(Op.AssignedPhysReg) == 0) { + // Yup, use the reload register that we didn't use before. 
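+ // e.g. Op reused r0 while r1 was assigned for its reload. If r0 is
+ // wanted here, take r1 (the reload register Op never used) instead,
+ // and add r0 to Rejected so the recursion cannot hand r0 back to us.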
+ unsigned NewReg = Op.AssignedPhysReg;
+ Rejected.insert(PhysReg);
+ return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps, VRM);
+ } else {
+ // Otherwise, we might also have a problem if a previously reused
+ // value aliases the new register. If so, codegen the previous reload
+ // and use this one.
+ unsigned PRRU = Op.PhysRegReused;
+ const TargetRegisterInfo *TRI = Spills.getRegInfo();
+ if (TRI->areAliases(PRRU, PhysReg)) {
+ // Okay, we found out that an alias of a reused register
+ // was used. This isn't good because it means we have
+ // to undo a previous reuse.
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *AliasRC =
+ MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
+
+ // Copy Op out of the vector and remove it, we're going to insert an
+ // explicit load for it.
+ ReusedOp NewOp = Op;
+ Reuses.erase(Reuses.begin()+ro);
+
+ // Ok, we're going to try to reload the assigned physreg into the
+ // slot that we were supposed to in the first place. However, that
+ // register could hold a reuse. Check to see if it conflicts or
+ // would prefer us to use a different register.
+ unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg,
+ MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
+
+ MachineBasicBlock::iterator MII = MI;
+ if (NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT) {
+ ReMaterialize(*MBB, MII, NewPhysReg, NewOp.VirtReg, TII, TRI, VRM);
+ } else {
+ TII->loadRegFromStackSlot(*MBB, MII, NewPhysReg,
+ NewOp.StackSlotOrReMat, AliasRC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
+ // Any stores to this stack slot are not dead anymore.
+ MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
+ ++NumLoads;
+ }
+ Spills.ClobberPhysReg(NewPhysReg);
+ Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+ unsigned SubIdx = MI->getOperand(NewOp.Operand).getSubReg();
+ unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg;
+ MI->getOperand(NewOp.Operand).setReg(RReg);
+ MI->getOperand(NewOp.Operand).setSubReg(0);
+
+ Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
+ --MII;
+ UpdateKills(*MII, TRI, RegKills, KillOps);
+ DOUT << '\t' << *MII;
+
+ DOUT << "Reuse undone!\n";
+ --NumReused;
+
+ // Finally, PhysReg is now available, go ahead and use it.
+ return PhysReg;
+ }
+ }
+ }
+ return PhysReg;
+}
+
+// ************************************************************************ //
+
+/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
+/// stack slot mod/ref. It also checks if it's possible to unfold the
+/// instruction by having it define a specified physical register instead.
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ VirtRegMap &VRM) {
+ if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+ return false;
+
+ bool Found = false;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ if (MR & VirtRegMap::isModRef)
+ if (VRM.getStackSlot(VirtReg) == SS) {
+ Found = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+ break;
+ }
+ }
+ if (!Found)
+ return false;
+
+ // Does the instruction use a register that overlaps the scratch register?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!VRM.hasPhys(Reg)) + continue; + Reg = VRM.getPhys(Reg); + } + if (TRI->regsOverlap(PhysReg, Reg)) + return false; + } + return true; +} + +/// FindFreeRegister - Find a free register of a given register class by looking +/// at (at most) the last two machine instructions. +static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, + MachineBasicBlock &MBB, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + BitVector &AllocatableRegs) { + BitVector Defs(TRI->getNumRegs()); + BitVector Uses(TRI->getNumRegs()); + SmallVector<unsigned, 4> LocalUses; + SmallVector<unsigned, 4> Kills; + + // Take a look at 2 instructions at most. + for (unsigned Count = 0; Count < 2; ++Count) { + if (MII == MBB.begin()) + break; + MachineInstr *PrevMI = prior(MII); + for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = PrevMI->getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned Reg = MO.getReg(); + if (MO.isDef()) { + Defs.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + Defs.set(*AS); + } else { + LocalUses.push_back(Reg); + if (MO.isKill() && AllocatableRegs[Reg]) + Kills.push_back(Reg); + } + } + + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned Kill = Kills[i]; + if (!Defs[Kill] && !Uses[Kill] && + TRI->getPhysicalRegisterRegClass(Kill) == RC) + return Kill; + } + for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { + unsigned Reg = LocalUses[i]; + Uses.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + Uses.set(*AS); + } + + MII = PrevMI; + } + + return 0; +} + +static +void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == VirtReg) + MO.setReg(PhysReg); + } +} + +namespace { + struct RefSorter { + bool operator()(const std::pair<MachineInstr*, int> &A, + const std::pair<MachineInstr*, int> &B) { + return A.second < B.second; + } + }; +} + +// ***************************** // +// Local Spiller Implementation // +// ***************************** // + +class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter { + MachineRegisterInfo *RegInfo; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + BitVector AllocatableRegs; + DenseMap<MachineInstr*, unsigned> DistanceMap; +public: + + bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, + LiveIntervals* LIs) { + RegInfo = &MF.getRegInfo(); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + AllocatableRegs = TRI->getAllocatableSet(MF); + DOUT << "\n**** Local spiller rewriting function '" + << MF.getFunction()->getName() << "':\n"; + DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)" + " ****\n"; + DEBUG(MF.dump()); + + // Spills - Keep track of which spilled values are available in physregs + // so that we can choose to reuse the physregs instead of emitting + // reloads. This is usually refreshed per basic block. + AvailableSpills Spills(TRI, TII); + + // Keep track of kill information. 
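+ // RegKills[R] is set when the last reference to physreg R seen so far
+ // was marked kill; KillOps[R] then points at that operand so the
+ // marker can be cleared if R turns out to be reused. UpdateKills and
+ // InvalidateKill keep the two structures in sync.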
+ BitVector RegKills(TRI->getNumRegs());
+ std::vector<MachineOperand*> KillOps;
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ // SinglePredSuccs - Successor blocks which have a single predecessor.
+ SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+ SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+ // Traverse the basic blocks depth first.
+ MachineBasicBlock *Entry = MF.begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+ if (!EarlyVisited.count(MBB))
+ RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+
+ // If this MBB is the only predecessor of a successor, keep the
+ // availability information and visit it next.
+ do {
+ // Keep visiting single predecessor successors as long as possible.
+ SinglePredSuccs.clear();
+ findSinglePredSuccessor(MBB, SinglePredSuccs);
+ if (SinglePredSuccs.empty())
+ MBB = 0;
+ else {
+ // FIXME: There may be more than one successor, each of which has
+ // MBB as its only predecessor; only the first one is handled here.
+ MBB = SinglePredSuccs[0];
+ if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+ Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+ RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
+ }
+ }
+ } while (MBB);
+
+ // Clear the availability info.
+ Spills.clear();
+ }
+
+ DOUT << "**** Post Machine Instrs ****\n";
+ DEBUG(MF.dump());
+
+ // Mark unused spill slots.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int SS = VRM.getLowSpillSlot();
+ if (SS != VirtRegMap::NO_STACK_SLOT)
+ for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS)
+ if (!VRM.isSpillSlotUsed(SS)) {
+ MFI->RemoveStackObject(SS);
+ ++NumDSS;
+ }
+
+ return true;
+ }
+
+private:
+
+ /// OptimizeByUnfold2 - Unfold a series of load / store folding instructions
+ /// if a scratch register is available.
+ /// xorq %r12<kill>, %r13
+ /// addq %rax, -184(%rbp)
+ /// addq %r13, -184(%rbp)
+ /// ==>
+ /// xorq %r12<kill>, %r13
+ /// movq -184(%rbp), %r12
+ /// addq %rax, %r12
+ /// addq %r13, %r12
+ /// movq %r12, -184(%rbp)
+ bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+
+ MachineBasicBlock::iterator NextMII = next(MII);
+ if (NextMII == MBB.end())
+ return false;
+
+ if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+ return false;
+
+ // Now let's see if the last couple of instructions happen to have freed
+ // up a register.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs);
+ if (!PhysReg)
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ TRI = MF.getTarget().getRegisterInfo();
+ MachineInstr &MI = *MII;
+ if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM))
+ return false;
+
+ // If the next instruction also folds the same SS modref and can be
+ // unfolded, then it's worthwhile to issue a load from SS into the free
+ // register and then unfold these instructions.
+ if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
+ return false;
+
+ // Load from SS to the spare physical register.
+ TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+ // Remember it's available.
+ Spills.addAvailable(SS, PhysReg);
+ MaybeDeadStores[SS] = NULL;
+
+ // Unfold current MI.
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+ assert(0 && "Unable to unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+ VRM.transferRestorePts(&MI, NewMIs[0]);
+ MII = MBB.insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ ++NumModRefUnfold;
+
+ // Unfold next instructions that fold the same SS.
+ do {
+ MachineInstr &NextMI = *NextMII;
+ NextMII = next(NextMII);
+ NewMIs.clear();
+ if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+ assert(0 && "Unable to unfold the load / store folding instruction!");
+ assert(NewMIs.size() == 1);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+ VRM.transferRestorePts(&NextMI, NewMIs[0]);
+ MBB.insert(NextMII, NewMIs[0]);
+ InvalidateKills(NextMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&NextMI);
+ MBB.erase(&NextMI);
+ ++NumModRefUnfold;
+ } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM));
+
+ // Store the value back into SS.
+ TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC);
+ MachineInstr *StoreMI = prior(NextMII);
+ VRM.addSpillSlotUse(SS, StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+ return true;
+ }
+
+ /// OptimizeByUnfold - Turn a store folding instruction into a load folding
+ /// instruction. e.g.
+ /// xorl %edi, %eax
+ /// movl %eax, -32(%ebp)
+ /// movl -36(%ebp), %eax
+ /// orl %eax, -32(%ebp)
+ /// ==>
+ /// xorl %edi, %eax
+ /// orl -36(%ebp), %eax
+ /// mov %eax, -32(%ebp)
+ /// This enables unfolding optimization for a subsequent instruction which
+ /// will also eliminate the newly introduced store instruction.
+ bool OptimizeByUnfold(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ AvailableSpills &Spills,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr &MI = *MII;
+ unsigned UnfoldedOpc = 0;
+ unsigned UnfoldPR = 0;
+ unsigned UnfoldVR = 0;
+ int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
+ // Only transform a MI that folds a single register.
+ if (UnfoldedOpc)
+ return false;
+ UnfoldVR = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ // MI2VirtMap can be updated, which would invalidate the iterator, so
+ // increment the iterator first.
+ ++I;
+ if (VRM.isAssignedReg(UnfoldVR))
+ continue;
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ FoldedSS = VRM.getStackSlot(UnfoldVR);
+ MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+ if (DeadStore && (MR & VirtRegMap::isModRef)) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+ if (!PhysReg || !DeadStore->readsRegister(PhysReg))
+ continue;
+ UnfoldPR = PhysReg;
+ UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ false, true);
+ }
+ }
+
+ if (!UnfoldedOpc) {
+ if (!UnfoldVR)
+ return false;
+
+ // Look for other unfolding opportunities.
+ return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII, + MaybeDeadStores, Spills, RegKills, KillOps, VRM); + } + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse()) + continue; + unsigned VirtReg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg()) + continue; + if (VRM.isAssignedReg(VirtReg)) { + unsigned PhysReg = VRM.getPhys(VirtReg); + if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR)) + return false; + } else if (VRM.isReMaterialized(VirtReg)) + continue; + int SS = VRM.getStackSlot(VirtReg); + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); + if (PhysReg) { + if (TRI->regsOverlap(PhysReg, UnfoldPR)) + return false; + continue; + } + if (VRM.hasPhys(VirtReg)) { + PhysReg = VRM.getPhys(VirtReg); + if (!TRI->regsOverlap(PhysReg, UnfoldPR)) + continue; + } + + // Ok, we'll need to reload the value into a register which makes + // it impossible to perform the store unfolding optimization later. + // Let's see if it is possible to fold the load if the store is + // unfolded. This allows us to perform the store unfolding + // optimization. + SmallVector<MachineInstr*, 4> NewMIs; + if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { + assert(NewMIs.size() == 1); + MachineInstr *NewMI = NewMIs.back(); + NewMIs.clear(); + int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); + assert(Idx != -1); + SmallVector<unsigned, 1> Ops; + Ops.push_back(Idx); + MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS); + if (FoldedMI) { + VRM.addSpillSlotUse(SS, FoldedMI); + if (!VRM.hasPhys(UnfoldVR)) + VRM.assignVirt2Phys(UnfoldVR, UnfoldPR); + VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); + MII = MBB.insert(MII, FoldedMI); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + MF.DeleteMachineInstr(NewMI); + return true; + } + MF.DeleteMachineInstr(NewMI); + } + } + + return false; + } + + /// CommuteToFoldReload - + /// Look for + /// r1 = load fi#1 + /// r1 = op r1, r2<kill> + /// store r1, fi#1 + /// + /// If op is commutable and r2 is killed, then we can xform these to + /// r2 = op r2, fi#1 + /// store r2, fi#1 + bool CommuteToFoldReload(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + unsigned VirtReg, unsigned SrcReg, int SS, + AvailableSpills &Spills, + BitVector &RegKills, + std::vector<MachineOperand*> &KillOps, + const TargetRegisterInfo *TRI, + VirtRegMap &VRM) { + if (MII == MBB.begin() || !MII->killsRegister(SrcReg)) + return false; + + MachineFunction &MF = *MBB.getParent(); + MachineInstr &MI = *MII; + MachineBasicBlock::iterator DefMII = prior(MII); + MachineInstr *DefMI = DefMII; + const TargetInstrDesc &TID = DefMI->getDesc(); + unsigned NewDstIdx; + if (DefMII != MBB.begin() && + TID.isCommutable() && + TII->CommuteChangesDestination(DefMI, NewDstIdx)) { + MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); + unsigned NewReg = NewDstMO.getReg(); + if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) + return false; + MachineInstr *ReloadMI = prior(DefMII); + int FrameIdx; + unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx); + if (DestReg != SrcReg || FrameIdx != SS) + return false; + int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false); + if (UseIdx == -1) + return false; + unsigned DefIdx; + if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx)) + return false; + 
assert(DefMI->getOperand(DefIdx).isReg() &&
+ DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+ // Now commute def instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+ if (!CommutedMI)
+ return false;
+ SmallVector<unsigned, 1> Ops;
+ Ops.push_back(NewDstIdx);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+ // Not needed since foldMemoryOperand returns new MI.
+ MF.DeleteMachineInstr(CommutedMI);
+ if (!FoldedMI)
+ return false;
+
+ VRM.addSpillSlotUse(SS, FoldedMI);
+ VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+ // Insert new def MI and spill MI.
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC);
+ MII = prior(MII);
+ MachineInstr *StoreMI = MII;
+ VRM.addSpillSlotUse(SS, StoreMI);
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ MII = MBB.insert(MII, FoldedMI); // Update MII to backtrack.
+
+ // Delete all 3 old instructions.
+ InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(ReloadMI);
+ MBB.erase(ReloadMI);
+ InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DefMI);
+ MBB.erase(DefMI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+
+ // If NewReg was previously holding the value of some SS, it's now
+ // clobbered. This has to be done now because it's a physical register.
+ // When this instruction is re-visited, it's ignored.
+ Spills.ClobberPhysReg(NewReg);
+
+ ++NumCommutes;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SpillRegToStackSlot - Spill a register to a specified stack slot. Check
+ /// if the last store to the same slot is now dead. If so, remove the last
+ /// store.
+ void SpillRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MII,
+ int Idx, unsigned PhysReg, int StackSlot,
+ const TargetRegisterClass *RC,
+ bool isAvailable, MachineInstr *&LastStore,
+ AvailableSpills &Spills,
+ SmallSet<MachineInstr*, 4> &ReMatDefs,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+
+ TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC);
+ MachineInstr *StoreMI = next(MII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ DOUT << "Store:\t" << *StoreMI;
+
+ // If there is a dead store to this stack slot, nuke it now.
+ if (LastStore) {
+ DOUT << "Removed dead store:\t" << *LastStore;
+ ++NumDSE;
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+ MachineBasicBlock::iterator PrevMII = LastStore;
+ bool CheckDef = PrevMII != MBB.begin();
+ if (CheckDef)
+ --PrevMII;
+ VRM.RemoveMachineInstrFromMaps(LastStore);
+ MBB.erase(LastStore);
+ if (CheckDef) {
+ // Look at defs of killed registers on the store. Mark the defs
+ // as dead since the store has been deleted and they aren't
+ // being reused.
+ for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+ bool HasOtherDef = false;
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) {
+ MachineInstr *DeadDef = PrevMII;
+ if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+ // FIXME: This assumes a remat def does not have side
+ // effects.
+ VRM.RemoveMachineInstrFromMaps(DeadDef);
+ MBB.erase(DeadDef);
+ ++NumDRM;
+ }
+ }
+ }
+ }
+ }
+
+ LastStore = next(MII);
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register available
+ // in PhysReg.
+ Spills.ModifyStackSlotOrReMat(StackSlot);
+ Spills.ClobberPhysReg(PhysReg);
+ Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+ ++NumStores;
+ }
+
+ /// TransferDeadness - An identity copy definition is dead and it's being
+ /// removed. Find the last def or use and mark it as dead / kill.
+ void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
+ unsigned Reg, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps,
+ VirtRegMap &VRM) {
+ SmallPtrSet<MachineInstr*, 4> Seens;
+ SmallVector<std::pair<MachineInstr*, int>,8> Refs;
+ for (MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(Reg),
+ RE = RegInfo->reg_end(); RI != RE; ++RI) {
+ MachineInstr *UDMI = &*RI;
+ if (UDMI->getParent() != MBB)
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+ if (DI == DistanceMap.end() || DI->second > CurDist)
+ continue;
+ if (Seens.insert(UDMI))
+ Refs.push_back(std::make_pair(UDMI, DI->second));
+ }
+
+ if (Refs.empty())
+ return;
+ std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+ while (!Refs.empty()) {
+ MachineInstr *LastUDMI = Refs.back().first;
+ Refs.pop_back();
+
+ MachineOperand *LastUD = NULL;
+ for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastUDMI->getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (!LastUD || (LastUD->isUse() && MO.isDef()))
+ LastUD = &MO;
+ if (LastUDMI->isRegTiedToDefOperand(i))
+ break;
+ }
+ if (LastUD->isDef()) {
+ // If the instruction has no side effect, delete it and propagate
+ // backward further. Otherwise, mark it dead and we are done.
+ const TargetInstrDesc &TID = LastUDMI->getDesc();
+ if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.hasUnmodeledSideEffects()) {
+ LastUD->setIsDead();
+ break;
+ }
+ VRM.RemoveMachineInstrFromMaps(LastUDMI);
+ MBB->erase(LastUDMI);
+ } else {
+ LastUD->setIsKill();
+ RegKills.set(Reg);
+ KillOps[Reg] = LastUD;
+ break;
+ }
+ }
+ }
+
+ /// RewriteMBB - Keep track of which spills are available even after the
+ /// register allocator is done with them. If possible, avoid reloading vregs.
+ void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+ LiveIntervals *LIs,
+ AvailableSpills &Spills, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+
+ DOUT << "\n**** Local spiller rewriting MBB '"
+ << MBB.getBasicBlock()->getName() << "':\n";
+
+ MachineFunction &MF = *MBB.getParent();
+
+ // MaybeDeadStores - When we need to write a value back into a stack slot,
+ // keep track of the inserted store. If the stack slot value is never read
+ // (because the value was used from some available register, for example), and
+ // subsequently stored to, the original store is dead. This map keeps track
+ // of inserted stores that are not used. If we see a subsequent store to the
+ // same stack slot, the original store is deleted.
+ std::vector<MachineInstr*> MaybeDeadStores;
+ MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+ // ReMatDefs - These are rematerializable def MIs which are not deleted.
+ SmallSet<MachineInstr*, 4> ReMatDefs;
+
+ // Clear kill info.
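+ // (Kill state is rebuilt for every block; kill flags that became stale
+ // because a value is now live into this block were already cleared by
+ // AddAvailableRegsToLiveIn.)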
+ SmallSet<unsigned, 2> KilledMIRegs;
+ RegKills.reset();
+ KillOps.clear();
+ KillOps.resize(TRI->getNumRegs(), NULL);
+
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MII != E; ) {
+ MachineBasicBlock::iterator NextMII = next(MII);
+
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+ bool Erased = false;
+ bool BackTracked = false;
+ if (OptimizeByUnfold(MBB, MII,
+ MaybeDeadStores, Spills, RegKills, KillOps, VRM))
+ NextMII = next(MII);
+
+ MachineInstr &MI = *MII;
+
+ if (VRM.hasEmergencySpills(&MI)) {
+ // Spill physical register(s) in the rare case the allocator has run out
+ // of registers to allocate.
+ SmallSet<int, 4> UsedSS;
+ std::vector<unsigned> &EmSpills = VRM.getEmergencySpills(&MI);
+ for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+ unsigned PhysReg = EmSpills[i];
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(PhysReg);
+ assert(RC && "Unable to determine register class!");
+ int SS = VRM.getEmergencySpillSlot(RC);
+ if (UsedSS.count(SS))
+ assert(0 && "Need to spill more than one physical register!");
+ UsedSS.insert(SS);
+ TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC);
+ MachineInstr *StoreMI = prior(MII);
+ VRM.addSpillSlotUse(SS, StoreMI);
+ TII->loadRegFromStackSlot(MBB, next(MII), PhysReg, SS, RC);
+ MachineInstr *LoadMI = next(MII);
+ VRM.addSpillSlotUse(SS, LoadMI);
+ ++NumPSpills;
+ }
+ NextMII = next(MII);
+ }
+
+ // Insert restores here if asked to.
+ if (VRM.isRestorePt(&MI)) {
+ std::vector<unsigned> &RestoreRegs = VRM.getRestorePtRestores(&MI);
+ for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+ unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
+ if (!VRM.getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ unsigned Phys = VRM.getPhys(VirtReg);
+ RegInfo->setPhysRegUsed(Phys);
+
+ // Check if the value being restored is available. If so, it must be
+ // from a predecessor BB that falls through into this BB. We do not
+ // expect:
+ // BB1:
+ // r1 = load fi#1
+ // ...
+ // = r1<kill>
+ // ... # r1 not clobbered
+ // ...
+ // = load fi#1
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+ if (InReg == Phys) {
+ // If the value is already available in the expected register, save
+ // a reload / remat.
+ if (SSorRMId)
+ DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << SSorRMId;
+ DOUT << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(Phys) << "\n";
+ ++NumOmitted;
+ continue;
+ } else if (InReg && InReg != Phys) {
+ if (SSorRMId)
+ DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << SSorRMId;
+ DOUT << " from physreg "
+ << TRI->getName(InReg) << " for vreg"
+ << VirtReg <<" by copying it into physreg "
+ << TRI->getName(Phys) << "\n";
+
+ // If the reloaded / remat value is available in another register,
+ // copy it to the desired register.
+ TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC);
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ // Mark it killed.
+ MachineInstr *CopyMI = prior(MII);
+ MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ DOUT << '\t' << *CopyMI;
+ ++NumCopified;
+ continue;
+ }
+
+ if (VRM.isReMaterialized(VirtReg)) {
+ ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM);
+ } else {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ }
+
+ // This invalidates Phys.
+ Spills.ClobberPhysReg(Phys);
+ // Remember it's available.
+ Spills.addAvailable(SSorRMId, Phys);
+
+ UpdateKills(*prior(MII), TRI, RegKills, KillOps);
+ DOUT << '\t' << *prior(MII);
+ }
+ }
+
+ // Insert spills here if asked to.
+ if (VRM.isSpillPt(&MI)) {
+ std::vector<std::pair<unsigned,bool> > &SpillRegs =
+ VRM.getSpillPtSpills(&MI);
+ for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+ unsigned VirtReg = SpillRegs[i].first;
+ bool isKill = SpillRegs[i].second;
+ if (!VRM.getPreSplitReg(VirtReg))
+ continue; // Split interval spilled again.
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+ unsigned Phys = VRM.getPhys(VirtReg);
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC);
+ MachineInstr *StoreMI = next(MII);
+ VRM.addSpillSlotUse(StackSlot, StoreMI);
+ DOUT << "Store:\t" << *StoreMI;
+ VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+ }
+ NextMII = next(MII);
+ }
+
+ /// ReusedOperands - Keep track of operand reuse in case we need to undo
+ /// reuse.
+ ReuseInfo ReusedOperands(MI, TRI);
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ RegInfo->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+ // If the virtual register is implicitly defined, emit an implicit_def
+ // before so the scavenger knows it's "defined".
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+ }
+
+ // Process all of the spilled uses and all non-spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ MachineOperand &MO = MI.getOperand(i);
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MO.getSubReg();
+ if (VRM.isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM.getPhys(VirtReg);
+ RegInfo->setPhysRegUsed(Phys);
+ if (MO.isDef())
+ ReusedOperands.markClobbered(Phys);
+ unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ if (VRM.isImplicitlyDefined(VirtReg))
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
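+ // For a spilled use (defs are handled in a later loop) there are
+ // three possibilities, tried in order below: reuse a physreg that
+ // already holds the slot's value, copy from such a physreg into the
+ // assigned register when direct reuse is unsafe, or fall back to an
+ // actual reload / remat.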
+ if (!MO.isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = false;
+ if (LIs->hasInterval(VirtReg)) {
+ LiveInterval &LI = LIs->getInterval(VirtReg);
+ if (!LI.liveAt(LIs->getUseIndex(LI.beginNumber())))
+ // Must be defined by an implicit def. It should not be spilled. Note,
+ // this is for correctness reasons. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and both target registers would overlap.
+ AvoidReload = true;
+ }
+
+ bool DoReMat = VRM.isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly so for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because it's not in
+ // the right register file.
+ if (PhysReg && !AvoidReload &&
+ (SubIdx || MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)) {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << ReuseSlot;
+ DOUT << " from physreg "
+ << TRI->getName(PhysReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(VRM.getPhys(VirtReg)) << "\n";
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // The only technical detail we have is that we don't know that
+ // PhysReg won't be clobbered by a reloaded stack slot that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM.getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot, we'll
+ // remove it after the other operands are processed if valid.
+
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+ // Mark it isKill if there are no other uses of the same virtual
+ // register and it's not a two-address operand. IsKill will be
+ // unset if reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
+ unsigned DesignatedReg = VRM.getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+
+ // If the mapped designated register is actually the physreg we have
+ // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << ReuseSlot;
+ DOUT << " from physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg
+ << " instead of reloading into same physreg.\n";
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ RegInfo->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+ TII->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC);
+
+ MachineInstr *CopyMI = prior(MII);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DOUT << '\t' << *prior(MII);
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM.getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, VRM);
+
+ RegInfo->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ if (DoReMat) {
+ ReMaterialize(MBB, MII, PhysReg, VirtReg, TII, TRI, VRM);
+ } else {
+ const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC);
+ MachineInstr *LoadMI = prior(MII);
+ VRM.addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(MII), TRI, RegKills, KillOps);
+ DOUT << '\t' << *prior(MII);
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DOUT << "Removed dead store:\t" << *DeadStore;
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(DeadStore);
+ MBB.erase(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+
+
+ DOUT << '\t' << MI;
+
+
+ // If we have folded references to memory operands, make sure we clear all
+ // physical registers that may contain the value of the spilled virtual
+ // register.
+ SmallSet<int, 2> FoldedSS;
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ DOUT << "Folded vreg: " << VirtReg << " MR: " << MR;
+
+ // MI2VirtMap can be updated, which would invalidate the iterator, so
+ // increment the iterator first.
+ ++I;
+ int SS = VRM.getStackSlot(VirtReg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ continue;
+ FoldedSS.insert(SS);
+ DOUT << " - StackSlot: " << SS << "\n";
+
+ // If this folded instruction is just a use, check to see if it's a
+ // straight load from the virt reg slot.
+ if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+ int FrameIdx;
+ unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+ if (DestReg && FrameIdx == SS) {
+ // If this spill slot is available, turn it into a copy (or nothing)
+ // instead of leaving it as a load!
+ if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+ DOUT << "Promoted Load To Copy: " << MI;
+ if (DestReg != InReg) {
+ const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
+ TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
+ MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+ unsigned SubIdx = DefMO->getSubReg();
+ // Revisit the copy so we make sure to notice the effects of the
+ // operation on the destreg (either needing to RA it if it's
+ // virtual or needing to clobber any values if it's physical).
+ NextMII = &MI;
+ --NextMII; // backtrack to the copy.
+ // Propagate the sub-register index over.
+ if (SubIdx) {
+ DefMO = NextMII->findRegisterDefOperand(DestReg);
+ DefMO->setSubReg(SubIdx);
+ }
+
+ // Mark it killed.
+ MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
+ KillOpnd->setIsKill();
+
+ BackTracked = true;
+ } else {
+ DOUT << "Removing now-noop copy: " << MI;
+ // Unset last kill since it's being reused.
+ InvalidateKill(InReg, TRI, RegKills, KillOps);
+ Spills.disallowClobberPhysReg(InReg);
+ }
+
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ goto ProcessNextInst;
+ }
+ } else {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ if (PhysReg &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+ MBB.insert(MII, NewMIs[0]);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ goto ProcessNextInst;
+ }
+ }
+ }
+
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ MachineInstr* DeadStore = MaybeDeadStores[SS];
+ if (DeadStore) {
+ bool isDead = !(MR & VirtRegMap::isRef);
+ MachineInstr *NewStore = NULL;
+ if (MR & VirtRegMap::isModRef) {
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+ SmallVector<MachineInstr*, 4> NewMIs;
+ // We can reuse this physreg as long as we are allowed to clobber
+ // the value and there isn't an earlier def that has already clobbered
+ // the physreg.
+ if (PhysReg &&
+ !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg) &&
+ !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+ MachineOperand *KillOpnd =
+ DeadStore->findRegisterUseOperand(PhysReg, true);
+ // Note, if the store is storing a sub-register, it's possible the
+ // super-register is needed below.
+ if (KillOpnd && !KillOpnd->getSubReg() &&
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true, NewMIs)) {
+ MBB.insert(MII, NewMIs[0]);
+ NewStore = NewMIs[1];
+ MBB.insert(MII, NewStore);
+ VRM.addSpillSlotUse(SS, NewStore);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM.RemoveMachineInstrFromMaps(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ --NextMII;
+ --NextMII; // backtrack to the unfolded instruction.
+ BackTracked = true;
+ isDead = true;
+ ++NumSUnfold;
+ }
+ }
+ }
+
+ if (isDead) { // Previous store is dead.
+ // If we get here, the store is dead, nuke it now.
+ DOUT << "Removed dead store:\t" << *DeadStore; + InvalidateKills(*DeadStore, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(DeadStore); + MBB.erase(DeadStore); + if (!NewStore) + ++NumDSE; + } + + MaybeDeadStores[SS] = NULL; + if (NewStore) { + // Treat this store as a spill merged into a copy. That makes the + // stack slot value available. + VRM.virtFolded(VirtReg, NewStore, VirtRegMap::isMod); + goto ProcessNextInst; + } + } + + // If the spill slot value is available, and this is a new definition of + // the value, the value is not available anymore. + if (MR & VirtRegMap::isMod) { + // Notice that the value in this stack slot has been modified. + Spills.ModifyStackSlotOrReMat(SS); + + // If this is *just* a mod of the value, check to see if this is just a + // store to the spill slot (i.e. the spill got merged into the copy). If + // so, realize that the vreg is available now, and add the store to the + // MaybeDeadStore info. + int StackSlot; + if (!(MR & VirtRegMap::isRef)) { + if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) { + assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && + "Src hasn't been allocated yet?"); + + if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot, + Spills, RegKills, KillOps, TRI, VRM)) { + NextMII = next(MII); + BackTracked = true; + goto ProcessNextInst; + } + + // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark + // this as a potentially dead store in case there is a subsequent + // store into the stack slot without a read from it. + MaybeDeadStores[StackSlot] = &MI; + + // If the stack slot value was previously available in some other + // register, change it now. Otherwise, make the register + // available in PhysReg. + Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg)); + } + } + } + } + + // Process all of the spilled defs. + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!(MO.isReg() && MO.getReg() && MO.isDef())) + continue; + + unsigned VirtReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // Check to see if this is a noop copy. If so, eliminate the + // instruction before considering the dest reg to be changed. + unsigned Src, Dst, SrcSR, DstSR; + if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { + ++NumDCE; + DOUT << "Removing now-noop copy: " << MI; + SmallVector<unsigned, 2> KillRegs; + InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); + if (MO.isDead() && !KillRegs.empty()) { + // Source register or an implicit super/sub-register use is killed. + assert(KillRegs[0] == Dst || + TRI->isSubRegister(KillRegs[0], Dst) || + TRI->isSuperRegister(KillRegs[0], Dst)); + // Last def is now dead. + TransferDeadness(&MBB, Dist, Src, RegKills, KillOps, VRM); + } + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + Spills.disallowClobberPhysReg(VirtReg); + goto ProcessNextInst; + } + + // If it's not a no-op copy, it clobbers the value in the destreg. + Spills.ClobberPhysReg(VirtReg); + ReusedOperands.markClobbered(VirtReg); + + // Check to see if this instruction is a load from a stack slot into + // a register. If so, this provides the stack slot value in the reg. + int FrameIdx; + if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { + assert(DestReg == VirtReg && "Unknown load situation!"); + + // If it is a folded reference, then it's not safe to clobber. 
+ bool Folded = FoldedSS.count(FrameIdx); + // Otherwise, if it wasn't available, remember that it is now! + Spills.addAvailable(FrameIdx, DestReg, !Folded); + goto ProcessNextInst; + } + + continue; + } + + unsigned SubIdx = MO.getSubReg(); + bool DoReMat = VRM.isReMaterialized(VirtReg); + if (DoReMat) + ReMatDefs.insert(&MI); + + // The only vregs left are stack slot definitions. + int StackSlot = VRM.getStackSlot(VirtReg); + const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); + + // If this def is part of a two-address operand, make sure to execute + // the store from the correct physical register. + unsigned PhysReg; + unsigned TiedOp; + if (MI.isRegTiedToUseOperand(i, &TiedOp)) { + PhysReg = MI.getOperand(TiedOp).getReg(); + if (SubIdx) { + unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI); + assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg && + "Can't find corresponding super-register!"); + PhysReg = SuperReg; + } + } else { + PhysReg = VRM.getPhys(VirtReg); + if (ReusedOperands.isClobbered(PhysReg)) { + // Another def has taken the assigned physreg. It must have been a + // use&def which got it due to reuse. Undo the reuse! + PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); + } + } + + assert(PhysReg && "VR not assigned a physical register?"); + RegInfo->setPhysRegUsed(PhysReg); + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + ReusedOperands.markClobbered(RReg); + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + + if (!MO.isDead()) { + MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; + SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true, + LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM); + NextMII = next(MII); + + // Check to see if this is a noop copy. If so, eliminate the + // instruction before considering the dest reg to be changed. + { + unsigned Src, Dst, SrcSR, DstSR; + if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { + ++NumDCE; + DOUT << "Removing now-noop copy: " << MI; + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + UpdateKills(*LastStore, TRI, RegKills, KillOps); + goto ProcessNextInst; + } + } + } + } + ProcessNextInst: + DistanceMap.insert(std::make_pair(&MI, Dist++)); + if (!Erased && !BackTracked) { + for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) + UpdateKills(*II, TRI, RegKills, KillOps); + } + MII = NextMII; + } + + } + +}; + +llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { + switch (RewriterOpt) { + default: assert(0 && "Unreachable!"); + case local: + return new LocalRewriter(); + case simple: + return new SimpleRewriter(); + case trivial: + return new TrivialRewriter(); + } +} diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h new file mode 100644 index 0000000..bc830f7 --- /dev/null +++ b/lib/CodeGen/VirtRegRewriter.h @@ -0,0 +1,56 @@ +//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
+#define LLVM_CODEGEN_VIRTREGREWRITER_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "VirtRegMap.h"
+#include <map>
+
+// TODO:
+// - Finish renaming Spiller -> Rewriter
+// - SimpleSpiller
+// - LocalSpiller
+
+namespace llvm {
+
+  /// VirtRegRewriter interface: Implementations of this interface assign
+  /// spilled virtual registers to stack slots, rewriting the code.
+  struct VirtRegRewriter {
+    virtual ~VirtRegRewriter();
+    virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                                      LiveIntervals* LIs) = 0;
+  };
+
+  /// createVirtRegRewriter - Create and return a rewriter object, as specified
+  /// on the command line.
+  VirtRegRewriter* createVirtRegRewriter();
+
+}
+
+#endif
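A note on usage: the interface and factory above are all a register allocator
needs to drive the rewriting stage. The following is a minimal sketch, not
part of the patch; it assumes the caller already owns a populated VirtRegMap
and, where the chosen rewriter wants one, a LiveIntervals analysis:

    #include "VirtRegRewriter.h"
    using namespace llvm;

    // Rewrite every spilled virtual register in MF according to VRM, using
    // whichever rewriter (trivial, simple, or local) was selected on the
    // command line. createVirtRegRewriter returns a heap-allocated object,
    // so the caller is responsible for deleting it.
    static bool rewriteVirtRegs(MachineFunction &MF, VirtRegMap &VRM,
                                LiveIntervals *LIs) {
      VirtRegRewriter *Rewriter = createVirtRegRewriter();
      bool Changed = Rewriter->runOnMachineFunction(MF, VRM, LIs);
      delete Rewriter;
      return Changed;
    }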