Diffstat (limited to 'contrib/llvm/lib/CodeGen')
105 files changed, 7814 insertions, 2940 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index a7189ac..5a634d6 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -41,8 +41,11 @@ DebugMod("agg-antidep-debugmod",
AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
MachineBasicBlock *BB) :
- NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) {
-
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0),
+ KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0)
+{
const unsigned BBSize = BB->size();
for (unsigned i = 0; i < NumTargetRegs; ++i) {
// Initialize all registers to be in their own group. Initially we
@@ -54,8 +57,7 @@ AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
}
}
-unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
-{
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
unsigned Node = GroupNodeIndices[Reg];
while (GroupNodes[Node] != Node)
Node = GroupNodes[Node];
@@ -145,8 +147,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -226,7 +228,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
DEBUG(MI->dump());
DEBUG(dbgs() << "\tRegs:");
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
// If Reg is current live, then mark that it can't be renamed as
// we don't know the extent of its live-range anymore (now that it
@@ -328,8 +330,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
const char *tag, const char *header,
const char *footer) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -364,7 +366,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
unsigned Count,
std::set<unsigned>& PassthruRegs) {
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -560,8 +562,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned AntiDepGroupIndex,
RenameOrderType& RenameOrder,
std::map<unsigned, unsigned> &RenameMap) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -652,6 +654,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (R == RB) R = RE;
--R;
const unsigned NewSuperReg = *R;
+ // Don't consider non-allocatable registers
+ if (!AllocatableSet.test(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -733,8 +737,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
index 91ebb85..9d715cc 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -59,27 +59,27 @@ namespace llvm {
/// currently representing the group that the register belongs to.
/// Register 0 is always represented by the 0 group, a group
/// composed of registers that are not eligible for anti-aliasing.
- unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> GroupNodeIndices;
/// RegRefs - Map registers to all their references within a live range.
std::multimap<unsigned, RegisterReference> RegRefs;
/// KillIndices - The index of the most recent kill (proceding bottom-up),
/// or ~0u if the register is not live.
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> KillIndices;
/// DefIndices - The index of the most recent complete def (proceding bottom
/// up), or ~0u if the register is live.
- unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> DefIndices;
public:
AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
/// GetKillIndices - Return the kill indices.
- unsigned *GetKillIndices() { return KillIndices; }
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
/// GetDefIndices - Return the define indices.
- unsigned *GetDefIndices() { return DefIndices; }
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
/// GetRegRefs - Return the RegRefs map.
std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
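Editor's note: the pair of hunks above replaces fixed-size arrays dimensioned by TargetRegisterInfo::FirstVirtualRegister with std::vectors sized from the target's register count at construction time, and the accessors now hand out references instead of raw pointers. A minimal sketch of the pattern, with simplified names that are not the actual LLVM class:

#include <vector>

// Sketch only: state sized at construction from the target's physical
// register count instead of a compile-time array bound.
class AntiDepState {
  std::vector<unsigned> KillIndices; // ~0u => register not live
  std::vector<unsigned> DefIndices;  // ~0u => register live
public:
  explicit AntiDepState(unsigned NumTargetRegs)
      : KillIndices(NumTargetRegs, ~0u), DefIndices(NumTargetRegs, ~0u) {}
  // Returning references (not pointers) keeps the container type visible to
  // callers and allows the storage to be resized or bounds-checked later.
  std::vector<unsigned> &GetKillIndices() { return KillIndices; }
  std::vector<unsigned> &GetDefIndices() { return DefIndices; }
};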
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index f71eee5..e3dd646 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -109,7 +109,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
V = V->stripPointerCasts();
GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
- if (GV && GV->getName() == ".llvm.eh.catch.all.value") {
+ if (GV && GV->getName() == "llvm.eh.catch.all.value") {
assert(GV->hasInitializer() &&
"The EH catch-all value must have an initializer");
Value *Init = GV->getInitializer();
@@ -171,7 +171,7 @@ ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
FOC = FPC = ISD::SETFALSE;
break;
}
- if (FiniteOnlyFPMath())
+ if (NoNaNsFPMath)
return FOC;
else
return FPC;
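Editor's note: the getFCmpCondCode change above swaps the retired FiniteOnlyFPMath() predicate for the NoNaNsFPMath flag; the logic is unchanged, in that when NaNs are known absent the ordered-only condition code is safe to use. A reduced sketch with stand-in names:

// Sketch only: FOC is the "ordered comparisons only" code, FPC the full
// (possibly unordered) code; NoNaNsFPMath mirrors the flag in the patch.
enum CondCode { SETOLT /*ordered less-than*/, SETLT /*may be unordered*/ };

static CondCode pickFCmpCode(bool NoNaNsFPMath, CondCode FOC, CondCode FPC) {
  return NoNaNsFPMath ? FOC : FPC;
}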
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index db1b37a..d358ab2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -91,7 +91,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
- : MachineFunctionPass(&ID),
+ : MachineFunctionPass(ID),
TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer.getContext()),
OutStreamer(Streamer),
@@ -200,11 +200,17 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
- // .weak_definition _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+
+ if ((GlobalValue::LinkageTypes)Linkage !=
+ GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
} else if (MAI->getLinkOnceDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
@@ -510,12 +516,8 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
}
// Check for spill-induced copies
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg,
- SrcSubIdx, DstSubIdx)) {
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
- CommentOS << " Reload Reuse\n";
- }
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
}
/// EmitImplicitDef - This method emits the specified machine instruction
@@ -603,12 +605,15 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
+ const MachineInstr *LastMI = 0;
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
// Print a label for the basic block.
EmitBasicBlockStart(I);
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
+ LastMI = II;
+
// Print the assembly for the instruction.
if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
!II->isDebugValue()) {
@@ -625,7 +630,7 @@ void AsmPrinter::EmitFunctionBody() {
EmitComments(*II, OutStreamer.GetCommentOS());
switch (II->getOpcode()) {
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
@@ -656,11 +661,18 @@ void AsmPrinter::EmitFunctionBody() {
}
}
}
-
+
+ // If the last instruction was a prolog label, then we have a situation where
+ // we emitted a prolog but no function body. This results in the ending prolog
+ // label equaling the end of function label and an invalid "row" in the
+ // FDE. We need to emit a noop in this situation so that the FDE's rows are
+ // valid.
+ bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
- if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) {
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
MCInst Noop;
TM.getInstrInfo()->getNoopForMachoTarget(Noop);
if (Noop.getOpcode()) {
@@ -1206,6 +1218,22 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
}
}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size)
+ const {
+
+ // Emit Label+Offset
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
+}
//===----------------------------------------------------------------------===//
@@ -1244,6 +1272,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
@@ -1262,10 +1291,17 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
if (C != CE)
return LowerConstant(C, AP);
-#ifndef NDEBUG
- CE->dump();
-#endif
- llvm_unreachable("FIXME: Don't support this constant expr");
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ return MCConstantExpr::Create(0, Ctx);
case Instruction::GetElementPtr: {
const TargetData &TD = *AP.TM.getTargetData();
// Generate a symbolic expression for the byte address
@@ -1413,21 +1449,6 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
"Layout of constant struct may be incorrect!");
}
-static void EmitGlobalConstantUnion(const ConstantUnion *CU,
- unsigned AddrSpace, AsmPrinter &AP) {
- const TargetData *TD = AP.TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CU->getType());
-
- const Constant *Contents = CU->getOperand(0);
- unsigned FilledSize = TD->getTypeAllocSize(Contents->getType());
-
- // Print the actually filled part
- EmitGlobalConstantImpl(Contents, AddrSpace, AP);
-
- // And pad with enough zeroes
- AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace);
-}
-
static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
// FP Constants are printed as integer constants to avoid losing
@@ -1530,7 +1551,7 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
case 8:
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
- AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
return;
default:
EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
@@ -1553,9 +1574,6 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return;
}
- if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV))
- return EmitGlobalConstantUnion(CVU, AddrSpace, AP);
-
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return EmitGlobalConstantVector(V, AddrSpace, AP);
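Editor's note: among the AsmPrinter.cpp changes above, the new EmitLabelPlusOffset helper builds the directive as an MCExpr rather than raw text. A standalone sketch using the 2.8-era MC calls exactly as they appear in the hunk; note the patch passes a literal 4 to EmitValue rather than its Size parameter:

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include <cstdint>

// Sketch only: emit the equivalent of ".long Label+Offset" through MC.
static void emitLabelPlusOffset(llvm::MCStreamer &OutStreamer,
                                llvm::MCContext &Ctx,
                                const llvm::MCSymbol *Label, uint64_t Offset) {
  using namespace llvm;
  // Build the expression "Label + Offset" and hand it to the streamer.
  const MCExpr *Plus =
      MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, Ctx),
                              MCConstantExpr::Create(Offset, Ctx), Ctx);
  OutStreamer.EmitValue(Plus, 4, 0 /*AddrSpace*/);
}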
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index b310578..ce4519c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -36,7 +36,7 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128()) {
+ if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) {
// FIXME: MCize.
OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value));
return;
@@ -61,7 +61,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128() && PadTo == 0) {
+ if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) {
// FIXME: MCize.
OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value));
return;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 202d9b6..df03168 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -22,7 +22,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
@@ -72,16 +71,18 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
- AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI);
- OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser));
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
+ OutContext, OutStreamer,
+ *MAI));
+ OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
- Parser.setTargetParser(*TAP.get());
+ Parser->setTargetParser(*TAP.get());
// Don't implicitly switch to the text section before the asm.
- int Res = Parser.Run(/*NoInitialTextSection*/ true,
- /*NoFinalize*/ true);
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
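Editor's note: the inline-asm hunk above is an API migration from constructing a concrete AsmParser to the createMCAsmParser factory, which returns the abstract MCAsmParser; createAsmParser now also takes the TargetMachine. Lifted into a helper for clarity (a sketch; header paths follow the 2.8-era tree and should be treated as assumptions):

#include "llvm/ADT/OwningPtr.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"

// Sketch only: the post-patch setup sequence in isolation.
static void parseInlineAsm(llvm::TargetMachine &TM, llvm::SourceMgr &SrcMgr,
                           llvm::MCContext &Ctx, llvm::MCStreamer &Out,
                           const llvm::MCAsmInfo &MAI) {
  using namespace llvm;
  OwningPtr<MCAsmParser> Parser(
      createMCAsmParser(TM.getTarget(), SrcMgr, Ctx, Out, MAI));
  OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
  if (!TAP)
    report_fatal_error("Inline asm not supported by this streamer");
  Parser->setTargetParser(*TAP.get());
  Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true);
}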
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 65c1d19..c886a5e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -44,7 +44,7 @@ using namespace llvm;
static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
cl::desc("Print DbgScope information for each machine instruction"));
-static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
cl::Hidden,
cl::desc("Disable debug info printing"));
@@ -116,8 +116,8 @@ public:
/// addGlobalType - Add a new global type to the compile unit.
///
- void addGlobalType(StringRef Name, DIE *Die) {
- GlobalTypes[Name] = Die;
+ void addGlobalType(StringRef Name, DIE *Die) {
+ GlobalTypes[Name] = Die;
}
/// getDIE - Returns the debug information entry map slot for the
@@ -131,8 +131,9 @@ public:
/// getDIEEntry - Returns the debug information entry for the speciefied
/// debug variable.
- DIEEntry *getDIEEntry(const MDNode *N) {
- DenseMap<const MDNode *, DIEEntry *>::iterator I = MDNodeToDIEEntryMap.find(N);
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
if (I == MDNodeToDIEEntryMap.end())
return NULL;
return I->second;
@@ -179,6 +180,73 @@ public:
DIE *getDIE() const { return TheDIE; }
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+ StringRef getName() const { return Var.getName(); }
+ unsigned getTag() const { return Var.getTag(); }
+ bool variableHasComplexAddress() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.hasComplexAddress();
+ }
+ bool isBlockByrefVariable() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.isBlockByrefVariable();
+ }
+ unsigned getNumAddrElements() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.getNumAddrElements();
+ }
+ uint64_t getAddrElement(unsigned i) const {
+ return Var.getAddrElement(i);
+ }
+ DIType getType() const {
+ DIType Ty = Var.getType();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Var.isBlockByrefVariable()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType);
+ DIArray Elements = blockStruct.getTypeArray();
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ if (getName() == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+ return Ty;
+ }
+ return Ty;
+ }
};
//===----------------------------------------------------------------------===//
@@ -194,7 +262,7 @@ class DbgScope {
DbgScope *Parent; // Parent to this scope.
DIDescriptor Desc; // Debug info descriptor for scope.
// Location at which this scope is inlined.
- AssertingVH<const MDNode> InlinedAtLocation;
+ AssertingVH<const MDNode> InlinedAtLocation;
bool AbstractScope; // Abstract Scope
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
@@ -220,19 +288,19 @@ public:
const MDNode *getInlinedAt() const { return InlinedAtLocation; }
const MDNode *getScopeNode() const { return Desc; }
const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
- const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
+ const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; }
const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
/// openInsnRange - This scope covers instruction range starting from MI.
void openInsnRange(const MachineInstr *MI) {
- if (!FirstInsn)
+ if (!FirstInsn)
FirstInsn = MI;
-
+
if (Parent)
Parent->openInsnRange(MI);
}
- /// extendInsnRange - Extend the current instruction range covered by
+ /// extendInsnRange - Extend the current instruction range covered by
/// this scope.
void extendInsnRange(const MachineInstr *MI) {
assert (FirstInsn && "MI Range is not open!");
@@ -247,9 +315,9 @@ public:
void closeInsnRange(DbgScope *NewScope = NULL) {
assert (LastInsn && "Last insn missing!");
Ranges.push_back(DbgRange(FirstInsn, LastInsn));
- FirstInsn = NULL;
+ FirstInsn = NULL;
LastInsn = NULL;
- // If Parent dominates NewScope then do not close Parent's instruction
+ // If Parent dominates NewScope then do not close Parent's instruction
// range.
if (Parent && (!NewScope || !Parent->dominates(NewScope)))
Parent->closeInsnRange(NewScope);
}
@@ -264,7 +332,7 @@ public:
unsigned getDFSIn() const { return DFSIn; }
void setDFSIn(unsigned I) { DFSIn = I; }
bool dominates(const DbgScope *S) {
- if (S == this)
+ if (S == this)
return true;
if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
return true;
@@ -313,14 +381,13 @@ DbgScope::~DbgScope() {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), FirstCU(0),
- AbbreviationsSet(InitAbbreviationsSetSize),
+ AbbreviationsSet(InitAbbreviationsSetSize),
CurrentFnDbgScope(0), PrevLabel(NULL) {
NextStringPoolNumber = 0;
-
+
DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
- DwarfDebugLineSectionSym = CurrentLineSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
{
@@ -377,7 +444,7 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
unsigned Form, uint64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(false, Integer);
- DIEValue *Value = Integer == 1 ?
+ DIEValue *Value = Integer == 1 ?
DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
Die->addValue(Attribute, Form, Value);
}
@@ -392,7 +459,7 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
}
/// addString - Add a string attribute data and value. DIEString only
-/// keeps string reference.
+/// keeps string reference.
void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
StringRef String) {
DIEValue *Value = new (DIEValueAllocator) DIEString(String);
@@ -434,14 +501,14 @@ void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
+void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) {
// Verify variable.
- if (!V->Verify())
+ if (!V.Verify())
return;
- unsigned Line = V->getLineNumber();
- unsigned FileID = GetOrCreateSourceID(V->getContext().getDirectory(),
- V->getContext().getFilename());
+ unsigned Line = V.getLineNumber();
+ unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(),
+ V.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -449,14 +516,14 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) {
+void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) {
// Verify global variable.
- if (!G->Verify())
+ if (!G.Verify())
return;
- unsigned Line = G->getLineNumber();
- unsigned FileID = GetOrCreateSourceID(G->getContext().getDirectory(),
- G->getContext().getFilename());
+ unsigned Line = G.getLineNumber();
+ unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(),
+ G.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -464,19 +531,19 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
+void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) {
// Verify subprogram.
- if (!SP->Verify())
+ if (!SP.Verify())
return;
// If the line number is 0, don't add it.
- if (SP->getLineNumber() == 0)
+ if (SP.getLineNumber() == 0)
return;
- unsigned Line = SP->getLineNumber();
- if (!SP->getContext().Verify())
+ unsigned Line = SP.getLineNumber();
+ if (!SP.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(SP->getDirectory(),
- SP->getFilename());
+ unsigned FileID = GetOrCreateSourceID(SP.getDirectory(),
+ SP.getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -484,16 +551,16 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
+void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) {
// Verify type.
- if (!Ty->Verify())
+ if (!Ty.Verify())
return;
- unsigned Line = Ty->getLineNumber();
- if (!Ty->getContext().Verify())
+ unsigned Line = Ty.getLineNumber();
+ if (!Ty.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(Ty->getContext().getDirectory(),
- Ty->getContext().getFilename());
+ unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(),
+ Ty.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -501,14 +568,14 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
+void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
// Verify namespace.
- if (!NS->Verify())
+ if (!NS.Verify())
return;
- unsigned Line = NS->getLineNumber();
- StringRef FN = NS->getFilename();
- StringRef Dir = NS->getDirectory();
+ unsigned Line = NS.getLineNumber();
+ StringRef FN = NS.getFilename();
+ StringRef Dir = NS.getDirectory();
unsigned FileID = GetOrCreateSourceID(Dir, FN);
assert(FileID && "Invalid file id");
@@ -516,55 +583,21 @@ void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/* Byref variables, in Blocks, are declared by the programmer as
- "SomeType VarName;", but the compiler creates a
- __Block_byref_x_VarName struct, and gives the variable VarName
- either the struct, or a pointer to the struct, as its type. This
- is necessary for various behind-the-scenes things the compiler
- needs to do with by-reference variables in blocks.
-
- However, as far as the original *programmer* is concerned, the
- variable should still have type 'SomeType', as originally declared.
-
- The following function dives into the __Block_byref_x_VarName
- struct to find the original type of the variable. This will be
- passed back to the code generating the type for the Debug
- Information Entry for the variable 'VarName'. 'VarName' will then
- have the original type 'SomeType' in its debug information.
-
- The original type 'SomeType' will be the type of the field named
- 'VarName' inside the __Block_byref_x_VarName struct.
-
- NOTE: In order for this to not completely fail on the debugger
- side, the Debug Information Entry for the variable VarName needs to
- have a DW_AT_location that tells the debugger how to unwind through
- the pointers and __Block_byref_x_VarName struct to find the actual
- value of the variable. The function addBlockByrefType does this. */
-
-/// Find the type the programmer originally declared the variable to be
-/// and return that type.
-///
-DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
-
- DIType subType = Ty;
- unsigned tag = Ty.getTag();
-
- if (tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy = DIDerivedType(Ty);
- subType = DTy.getTypeDerivedFrom();
- }
-
- DICompositeType blockStruct = DICompositeType(subType);
- DIArray Elements = blockStruct.getTypeArray();
-
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- DIDerivedType DT = DIDerivedType(Element);
- if (Name == DT.getName())
- return (DT.getTypeDerivedFrom());
- }
-
- return Ty;
+/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
+/// on provided frame index.
+void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) {
+ MachineLocation Location;
+ unsigned FrameReg;
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ Location.set(FrameReg, Offset);
+
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
}
/// addComplexAddress - Start with the address based on the location provided,
@@ -575,8 +608,7 @@ DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
unsigned Attribute,
const MachineLocation &Location) {
- const DIVariable &VD = DV->getVariable();
- DIType Ty = VD.getType();
+ DIType Ty = DV->getType();
// Decode the original location, and use that as the start of the byref
// variable's location.
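Editor's note: the new addVariableAddress helper above centralizes logic that constructVariableDIE previously duplicated: resolve a frame index to a (frame register, offset) pair, then dispatch on the variable's flavor. A compact sketch of that dispatch shape, with simplified stand-in types rather than the real DWARF emitters:

// Sketch only: the three emission paths behind addVariableAddress.
enum class VarKind { Plain, ComplexAddress, BlockByref };

struct Location { unsigned Reg; int Offset; };

void addComplexAddress(const Location &);   // stubs for illustration
void addBlockByrefAddress(const Location &);
void addAddress(const Location &);

// Mirrors addVariableAddress: resolve the frame index, then dispatch.
void addVariableAddress(VarKind Kind, unsigned FrameReg, int Offset) {
  Location Loc{FrameReg, Offset};
  if (Kind == VarKind::ComplexAddress)
    addComplexAddress(Loc);
  else if (Kind == VarKind::BlockByref)
    addBlockByrefAddress(Loc);
  else
    addAddress(Loc);
}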
@@ -603,12 +635,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
}
- for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) {
- uint64_t Element = VD.getAddrElement(i);
+ for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
if (Element == DIFactory::OpPlus) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
} else if (Element == DIFactory::OpDeref) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
} else llvm_unreachable("unknown DIFactory Opcode");
@@ -681,13 +713,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
unsigned Attribute,
const MachineLocation &Location) {
- const DIVariable &VD = DV->getVariable();
- DIType Ty = VD.getType();
+ DIType Ty = DV->getType();
DIType TmpTy = Ty;
unsigned Tag = Ty.getTag();
bool isPointer = false;
- StringRef varName = VD.getName();
+ StringRef varName = DV->getName();
if (Tag == dwarf::DW_TAG_pointer_type) {
DIDerivedType DTy = DIDerivedType(Ty);
@@ -835,26 +866,26 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
assert (MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
APFloat FPImm = MO.getFPImm()->getValueAPF();
-
+
// Get the raw data form of the floating point.
const APInt FltVal = FPImm.bitcastToAPInt();
const char *FltPtr = (const char*)FltVal.getRawData();
-
+
int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
bool LittleEndian = Asm->getTargetData().isLittleEndian();
int Incr = (LittleEndian ? 1 : -1);
int Start = (LittleEndian ? 0 : NumBytes - 1);
int Stop = (LittleEndian ? NumBytes : -1);
-
+
// Output the constant to DWARF one byte at a time.
for (; Start != Stop; Start += Incr)
addUInt(Block, 0, dwarf::DW_FORM_data1,
(unsigned char)0xFF & FltPtr[Start]);
-
+
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
if (VS)
addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
- return true;
+ return true;
}
@@ -872,7 +903,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
ContextDIE->addChild(Die);
} else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
ContextDIE->addChild(Die);
- else
+ else
getCompileUnit(Context)->addDie(Die);
}
@@ -965,7 +996,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy.isForwardDecl())
- addSourceLine(&Buffer, &DTy);
+ addSourceLine(&Buffer, DTy);
}
/// constructTypeDIE - Construct type DIE from DICompositeType.
@@ -1039,7 +1070,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addType(ElemDie, DV.getType());
addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- addSourceLine(ElemDie, &DV);
+ addSourceLine(ElemDie, DV);
} else if (Element.isDerivedType())
ElemDie = createMemberDIE(DIDerivedType(Element));
else
@@ -1057,7 +1088,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DICompositeType ContainingType = CTy.getContainingType();
if (DIDescriptor(ContainingType).isCompositeType())
- addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
getOrCreateTypeDIE(DIType(ContainingType)));
else {
DIDescriptor Context = CTy.getContext();
@@ -1073,7 +1104,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
|| Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized.)
@@ -1089,7 +1120,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add source line info if available.
if (!CTy.isForwardDecl())
- addSourceLine(&Buffer, &CTy);
+ addSourceLine(&Buffer, CTy);
}
}
@@ -1149,7 +1180,7 @@ DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) {
return Enumerator;
}
-/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
+/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
/// printer to not emit usual symbol prefix before the symbol name is used then
/// return linkage name after skipping this special LLVM prefix.
static StringRef getRealLinkageName(StringRef LinkageName) {
@@ -1159,40 +1190,16 @@ static StringRef getRealLinkageName(StringRef LinkageName) {
return LinkageName;
}
-/// createGlobalVariableDIE - Create new DIE using GV.
-DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
- // If the global variable was optmized out then no need to create debug info
- // entry.
- if (!GV.getGlobal()) return NULL;
- if (GV.getDisplayName().empty()) return NULL;
-
- DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
- addString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- GV.getDisplayName());
-
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
-
- addType(GVDie, GV.getType());
- if (!GV.isLocalToUnit())
- addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- addSourceLine(GVDie, &GV);
-
- return GVDie;
-}
-
/// createMemberDIE - Create new member DIE.
-DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
+DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
DIE *MemberDie = new DIE(DT.getTag());
StringRef Name = DT.getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
+
addType(MemberDie, DT.getTypeDerivedFrom());
- addSourceLine(MemberDie, &DT);
+ addSourceLine(MemberDie, DT);
DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
@@ -1240,7 +1247,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
VBaseLocationDie);
} else
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
@@ -1261,7 +1268,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
}
/// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
CompileUnit *SPCU = getCompileUnit(SP);
DIE *SPDie = SPCU->getDIE(SP);
if (SPDie)
@@ -1277,7 +1284,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
getRealLinkageName(LinkageName));
- addSourceLine(SPDie, &SP);
+ addSourceLine(SPDie, SP);
// Add prototyped tag, if C or ObjC.
unsigned Lang = SP.getCompileUnit().getLanguage();
@@ -1302,7 +1309,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
- ContainingTypeMap.insert(std::make_pair(SPDie,
+ ContainingTypeMap.insert(std::make_pair(SPDie,
SP.getContainingType()));
}
@@ -1331,10 +1338,14 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
if (!SP.isLocalToUnit())
addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
+
if (SP.isOptimized())
addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+ if (unsigned isa = Asm->getISAEncoding()) {
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
// DW_TAG_inlined_subroutine may refer to this DIE.
SPCU->insertDIE(SP, SPDie);
@@ -1394,18 +1405,18 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
assert(SPDie && "Unable to find subprogram DIE!");
DISubprogram SP(SPNode);
-
+
// There is not any need to generate specification DIE for a function
// defined at compile unit level. If a function is defined inside another
// function then gdb prefers the definition at top level and but does not
- // expect specification DIE in parent function. So avoid creating
+ // expect specification DIE in parent function. So avoid creating
// specification DIE for a function defined inside a function.
if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
- !SP.getContext().isFile() &&
+ !SP.getContext().isFile() &&
!isSubprogramContext(SP.getContext())) {
addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-
- // Add arguments.
+
+ // Add arguments.
DICompositeType SPTy = SP.getType();
DIArray Args = SPTy.getTypeArray();
unsigned SPTag = SPTy.getTag();
@@ -1420,11 +1431,11 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
}
DIE *SPDeclDie = SPDie;
SPDie = new DIE(dwarf::DW_TAG_subprogram);
- addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
SPDeclDie);
SPCU->addDie(SPDie);
}
-
+
// Pick up abstract subprogram DIE.
if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
SPDie = new DIE(dwarf::DW_TAG_subprogram);
@@ -1459,7 +1470,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
if (Ranges.size() > 1) {
// .debug_range section has not been laid out yet. Emit offset in
- // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
@@ -1480,7 +1491,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
assert(End->isDefined() && "Invalid end label for an inlined scope!");
-
+
addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
@@ -1493,7 +1504,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
- assert (Ranges.empty() == false
+ assert (Ranges.empty() == false
&& "DbgScope does not have instruction markers!");
// FIXME : .debug_inlined section specification does not clearly state how
@@ -1551,16 +1562,14 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
- // Get the descriptor.
- const DIVariable &VD = DV->getVariable();
- StringRef Name = VD.getName();
+ StringRef Name = DV->getName();
if (Name.empty())
return NULL;
// Translate tag to proper Dwarf tag. The result variable is dropped for
// now.
unsigned Tag;
- switch (VD.getTag()) {
+ switch (DV->getTag()) {
case dwarf::DW_TAG_return_variable:
return NULL;
case dwarf::DW_TAG_arg_variable:
@@ -1586,18 +1595,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
dwarf::DW_FORM_ref4, AbsDIE);
else {
addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- addSourceLine(VariableDie, &VD);
+ addSourceLine(VariableDie, DV->getVariable());
// Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex
- // addresses instead.
- if (VD.isBlockByrefVariable())
- addType(VariableDie, getBlockByrefType(VD.getType(), Name));
- else
- addType(VariableDie, VD.getType());
+ addType(VariableDie, DV->getType());
}
- if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
+ if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
if (Scope->isAbstractScope()) {
@@ -1623,15 +1627,22 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
const MachineInstr *DVInsn = DVI->second;
const MCSymbol *DVLabel = findVariableLabel(DV);
bool updated = false;
- // FIXME : Handle getNumOperands != 3
+ // FIXME : Handle getNumOperands != 3
if (DVInsn->getNumOperands() == 3) {
- if (DVInsn->getOperand(0).isReg())
- updated =
- addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm());
+ updated = true;
+ } else
+ updated = addRegisterAddress(VariableDie, DVLabel, RegOp);
+ }
else if (DVInsn->getOperand(0).isImm())
updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isFPImm())
- updated =
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
} else {
MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
@@ -1651,24 +1662,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
}
DV->setDIE(VariableDie);
return VariableDie;
- }
+ }
// .. else use frame index, if available.
- MachineLocation Location;
- unsigned FrameReg;
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
int FI = 0;
- if (findVariableFrameIndex(DV, &FI)) {
- int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- Location.set(FrameReg, Offset);
-
- if (VD.hasComplexAddress())
- addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else if (VD.isBlockByrefVariable())
- addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else
- addAddress(VariableDie, dwarf::DW_AT_location, Location);
- }
+ if (findVariableFrameIndex(DV, &FI))
+ addVariableAddress(DV, VariableDie, FI);
+
DV->setDIE(VariableDie);
return VariableDie;
@@ -1677,7 +1677,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
void DwarfDebug::addPubTypes(DISubprogram SP) {
DICompositeType SPTy = SP.getType();
unsigned SPTag = SPTy.getTag();
- if (SPTag != dwarf::DW_TAG_subroutine_type)
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
return;
DIArray Args = SPTy.getTypeArray();
@@ -1699,7 +1699,7 @@ void DwarfDebug::addPubTypes(DISubprogram SP) {
DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
-
+
DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
@@ -1718,9 +1718,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
else
ScopeDIE = constructLexicalScopeDIE(Scope);
if (!ScopeDIE) return NULL;
-
+
// Add variables to scope.
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
if (VariableDIE)
@@ -1736,9 +1736,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
ScopeDIE->addChild(NestedDIE);
}
- if (DS.isSubprogram())
+ if (DS.isSubprogram())
addPubTypes(DISubprogram(DS));
-
+
return ScopeDIE;
}
@@ -1748,6 +1748,8 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
/// maps as well.
unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
unsigned DId;
+ assert (DirName.empty() == false && "Invalid directory name!");
+
StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
if (DI != DirectoryIdMap.end()) {
DId = DI->getValue();
@@ -1789,12 +1791,12 @@ DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
TheCU->insertDIE(NS, NDie);
if (!NS.getName().empty())
addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
- addSourceLine(NDie, &NS);
+ addSourceLine(NDie, NS);
addToContextOwner(NDie, NS.getContext());
return NDie;
}
-/// constructCompileUnit - Create new CompileUnit for the given
+/// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
void DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
@@ -1812,9 +1814,12 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
// simplifies debug range entries.
addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is a offset of line number information for this
- // compile unit in debug_line section. This offset is calculated
- // during endMoudle().
- addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ // compile unit in debug_line section.
+ if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList())
+ addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("section_line"));
+ else
+ addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
if (!Dir.empty())
addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
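Editor's note: the DW_AT_stmt_list hunk above replaces a hard-coded data4 label with a target-dependent choice: assemblers that need an absolute reference to the line table get a symbol, everyone else keeps the 4-byte section offset that is filled in later. The shape of the decision, with addLabel/addUInt and the MAI query named as in the hunk:

// Sketch only, restating the patch's branch for emphasis.
if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList())
  addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
           Asm->GetTempSymbol("section_line"));
else
  addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);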
@@ -1865,64 +1870,98 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
return I->second;
}
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+ DIDerivedType DTy(Ty);
+ if (DTy.Verify())
+ return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char)
+ return true;
+ }
+ return false;
+}
/// constructGlobalVariableDIE - Construct global variable DIE.
void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
- DIGlobalVariable DI_GV(N);
+ DIGlobalVariable GV(N);
// If debug information is malformed then ignore it.
- if (DI_GV.Verify() == false)
+ if (GV.Verify() == false)
return;
// Check for pre-existence.
CompileUnit *TheCU = getCompileUnit(N);
- if (TheCU->getDIE(DI_GV))
+ if (TheCU->getDIE(GV))
return;
- DIE *VariableDie = createGlobalVariableDIE(DI_GV);
- if (!VariableDie)
- return;
-
- // Add to map.
- TheCU->insertDIE(N, VariableDie);
+ DIType GTy = GV.getType();
+ DIE *VariableDIE = new DIE(GV.getTag());
- // Add to context owner.
- DIDescriptor GVContext = DI_GV.getContext();
- // Do not create specification DIE if context is either compile unit
- // or a subprogram.
- if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
- !GVContext.isFile() &&
- !isSubprogramContext(GVContext)) {
- // Create specification DIE.
- DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
- addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
- dwarf::DW_FORM_ref4, VariableDie);
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(DI_GV.getGlobal()));
- addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
- addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- TheCU->addDie(VariableSpecDIE);
- } else {
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(DI_GV.getGlobal()));
- addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
- }
- addToContextOwner(VariableDie, GVContext);
-
- // Expose as global. FIXME - need to check external flag.
- TheCU->addGlobal(DI_GV.getName(), VariableDie);
+ bool isGlobalVariable = GV.getGlobal() != NULL;
- DIType GTy = DI_GV.getType();
+ // Add name.
+ addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty() && isGlobalVariable)
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+ // Add type.
+ addType(VariableDIE, GTy);
if (GTy.isCompositeType() && !GTy.getName().empty()
&& !GTy.isForwardDecl()) {
DIEEntry *Entry = TheCU->getDIEEntry(GTy);
assert(Entry && "Missing global type!");
TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
}
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ // Expose as global.
+ TheCU->addGlobal(GV.getName(), VariableDIE);
+ }
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to map.
+ TheCU->insertDIE(N, VariableDIE);
+ // Add to context owner.
+ DIDescriptor GVContext = GV.getContext();
+ addToContextOwner(VariableDIE, GVContext);
+ // Add location.
+ if (isGlobalVariable) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(GV.getGlobal()));
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ TheCU->addDie(VariableSpecDIE);
+ } else {
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ } else if (Constant *C = GV.getConstant()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (isUnsignedDIType(GTy))
+ addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ CI->getZExtValue());
+ else
+ addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+ CI->getSExtValue());
+ }
+ }
return;
}
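Editor's note: constructGlobalVariableDIE above now also handles constants that never made it into memory: if the DIGlobalVariable has no GlobalVariable but carries a ConstantInt, the value is emitted directly as DW_AT_const_value, with signedness decided by the new isUnsignedDIType walk. A generic, self-contained sketch of that walk (the DW_ATE_* values are from the DWARF standard; the struct is a stand-in, not LLVM's type hierarchy):

// Sketch only: a derived type (typedef, const, volatile) defers to its base
// type; a basic type answers from its DWARF encoding.
struct TypeNode {
  const TypeNode *BaseType; // non-null for derived types
  unsigned Encoding;        // dwarf::DW_ATE_* for basic types
};

bool isUnsignedType(const TypeNode &T) {
  if (T.BaseType)
    return isUnsignedType(*T.BaseType);
  return T.Encoding == 0x07 /*DW_ATE_unsigned*/ ||
         T.Encoding == 0x08 /*DW_ATE_unsigned_char*/;
}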
- emitDebugLines(); - // Emit all the DIEs into a debug info section emitDebugInfo(); // Corresponding abbreviations into a abbrev section. emitAbbreviations(); + // Emit source line correspondence into a debug line section. + emitDebugLines(); + // Emit info into a debug pubnames section. emitDebugPubNames(); @@ -2131,7 +2178,9 @@ void DwarfDebug::endModule() { // Emit info into a debug str section. emitDebugStr(); - + + // clean up. + DeleteContainerSeconds(DeadFnScopeMap); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) delete I->second; @@ -2139,7 +2188,7 @@ void DwarfDebug::endModule() { } /// findAbstractVariable - Find abstract variable, if any, associated with Var. -DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, +DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, DebugLoc ScopeLoc) { DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var); @@ -2159,7 +2208,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, /// collectVariableInfoFromMMITable - Collect variable information from /// side table maintained by MMI. -void +void DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, SmallPtrSet<const MDNode *, 16> &Processed) { const LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); @@ -2177,7 +2226,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, Scope = ConcreteScopes.lookup(IA); if (Scope == 0) Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx)); - + // If variable scope is not found then skip this variable. if (Scope == 0) continue; @@ -2193,7 +2242,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, } } -/// isDbgValueInUndefinedReg - Return true if debug value, encoded by +/// isDbgValueInUndefinedReg - Return true if debug value, encoded by /// DBG_VALUE instruction, is in undefined reg. static bool isDbgValueInUndefinedReg(const MachineInstr *MI) { assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); @@ -2202,7 +2251,7 @@ static bool isDbgValueInUndefinedReg(const MachineInstr *MI) { return false; } -/// isDbgValueInDefinedReg - Return true if debug value, encoded by +/// isDbgValueInDefinedReg - Return true if debug value, encoded by /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); @@ -2212,10 +2261,10 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { } /// collectVariableInfo - Populate DbgScope entries with variables' info. -void +void DwarfDebug::collectVariableInfo(const MachineFunction *MF, SmallPtrSet<const MDNode *, 16> &Processed) { - + /// collection info from MMI table. 
collectVariableInfoFromMMITable(MF, Processed); @@ -2244,11 +2293,11 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, continue; const MachineInstr *PrevMI = MInsn; - for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1, + for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1, ME = DbgValues.end(); MI != ME; ++MI) { - const MDNode *Var = + const MDNode *Var = (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && isDbgValueInDefinedReg(*MI) && + if (Var == DV && isDbgValueInDefinedReg(*MI) && !PrevMI->isIdenticalTo(*MI)) MultipleValues.push_back(*MI); PrevMI = *MI; } @@ -2269,7 +2318,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DbgVariable *RegVar = new DbgVariable(DV); Scope->addVariable(RegVar); if (!CurFnArg) - DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); + DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; VarToAbstractVarMap[RegVar] = AbsVar; @@ -2286,26 +2335,39 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); const MachineInstr *Begin = NULL; const MachineInstr *End = NULL; - for (SmallVector<const MachineInstr *, 4>::iterator - MVI = MultipleValues.begin(), MVE = MultipleValues.end(); + for (SmallVector<const MachineInstr *, 4>::iterator + MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) { if (!Begin) { Begin = *MVI; continue; - } + } End = *MVI; MachineLocation MLoc; - MLoc.set(Begin->getOperand(0).getReg(), 0); + if (Begin->getNumOperands() == 3) { + if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) + MLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm()); + } else + MLoc = Asm->getDebugValueLocation(Begin); + const MCSymbol *FLabel = getLabelBeforeInsn(Begin); const MCSymbol *SLabel = getLabelBeforeInsn(End); - DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc)); + if (MLoc.getReg()) + DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc)); + Begin = End; if (MVI + 1 == MVE) { // If End is the last instruction then its value is valid // until the end of the function. - MLoc.set(End->getOperand(0).getReg(), 0); - DotDebugLocEntries. - push_back(DotDebugLocEntry(SLabel, FunctionEndSym, MLoc)); + MachineLocation EMLoc; + if (End->getNumOperands() == 3) { + if (End->getOperand(0).isReg() && End->getOperand(1).isImm()) + EMLoc.set(End->getOperand(0).getReg(), End->getOperand(1).getImm()); + } else + EMLoc = Asm->getDebugValueLocation(End); + if (EMLoc.getReg()) + DotDebugLocEntries. + push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc)); + } } DotDebugLocEntries.push_back(DotDebugLocEntry()); @@ -2314,11 +2376,11 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, // Collect info for variables that were optimized out.
const Function *F = MF->getFunction(); const Module *M = F->getParent(); - if (NamedMDNode *NMD = - M->getNamedMetadata(Twine("llvm.dbg.lv.", + if (NamedMDNode *NMD = + M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(F->getName())))) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i))); + DIVariable DV(cast<MDNode>(NMD->getOperand(i))); if (!DV || !Processed.insert(DV)) continue; DbgScope *Scope = DbgScopeMap.lookup(DV.getContext()); @@ -2364,7 +2426,7 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { return; } - // If location is unknown then use temp label for this DBG_VALUE + // If location is unknown then use temp label for this DBG_VALUE // instruction. if (MI->isDebugValue()) { PrevLabel = MMI->getContext().CreateTempSymbol(); @@ -2393,7 +2455,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) { } /// getOrCreateDbgScope - Create DbgScope for the scope. -DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, +DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) { if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); @@ -2402,7 +2464,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL); DbgScopeMap.insert(std::make_pair(Scope, WScope)); if (DIDescriptor(Scope).isLexicalBlock()) { - DbgScope *Parent = + DbgScope *Parent = getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL); WScope->setParent(Parent); Parent->addScope(WScope); @@ -2419,7 +2481,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, DISubprogram(Scope).getFunction() == Asm->MF->getFunction()) CurrentFnDbgScope = WScope; } - + return WScope; } @@ -2448,14 +2510,14 @@ static bool hasValidLocation(LLVMContext &Ctx, const MDNode *&Scope, const MDNode *&InlinedAt) { DebugLoc DL = MInsn->getDebugLoc(); if (DL.isUnknown()) return false; - + const MDNode *S = DL.getScope(Ctx); - + // There is no need to create another DIE for compile unit. For all // other scopes, create one DbgScope now. This will be translated // into a scope DIE at the end. if (DIScope(S).isCompileUnit()) return false; - + Scope = S; InlinedAt = DL.getInlinedAt(Ctx); return true; @@ -2490,7 +2552,7 @@ static void calculateDominanceGraph(DbgScope *Scope) { } /// printDbgScopeInfo - Print DbgScope info for each machine instruction. -static +static void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF, DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap) { @@ -2507,9 +2569,9 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF, // Check if instruction has valid location information. if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) { dbgs() << " [ "; - if (InlinedAt) + if (InlinedAt) dbgs() << "*"; - DenseMap<const MachineInstr *, DbgScope *>::iterator DI = + DenseMap<const MachineInstr *, DbgScope *>::iterator DI = MI2ScopeMap.find(MInsn); if (DI != MI2ScopeMap.end()) { DbgScope *S = DI->second; @@ -2517,7 +2579,7 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF, PrevDFSIn = S->getDFSIn(); } else dbgs() << PrevDFSIn; - } else + } else dbgs() << " [ x" << PrevDFSIn; dbgs() << " ]"; MInsn->dump(); @@ -2555,26 +2617,26 @@ bool DwarfDebug::extractScopeInformation() { PrevMI = MInsn; continue; } - + // If scope has not changed then skip this instruction. 
if (Scope == PrevScope && PrevInlinedAt == InlinedAt) { PrevMI = MInsn; continue; } - if (RangeBeginMI) { - // If we have already seen the beginning of an instruction range and + if (RangeBeginMI) { + // If we have already seen the beginning of an instruction range and // current instruction scope does not match scope of first instruction // in this range then create a new instruction range. DbgRange R(RangeBeginMI, PrevMI); - MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); MIRanges.push_back(R); - } + } // This is the beginning of a new instruction range. RangeBeginMI = MInsn; - + // Reset previous markers. PrevMI = MInsn; PrevScope = Scope; @@ -2588,7 +2650,7 @@ bool DwarfDebug::extractScopeInformation() { MIRanges.push_back(R); MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); } - + if (!CurrentFnDbgScope) return false; @@ -2618,7 +2680,7 @@ bool DwarfDebug::extractScopeInformation() { return !DbgScopeMap.empty(); } -/// identifyScopeMarkers() - +/// identifyScopeMarkers() - /// Each DbgScope has first instruction and last instruction to mark beginning /// and end of a scope respectively. Create an inverse map that lists scope /// starts (and ends) with an instruction. One instruction may start (or end) @@ -2628,23 +2690,23 @@ void DwarfDebug::identifyScopeMarkers() { WorkList.push_back(CurrentFnDbgScope); while (!WorkList.empty()) { DbgScope *S = WorkList.pop_back_val(); - + const SmallVector<DbgScope *, 4> &Children = S->getScopes(); - if (!Children.empty()) + if (!Children.empty()) for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) WorkList.push_back(*SI); if (S->isAbstractScope()) continue; - + const SmallVector<DbgRange, 4> &Ranges = S->getRanges(); if (Ranges.empty()) continue; for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { - assert(RI->first && "DbgRange does not have first instruction!"); - assert(RI->second && "DbgRange does not have second instruction!"); + assert(RI->first && "DbgRange does not have first instruction!"); + assert(RI->second && "DbgRange does not have second instruction!"); InsnsEndScopeSet.insert(RI->second); } } @@ -2680,20 +2742,23 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // function. DebugLoc FDL = FindFirstDebugLoc(MF); if (FDL.isUnknown()) return; - + const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext()); - + const MDNode *TheScope = 0; + DISubprogram SP = getDISubprogram(Scope); unsigned Line, Col; if (SP.Verify()) { Line = SP.getLineNumber(); Col = 0; + TheScope = SP; } else { Line = FDL.getLine(); Col = FDL.getCol(); + TheScope = Scope; } - - recordSourceLine(Line, Col, Scope); + + recordSourceLine(Line, Col, TheScope); /// ProcessedArgs - Collection of arguments already processed. SmallPtrSet<const MDNode *, 8> ProcessedArgs; @@ -2710,7 +2775,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); if (!DV.Verify()) continue; // If DBG_VALUE is for a local variable then it needs a label. - if (DV.getTag() != dwarf::DW_TAG_arg_variable + if (DV.getTag() != dwarf::DW_TAG_arg_variable && isDbgValueInUndefinedReg(MI) == false) InsnNeedsLabel.insert(MI); // DBG_VALUE for inlined functions argument needs a label.
@@ -2718,10 +2783,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { describes(MF->getFunction())) InsnNeedsLabel.insert(MI); // DBG_VALUE indicating argument location change needs a label. - else if (isDbgValueInUndefinedReg(MI) == false && !ProcessedArgs.insert(DV)) + else if (isDbgValueInUndefinedReg(MI) == false + && !ProcessedArgs.insert(DV)) InsnNeedsLabel.insert(MI); } else { - // If location is unknown then instruction needs a location only if + // If location is unknown then instruction needs a location only if // UnknownLocations flag is set. if (DL.isUnknown()) { if (UnknownLocations && !PrevLoc.isUnknown()) @@ -2730,7 +2796,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Otherwise, instruction needs a location only if it is new location. InsnNeedsLabel.insert(MI); } - + if (!DL.isUnknown() || UnknownLocations) PrevLoc = DL; } @@ -2750,7 +2816,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionEndSym); - + SmallPtrSet<const MDNode *, 16> ProcessedVars; collectVariableInfo(MF, ProcessedVars); @@ -2764,7 +2830,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { SectionLineInfos.insert(SectionLineInfos.end(), Lines.begin(), Lines.end()); } - + // Construct abstract scopes. for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), AE = AbstractScopesList.end(); AI != AE; ++AI) { @@ -2775,11 +2841,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (FName.empty()) FName = SP.getName(); const Module *M = MF->getFunction()->getParent(); - if (NamedMDNode *NMD = - M->getNamedMetadata(Twine("llvm.dbg.lv.", + if (NamedMDNode *NMD = + M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)))) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i))); + DIVariable DV(cast<MDNode>(NMD->getOperand(i))); if (!DV || !ProcessedVars.insert(DV)) continue; DbgScope *Scope = AbstractScopes.lookup(DV.getContext()); @@ -2793,9 +2859,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { } DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); - + if (!DisableFramePointerElim(*MF)) - addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, + addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, dwarf::DW_FORM_flag, 1); @@ -2849,22 +2915,22 @@ const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) { else return I->second; } -/// findDbgScope - Find DbgScope for the debug loc attached with an +/// findDbgScope - Find DbgScope for the debug loc attached with an /// instruction. DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { DbgScope *Scope = NULL; - LLVMContext &Ctx = + LLVMContext &Ctx = MInsn->getParent()->getParent()->getFunction()->getContext(); DebugLoc DL = MInsn->getDebugLoc(); - if (DL.isUnknown()) + if (DL.isUnknown()) return Scope; if (const MDNode *IA = DL.getInlinedAt(Ctx)) Scope = ConcreteScopes.lookup(IA); if (Scope == 0) Scope = DbgScopeMap.lookup(DL.getScope(Ctx)); - + return Scope; } @@ -2872,7 +2938,7 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. 
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, +MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) { StringRef Dir; StringRef Fn; @@ -2899,16 +2965,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, Src = GetOrCreateSourceID(Dir, Fn); } -#if 0 - if (!Lines.empty()) { - SrcLineInfo lastSrcLineInfo = Lines.back(); - // Emitting sequential line records with the same line number (but - // different addresses) seems to confuse GDB. Avoid this. - if (lastSrcLineInfo.getLine() == Line) - return NULL; - } -#endif - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); Lines.push_back(SrcLineInfo(Line, Col, Src, Label)); @@ -2991,7 +3047,7 @@ static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, const char *SymbolStem = 0) { Asm->OutStreamer.SwitchSection(Section); if (!SymbolStem) return 0; - + MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); Asm->OutStreamer.EmitLabel(TmpSym); return TmpSym; @@ -3008,21 +3064,20 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame"); } - DwarfInfoSectionSym = + DwarfInfoSectionSym = EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); - DwarfAbbrevSectionSym = + DwarfAbbrevSectionSym = EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); EmitSectionSym(Asm, TLOF.getDwarfARangesSection()); - + if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) EmitSectionSym(Asm, MacroInfo); - DwarfDebugLineSectionSym = - EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); + EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); EmitSectionSym(Asm, TLOF.getDwarfLocSection()); EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); - DwarfStrSectionSym = + DwarfStrSectionSym = EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); @@ -3060,7 +3115,7 @@ void DwarfDebug::emitDIE(DIE *Die) { if (Asm->isVerbose()) Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); - + switch (Attr) { case dwarf::DW_AT_sibling: Asm->EmitInt32(Die->getSiblingOffset()); @@ -3075,15 +3130,17 @@ void DwarfDebug::emitDIE(DIE *Die) { case dwarf::DW_AT_ranges: { // DW_AT_range Value encodes offset in debug_range section. DIEInteger *V = cast<DIEInteger>(Values[i]); - Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, - V->getValue(), - DwarfDebugRangeSectionSym, - 4); - break; - } - case dwarf::DW_AT_stmt_list: { - Asm->EmitLabelDifference(CurrentLineSectionSym, - DwarfDebugLineSectionSym, 4); + + if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) { + Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, + V->getValue(), + 4); + } else { + Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, + V->getValue(), + DwarfDebugRangeSectionSym, + 4); + } break; } case dwarf::DW_AT_location: { @@ -3124,18 +3181,18 @@ void DwarfDebug::emitDebugInfo() { E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; DIE *Die = TheCU->getCUDie(); - + // Emit the compile units header. Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin", TheCU->getID())); - + // Emit size of content not including length itself unsigned ContentSize = Die->getSize() + sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. 
Section sizeof(int8_t) + // Pointer Size (in bytes) sizeof(int32_t); // FIXME - extra pad for gdb bug. - + Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); Asm->EmitInt32(ContentSize); Asm->OutStreamer.AddComment("DWARF version number"); @@ -3145,7 +3202,7 @@ void DwarfDebug::emitDebugInfo() { DwarfAbbrevSectionSym); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getTargetData().getPointerSize()); - + emitDIE(Die); // FIXME - extra padding for gdb bug. Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); @@ -3194,7 +3251,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Define last address of section. Asm->OutStreamer.AddComment("Extended Op"); Asm->EmitInt8(0); - + Asm->OutStreamer.AddComment("Op size"); Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1); Asm->OutStreamer.AddComment("DW_LNE_set_address"); @@ -3231,15 +3288,13 @@ void DwarfDebug::emitDebugLines() { Asm->getObjFileLowering().getDwarfLineSection()); // Construct the section header. - CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin"); - Asm->OutStreamer.EmitLabel(CurrentLineSectionSym); Asm->OutStreamer.AddComment("Length of Source Line Info"); Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"), Asm->GetTempSymbol("line_begin"), 4); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin")); Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Prolog Length"); Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"), @@ -3294,7 +3349,7 @@ void DwarfDebug::emitDebugLines() { const std::string &FN = getSourceFileName(Id.second); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source"); Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0); - + Asm->EmitULEB128(Id.first, "Directory #"); Asm->EmitULEB128(0, "Mod date"); Asm->EmitULEB128(0, "File size"); @@ -3338,18 +3393,18 @@ void DwarfDebug::emitDebugLines() { Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1); Asm->OutStreamer.AddComment("DW_LNE_set_address"); - Asm->EmitInt8(dwarf::DW_LNE_set_address); + Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->OutStreamer.AddComment("Location label"); Asm->OutStreamer.EmitSymbolValue(Label, Asm->getTargetData().getPointerSize(), 0/*AddrSpace*/); - + // If change of source, then switch to the new source. 
if (Source != LineInfo.getSourceID()) { Source = LineInfo.getSourceID(); Asm->OutStreamer.AddComment("DW_LNS_set_file"); - Asm->EmitInt8(dwarf::DW_LNS_set_file); + Asm->EmitInt8(dwarf::DW_LNS_set_file); Asm->EmitULEB128(Source, "New Source"); } @@ -3457,7 +3512,7 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { Asm->OutStreamer.EmitLabel(DebugFrameBegin); Asm->OutStreamer.AddComment("FDE CIE offset"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"), + Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"), DwarfFrameSectionSym); Asm->OutStreamer.AddComment("FDE initial location"); @@ -3466,8 +3521,8 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { Asm->OutStreamer.EmitSymbolValue(FuncBeginSym, Asm->getTargetData().getPointerSize(), 0/*AddrSpace*/); - - + + Asm->OutStreamer.AddComment("FDE address range"); Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number), FuncBeginSym, Asm->getTargetData().getPointerSize()); @@ -3487,41 +3542,41 @@ void DwarfDebug::emitDebugPubNames() { // Start the dwarf pubnames section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfPubNamesSection()); - + Asm->OutStreamer.AddComment("Length of Public Names Info"); Asm->EmitLabelDifference( Asm->GetTempSymbol("pubnames_end", TheCU->getID()), Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4); - + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", TheCU->getID())); - + Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - + Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), + Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), DwarfInfoSectionSym); - + Asm->OutStreamer.AddComment("Compilation Unit Length"); Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), Asm->GetTempSymbol("info_begin", TheCU->getID()), 4); - + const StringMap<DIE*> &Globals = TheCU->getGlobals(); for (StringMap<DIE*>::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); DIE *Entity = GI->second; - + Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); - + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); } - + Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", @@ -3540,37 +3595,37 @@ void DwarfDebug::emitDebugPubTypes() { Asm->EmitLabelDifference( Asm->GetTempSymbol("pubtypes_end", TheCU->getID()), Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4); - + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin", TheCU->getID())); - + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); Asm->EmitInt16(dwarf::DWARF_VERSION); - + Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), DwarfInfoSectionSym); - + Asm->OutStreamer.AddComment("Compilation Unit Length"); Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), Asm->GetTempSymbol("info_begin", TheCU->getID()), 4); - + const StringMap<DIE*> &Globals = TheCU->getGlobalTypes(); for (StringMap<DIE*>::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = 
GI->getKeyData(); DIE * Entity = GI->second; - + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); - + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); } - + Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); + Asm->EmitInt32(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end", TheCU->getID())); } @@ -3581,26 +3636,26 @@ void DwarfDebug::emitDebugPubTypes() { void DwarfDebug::emitDebugStr() { // Check to see if it is worth the effort. if (StringPool.empty()) return; - + // Start the dwarf str section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfStrSection()); // Get all of the string pool entries and put them in an array by their ID so // we can sort them. - SmallVector<std::pair<unsigned, + SmallVector<std::pair<unsigned, StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries; - + for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator I = StringPool.begin(), E = StringPool.end(); I != E; ++I) Entries.push_back(std::make_pair(I->second.second, &*I)); - + array_pod_sort(Entries.begin(), Entries.end()); - + for (unsigned i = 0, e = Entries.size(); i != e; ++i) { // Emit a label for reference from debug information entries. Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); - + // Emit the string itself. Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/); } @@ -3618,8 +3673,8 @@ void DwarfDebug::emitDebugLoc() { unsigned char Size = Asm->getTargetData().getPointerSize(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; - for (SmallVector<DotDebugLocEntry, 4>::iterator - I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); + for (SmallVector<DotDebugLocEntry, 4>::iterator + I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); I != E; ++I, ++index) { DotDebugLocEntry Entry = *I; if (Entry.isEmpty()) { @@ -3631,15 +3686,30 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false); - if (Reg < 32) { + if (int Offset = Entry.Loc.getOffset()) { + // If the value is at a certain offset from frame register then + // use DW_OP_fbreg. + unsigned OffsetSize = Offset ? 
MCAsmInfo::getSLEB128Size(Offset) : 1; Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1); - Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + Asm->EmitInt16(1 + OffsetSize); + Asm->OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_fbreg)); + Asm->EmitInt8(dwarf::DW_OP_fbreg); + Asm->OutStreamer.AddComment("Offset"); + Asm->EmitSLEB128(Offset); } else { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1+MCAsmInfo::getULEB128Size(Reg)); - Asm->EmitInt8(dwarf::DW_OP_regx); - Asm->EmitULEB128(Reg); + if (Reg < 32) { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(1); + Asm->OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg)); + Asm->EmitInt8(dwarf::DW_OP_regx); + Asm->EmitULEB128(Reg); + } } } } @@ -3661,7 +3731,7 @@ void DwarfDebug::emitDebugRanges() { Asm->getObjFileLowering().getDwarfRangesSection()); unsigned char Size = Asm->getTargetData().getPointerSize(); for (SmallVector<const MCSymbol *, 8>::iterator - I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); + I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); I != E; ++I) { if (*I) Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0); @@ -3734,7 +3804,7 @@ void DwarfDebug::emitDebugInlineInfo() { if (LName.empty()) { Asm->OutStreamer.EmitBytes(Name, 0); Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator. - } else + } else Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)), DwarfStrSectionSym); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 5a281c8..f0ff3bc 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -261,7 +261,6 @@ class DwarfDebug { MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; MCSymbol *DwarfDebugLocSectionSym; - MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; DIEInteger *DIEIntegerOne; @@ -338,11 +337,11 @@ private: /// addSourceLine - Add location information to specified debug information /// entry. - void addSourceLine(DIE *Die, const DIVariable *V); - void addSourceLine(DIE *Die, const DIGlobalVariable *G); - void addSourceLine(DIE *Die, const DISubprogram *SP); - void addSourceLine(DIE *Die, const DIType *Ty); - void addSourceLine(DIE *Die, const DINameSpace *NS); + void addSourceLine(DIE *Die, DIVariable V); + void addSourceLine(DIE *Die, DIGlobalVariable G); + void addSourceLine(DIE *Die, DISubprogram SP); + void addSourceLine(DIE *Die, DIType Ty); + void addSourceLine(DIE *Die, DINameSpace NS); /// addAddress - Add an address attribute to a die based on the location /// provided. @@ -376,6 +375,10 @@ private: void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); + /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based + /// on provided frame index. + void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI); + /// addToContextOwner - Add Die into the list of its context owner's children. 
void addToContextOwner(DIE *Die, DIDescriptor Context); @@ -414,14 +417,11 @@ private: /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. DIE *constructEnumTypeDIE(DIEnumerator ETy); - /// createGlobalVariableDIE - Create new DIE using GV. - DIE *createGlobalVariableDIE(const DIGlobalVariable &GV); - /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(const DIDerivedType &DT); + DIE *createMemberDIE(DIDerivedType DT); /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false); + DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false); /// getOrCreateDbgScope - Create DbgScope for the scope. DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt); @@ -560,12 +560,6 @@ private: /// construct SubprogramDIE - Construct subprogram DIE. void constructSubprogramDIE(const MDNode *N); - // FIXME: This should go away in favor of complex addresses. - /// Find the type the programmer originally declared the variable to be - /// and return that type. Obsolete, use GetComplexAddrType instead. - /// - DIType getBlockByrefType(DIType Ty, std::string Name); - /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp index c872840..86a3688 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -894,7 +894,7 @@ void DwarfException::EndModule() { if (!shouldEmitMovesModule && !shouldEmitTableModule) return; - const std::vector<const Function *> Personalities = MMI->getPersonalities(); + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); for (unsigned I = 0, E = Personalities.size(); I < E; ++I) EmitCIE(Personalities[I], I); diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 7f98df0..cb81aa3 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -65,7 +65,7 @@ namespace { public: static char ID; explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} + : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "Control Flow Optimizer"; } diff --git a/contrib/llvm/lib/CodeGen/CMakeLists.txt b/contrib/llvm/lib/CodeGen/CMakeLists.txt index ffeff1e..2ef115d 100644 --- a/contrib/llvm/lib/CodeGen/CMakeLists.txt +++ b/contrib/llvm/lib/CodeGen/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen LiveIntervalAnalysis.cpp LiveStackAnalysis.cpp LiveVariables.cpp + LocalStackSlotAllocation.cpp LowerSubregs.cpp MachineBasicBlock.cpp MachineCSE.cpp @@ -42,10 +43,10 @@ add_llvm_library(LLVMCodeGen MachineVerifier.cpp ObjectCodeEmitter.cpp OcamlGC.cpp - OptimizeExts.cpp OptimizePHIs.cpp PHIElimination.cpp Passes.cpp + PeepholeOptimizer.cpp PostRAHazardRecognizer.cpp PostRASchedulerList.cpp PreAllocSplitting.cpp @@ -57,6 +58,7 @@ add_llvm_library(LLVMCodeGen RegAllocPBQP.cpp RegisterCoalescer.cpp RegisterScavenging.cpp + RenderMachineFunction.cpp ScheduleDAG.cpp ScheduleDAGEmit.cpp ScheduleDAGInstrs.cpp @@ -67,6 +69,8 @@ add_llvm_library(LLVMCodeGen SjLjEHPrepare.cpp 
SlotIndexes.cpp Spiller.cpp + SplitKit.cpp + Splitter.cpp StackProtector.cpp StackSlotColoring.cpp StrongPHIElimination.cpp diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index 240a7b9..1b7e08a 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -25,8 +25,8 @@ using namespace llvm; char CalculateSpillWeights::ID = 0; -static RegisterPass<CalculateSpillWeights> X("calcspillweights", - "Calculate spill weights"); +INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights", + "Calculate spill weights", false, false); void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); @@ -41,108 +41,184 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { << "********** Function: " << fn.getFunction()->getName() << '\n'); - LiveIntervals *lis = &getAnalysis<LiveIntervals>(); - MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>(); - const TargetInstrInfo *tii = fn.getTarget().getInstrInfo(); - MachineRegisterInfo *mri = &fn.getRegInfo(); - - SmallSet<unsigned, 4> processed; - for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* mbb = mbbi; - SlotIndex mbbEnd = lis->getMBBEndIdx(mbb); - MachineLoop* loop = loopInfo->getLoopFor(mbb); - unsigned loopDepth = loop ? loop->getLoopDepth() : 0; - bool isExiting = loop ? loop->isLoopExiting(mbb) : false; - - for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end(); - mii != mie; ++mii) { - const MachineInstr *mi = mii; - if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue()) - continue; - - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - const MachineOperand &mopi = mi->getOperand(i); - if (!mopi.isReg() || mopi.getReg() == 0) - continue; - unsigned reg = mopi.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) - continue; - // Multiple uses of reg by the same instruction. It should not - // contribute to spill weight again. - if (!processed.insert(reg)) - continue; - - bool hasDef = mopi.isDef(); - bool hasUse = !hasDef; - for (unsigned j = i+1; j != e; ++j) { - const MachineOperand &mopj = mi->getOperand(j); - if (!mopj.isReg() || mopj.getReg() != reg) - continue; - hasDef |= mopj.isDef(); - hasUse |= mopj.isUse(); - if (hasDef && hasUse) - break; - } - - LiveInterval ®Int = lis->getInterval(reg); - float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth); - if (hasDef && isExiting) { - // Looks like this is a loop count variable update. - SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex(); - const LiveRange *dlr = - lis->getInterval(reg).getLiveRangeContaining(defIdx); - if (dlr->end >= mbbEnd) - weight *= 3.0F; - } - regInt.weight += weight; - } - processed.clear(); - } + LiveIntervals &lis = getAnalysis<LiveIntervals>(); + VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>()); + for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) { + LiveInterval &li = *I->second; + if (TargetRegisterInfo::isVirtualRegister(li.reg)) + vrai.CalculateWeightAndHint(li); + } + return false; +} + +// Return the preferred allocation register for reg, given a COPY instruction. 
+static unsigned copyHint(const MachineInstr *mi, unsigned reg, + const TargetRegisterInfo &tri, + const MachineRegisterInfo &mri) { + unsigned sub, hreg, hsub; + if (mi->getOperand(0).getReg() == reg) { + sub = mi->getOperand(0).getSubReg(); + hreg = mi->getOperand(1).getReg(); + hsub = mi->getOperand(1).getSubReg(); + } else { + sub = mi->getOperand(1).getSubReg(); + hreg = mi->getOperand(0).getReg(); + hsub = mi->getOperand(0).getSubReg(); } - for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) { - LiveInterval &li = *I->second; - if (TargetRegisterInfo::isVirtualRegister(li.reg)) { - // If the live interval length is essentially zero, i.e. in every live - // range the use follows def immediately, it doesn't make sense to spill - // it and hope it will be easier to allocate for this li. - if (isZeroLengthInterval(&li)) { - li.weight = HUGE_VALF; - continue; - } - - bool isLoad = false; - SmallVector<LiveInterval*, 4> spillIs; - if (lis->isReMaterializable(li, spillIs, isLoad)) { - // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If none of the defs are - // loads, then it's potentially very cheap to re-materialize. - // FIXME: this gets much more complicated once we support non-trivial - // re-materialization. - if (isLoad) - li.weight *= 0.9F; - else - li.weight *= 0.5F; - } - - // Slightly prefer live interval that has been assigned a preferred reg. - std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg); - if (Hint.first || Hint.second) - li.weight *= 1.01F; - - lis->normalizeSpillWeight(li); + if (!hreg) + return 0; + + if (TargetRegisterInfo::isVirtualRegister(hreg)) + return sub == hsub ? hreg : 0; + + const TargetRegisterClass *rc = mri.getRegClass(reg); + + // Only allow physreg hints in rc. + if (sub == 0) + return rc->contains(hreg) ? hreg : 0; + + // reg:sub should match the physreg hreg. + return tri.getMatchingSuperReg(hreg, sub, rc); +} + +void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { + MachineRegisterInfo &mri = mf_.getRegInfo(); + const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo(); + MachineBasicBlock *mbb = 0; + MachineLoop *loop = 0; + unsigned loopDepth = 0; + bool isExiting = false; + float totalWeight = 0; + SmallPtrSet<MachineInstr*, 8> visited; + + // Find the best physreg hint and the best virtreg hint. + float bestPhys = 0, bestVirt = 0; + unsigned hintPhys = 0, hintVirt = 0; + + // Don't recompute a target specific hint. + bool noHint = mri.getRegAllocationHint(li.reg).first != 0; + + for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg); + MachineInstr *mi = I.skipInstruction();) { + if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) + continue; + if (!visited.insert(mi)) + continue; + + // Get loop info for mi. + if (mi->getParent() != mbb) { + mbb = mi->getParent(); + loop = loops_.getLoopFor(mbb); + loopDepth = loop ? loop->getLoopDepth() : 0; + isExiting = loop ? loop->isLoopExiting(mbb) : false; + } + + // Calculate instr weight. + bool reads, writes; + tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + + // Give extra weight to what looks like a loop induction variable update. + if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb)) + weight *= 3; + + totalWeight += weight; + + // Get allocation hints from copies.
+ if (noHint || !mi->isCopy()) + continue; + unsigned hint = copyHint(mi, li.reg, tri, mri); + if (!hint) + continue; + float hweight = hint_[hint] += weight; + if (TargetRegisterInfo::isPhysicalRegister(hint)) { + if (hweight > bestPhys && lis_.isAllocatable(hint)) + bestPhys = hweight, hintPhys = hint; + } else { + if (hweight > bestVirt) + bestVirt = hweight, hintVirt = hint; } } - - return false; + + hint_.clear(); + + // Always prefer the physreg hint. + if (unsigned hint = hintPhys ? hintPhys : hintVirt) { + mri.setRegAllocationHint(li.reg, 0, hint); + // Weakly boost the spill weight of hinted registers. + totalWeight *= 1.01F; + } + + // Mark li as unspillable if all live ranges are tiny. + if (li.isZeroLength()) { + li.markNotSpillable(); + return; + } + + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. If none of the defs are + // loads, then it's potentially very cheap to re-materialize. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. + bool isLoad = false; + SmallVector<LiveInterval*, 4> spillIs; + if (lis_.isReMaterializable(li, spillIs, isLoad)) { + if (isLoad) + totalWeight *= 0.9F; + else + totalWeight *= 0.5F; + } + + li.weight = totalWeight; + lis_.normalizeSpillWeight(li); } -/// Returns true if the given live interval is zero length. -bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const { - for (LiveInterval::Ranges::const_iterator - i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) - if (i->end.getPrevIndex() > i->start) - return false; - return true; +void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { + MachineRegisterInfo &mri = mf_.getRegInfo(); + const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo(); + const TargetRegisterClass *orc = mri.getRegClass(reg); + SmallPtrSet<const TargetRegisterClass*,8> rcs; + + for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg), + E = mri.reg_nodbg_end(); I != E; ++I) { + // The targets don't have accurate enough regclass descriptions that we can + // handle subregs. We need something similar to + // TRI::getMatchingSuperRegClass, but returning a super class instead of a + // sub class. + if (I.getOperand().getSubReg()) { + DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n'); + return; + } + if (const TargetRegisterClass *rc = + I->getDesc().getRegClass(I.getOperandNo(), tri)) + rcs.insert(rc); + } + + // If we found no regclass constraints, just leave reg as is. + // In theory, we could inflate to the largest superclass of reg's existing + // class, but that might not be legal for the current cpu setting. + // This could happen if reg is only used by COPY instructions, so we may need + // to improve on this. + if (rcs.empty()) { + return; + } + + // Compute the intersection of all classes in rcs. + // This ought to be independent of iteration order, but if the target register + // classes don't form a proper algebra, it is possible to get different + // results. The solution is to make sure the intersection of any two register + // classes is also a register class or the null set. + const TargetRegisterClass *rc = 0; + for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(), + E = rcs.end(); I != E; ++I) { + rc = rc ?
getCommonSubClass(rc, *I) : *I; + assert(rc && "Incompatible regclass constraints found"); + } + + if (rc == orc) + return; + DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to " + << rc->getName() <<".\n"); + mri.setRegClass(reg, rc); } diff --git a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp index e0e315c..91a9536 100644 --- a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp +++ b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp @@ -36,7 +36,7 @@ namespace { public: static char ID; - CodePlacementOpt() : MachineFunctionPass(&ID) {} + CodePlacementOpt() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index e3746a9..335d2d8 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -32,21 +32,21 @@ CriticalAntiDepBreaker(MachineFunction& MFi) : MRI(MF.getRegInfo()), TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), - AllocatableSet(TRI->getAllocatableSet(MF)) -{ -} + AllocatableSet(TRI->getAllocatableSet(MF)), + Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), + KillIndices(TRI->getNumRegs(), 0), + DefIndices(TRI->getNumRegs(), 0) {} CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { } void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { - // Clear out the register class data. - std::fill(Classes, array_endof(Classes), - static_cast<const TargetRegisterClass *>(0)); - - // Initialize the indices to indicate that no registers are live. const unsigned BBSize = BB->size(); - for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + // Clear out the register class data. + Classes[i] = static_cast<const TargetRegisterClass *>(0); + + // Initialize the indices to indicate that no registers are live. KillIndices[i] = ~0u; DefIndices[i] = BBSize; } @@ -65,6 +65,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); KillIndices[Reg] = BB->size(); DefIndices[Reg] = ~0u; + // Repeat, for all aliases. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; @@ -86,6 +87,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); KillIndices[Reg] = BB->size(); DefIndices[Reg] = ~0u; + // Repeat, for all aliases. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; @@ -106,6 +108,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); KillIndices[Reg] = BB->size(); DefIndices[Reg] = ~0u; + // Repeat, for all aliases. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; @@ -134,8 +137,10 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); + // Mark this register to be non-renamable. 
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + // Move the def index to the end of the previous region, to reflect // that the def could theoretically have been scheduled at the end. DefIndices[Reg] = InsertPosIndex; @@ -325,6 +330,8 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), RE = RC->allocation_order_end(MF); R != RE; ++R) { unsigned NewReg = *R; + // Don't consider non-allocatable registers. + if (!AllocatableSet.test(NewReg)) continue; // Don't replace a register with itself. if (NewReg == AntiDepReg) continue; // Don't replace a register with one that was recently used to repair @@ -433,7 +440,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // fix that remaining critical edge too. This is a little more involved, // because unlike the most recent register, less recent registers should // still be considered, though only if no other registers are available. - unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; + std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0); // Attempt to break anti-dependence edges on the critical path. Walk the // instructions from the bottom up, tracking information about liveness diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index 5406300..0ed7c35 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -46,19 +46,18 @@ class TargetRegisterInfo; /// corresponding value is null. If the register is live but used in /// multiple register classes, the corresponding value is -1 cast to a /// pointer. - const TargetRegisterClass * - Classes[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<const TargetRegisterClass*> Classes; /// RegRefs - Map registers to all their references within a live range. std::multimap<unsigned, MachineOperand *> RegRefs; /// KillIndices - The index of the most recent kill (proceeding bottom-up), /// or ~0u if the register is not live. - unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> KillIndices; /// DefIndices - The index of the most recent complete def (proceeding bottom /// up), or ~0u if the register is live. - unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> DefIndices; /// KeepRegs - A set of registers which are live and cannot be changed to break anti-dependencies.
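The CriticalAntiDepBreaker hunks above, like the AggressiveAntiDepBreaker hunks at the top of this diff, apply one pattern: per-register arrays bounded by the compile-time constant TargetRegisterInfo::FirstVirtualRegister become std::vectors sized from TRI->getNumRegs(), so the bookkeeping matches the actual target's register count at runtime. A minimal standalone sketch of the pattern follows; RegInfo and the constants in it are hypothetical stand-ins, not LLVM API:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for TargetRegisterInfo::getNumRegs().
struct RegInfo { unsigned getNumRegs() const { return 160; } };

// Per-register liveness state, sized in the constructor's init list the way
// the patched breakers size Classes/KillIndices/DefIndices.
class AntiDepState {
  std::vector<unsigned> KillIndices; // ~0u means no kill seen, not live
  std::vector<unsigned> DefIndices;  // BBSize means def assumed at block end
public:
  AntiDepState(unsigned NumRegs, unsigned BBSize)
    : KillIndices(NumRegs, ~0u), DefIndices(NumRegs, BBSize) {}
  unsigned numRegs() const { return unsigned(KillIndices.size()); }
};

int main() {
  RegInfo TRI;
  AntiDepState State(TRI.getNumRegs(), 42); // sized per target at runtime
  std::printf("tracking %u target registers\n", State.numRegs());
  return 0;
}

A side benefit visible in the hunks: the vector fill constructor replaces the removed std::fill over array_endof, and nothing breaks if a target ever defines more physical registers than the old fixed bound allowed.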
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index d69c995..318d922 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -36,7 +36,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - DeadMachineInstructionElim() : MachineFunctionPass(&ID) {} + DeadMachineInstructionElim() : MachineFunctionPass(ID) {} private: bool isDead(const MachineInstr *MI) const; @@ -44,9 +44,8 @@ namespace { } char DeadMachineInstructionElim::ID = 0; -static RegisterPass<DeadMachineInstructionElim> -Y("dead-mi-elimination", - "Remove dead machine instructions"); +INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", + "Remove dead machine instructions", false, false); FunctionPass *llvm::createDeadMachineInstructionElimPass() { return new DeadMachineInstructionElim(); @@ -81,9 +80,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); - // Compute a bitvector to represent all non-allocatable physregs. - BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF); - NonAllocatableRegs.flip(); + // Treat reserved registers as always live. + BitVector ReservedRegs = TRI->getReservedRegs(MF); // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -92,9 +90,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = &*I; - // Start out assuming that all non-allocatable registers are live - // out of this block. - LivePhysRegs = NonAllocatableRegs; + // Start out assuming that reserved registers are live out of this block. + LivePhysRegs = ReservedRegs; // Also add any explicit live-out physregs for this block. if (!MBB->empty() && MBB->back().getDesc().isReturn()) @@ -105,6 +102,10 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { LivePhysRegs.set(Reg); } + // FIXME: Add live-ins from successors to LivePhysRegs. Normally, physregs + // are not live across blocks, but some targets (x86) can have flags live + // out of a block. + // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 01b31b4..550fd3e 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -25,19 +25,17 @@ #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); -STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; const TargetLowering *TLI; - bool CompileFast; // The eh.exception intrinsic. Function *ExceptionValueIntrinsic; @@ -54,9 +52,8 @@ namespace { // _Unwind_Resume or the target equivalent.
Constant *RewindFunction; - // Dominator info is used when turning stack temporaries into registers. + // We both use and preserve dominator info. DominatorTree *DT; - DominanceFrontier *DF; // The function we are running on. Function *F; @@ -65,28 +62,14 @@ namespace { typedef SmallPtrSet<BasicBlock*, 8> BBSet; BBSet LandingPads; - // Stack temporary used to hold eh.exception values. - AllocaInst *ExceptionValueVar; - bool NormalizeLandingPads(); bool LowerUnwinds(); bool MoveExceptionValueCalls(); - bool FinishStackTemporaries(); - bool PromoteStackTemporaries(); Instruction *CreateExceptionValueCall(BasicBlock *BB); - Instruction *CreateValueLoad(BasicBlock *BB); - - /// CreateReadOfExceptionValue - Return the result of the eh.exception - /// intrinsic by calling the intrinsic if in a landing pad, or loading it - /// from the exception value variable otherwise. - Instruction *CreateReadOfExceptionValue(BasicBlock *BB) { - return LandingPads.count(BB) ? - CreateExceptionValueCall(BB) : CreateValueLoad(BB); - } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the ".llvm.eh.catch.all.value" call need to convert to using its + /// use the "llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels); @@ -112,69 +95,19 @@ namespace { bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, SmallPtrSet<IntrinsicInst*, 8> &SelCalls); - /// DoMem2RegPromotion - Take an alloca call and promote it from memory to a - /// register. - bool DoMem2RegPromotion(Value *V) { - AllocaInst *AI = dyn_cast<AllocaInst>(V); - if (!AI || !isAllocaPromotable(AI)) return false; - - // Turn the alloca into a register. - std::vector<AllocaInst*> Allocas(1, AI); - PromoteMemToReg(Allocas, *DT, *DF); - return true; - } - - /// PromoteStoreInst - Perform Mem2Reg on a StoreInst. - bool PromoteStoreInst(StoreInst *SI) { - if (!SI || !DT || !DF) return false; - if (DoMem2RegPromotion(SI->getOperand(1))) - return true; - return false; - } - - /// PromoteEHPtrStore - Promote the storing of an EH pointer into a - /// register. This should get rid of the store and subsequent loads. - bool PromoteEHPtrStore(IntrinsicInst *II) { - if (!DT || !DF) return false; - - bool Changed = false; - StoreInst *SI; - - while (1) { - SI = 0; - for (Value::use_iterator - I = II->use_begin(), E = II->use_end(); I != E; ++I) { - SI = dyn_cast<StoreInst>(I); - if (SI) break; - } - - if (!PromoteStoreInst(SI)) - break; - - Changed = true; - } - - return Changed; - } - public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetMachine *tm, bool fast) : - FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()), - CompileFast(fast), + DwarfEHPrepare(const TargetMachine *tm) : + FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), ExceptionValueIntrinsic(0), SelectorIntrinsic(0), URoR(0), EHCatchAllValue(0), RewindFunction(0) {} virtual bool runOnFunction(Function &Fn); - // getAnalysisUsage - We need dominance frontiers for memory promotion. + // getAnalysisUsage - We need the dominator tree for handling URoR. 
virtual void getAnalysisUsage(AnalysisUsage &AU) const { - if (!CompileFast) - AU.addRequired<DominatorTree>(); + AU.addRequired<DominatorTree>(); AU.addPreserved<DominatorTree>(); - if (!CompileFast) - AU.addRequired<DominanceFrontier>(); - AU.addPreserved<DominanceFrontier>(); } const char *getPassName() const { @@ -186,8 +119,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) { - return new DwarfEHPrepare(tm, fast); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { + return new DwarfEHPrepare(tm); } /// HasCatchAllInSelector - Return true if the intrinsic instruction has a @@ -207,7 +140,7 @@ FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels, for (Value::use_iterator I = SelectorIntrinsic->use_begin(), E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *II = cast<IntrinsicInst>(I); + IntrinsicInst *II = cast<IntrinsicInst>(*I); if (II->getParent()->getParent() != F) continue; @@ -225,13 +158,13 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { for (Value::use_iterator I = URoR->use_begin(), E = URoR->use_end(); I != E; ++I) { - if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + if (InvokeInst *II = dyn_cast<InvokeInst>(*I)) URoRInvokes.insert(II); } } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the ".llvm.eh.catch.all.value" call need to convert to using its +/// the "llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { if (!EHCatchAllValue) return false; @@ -247,7 +180,7 @@ bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { I = Sels.begin(), E = Sels.end(); I != E; ++I) { IntrinsicInst *Sel = *I; - // Index of the ".llvm.eh.catch.all.value" variable. + // Index of the "llvm.eh.catch.all.value" variable. unsigned OpIdx = Sel->getNumArgOperands() - 1; GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx)); if (GV != EHCatchAllValue) continue; @@ -268,10 +201,9 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, SmallPtrSet<PHINode*, 32> SeenPHIs; bool Changed = false; - restart: for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) { - Instruction *II = dyn_cast<Instruction>(I); + Instruction *II = dyn_cast<Instruction>(*I); if (!II || II->getParent()->getParent() != F) continue; if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) { @@ -282,11 +214,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, URoRInvoke = true; } else if (CastInst *CI = dyn_cast<CastInst>(II)) { Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls); - } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) { - if (!PromoteStoreInst(SI)) continue; - Changed = true; - SeenPHIs.clear(); - goto restart; // Uses may have changed, restart loop. } else if (PHINode *PN = dyn_cast<PHINode>(II)) { if (SeenPHIs.insert(PN)) // Don't process a PHI node more than once. 
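A second recurring adjustment appears in FindAllCleanupSelectors, FindAllURoRInvokes and FindSelectorAndURoR above: a Value::use_iterator is now dereferenced explicitly (*I) before casting, instead of casting the iterator itself through an implicit conversion that this commit moves away from. A small sketch of the updated traversal; visitUsers and the Value *V it walks are placeholders:

    #include "llvm/Value.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static void visitUsers(Value *V) {
      for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
           UI != UE; ++UI) {
        // *UI yields the using Value; dyn_cast is applied to the result of
        // the dereference, not to the iterator.
        if (Instruction *User = dyn_cast<Instruction>(*UI))
          (void)User; // inspect the user here
      }
    }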
@@ -304,7 +231,7 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, bool DwarfEHPrepare::HandleURoRInvokes() { if (!EHCatchAllValue) { EHCatchAllValue = - F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value"); + F->getParent()->getNamedGlobal("llvm.eh.catch.all.value"); if (!EHCatchAllValue) return false; } @@ -318,10 +245,6 @@ bool DwarfEHPrepare::HandleURoRInvokes() { SmallPtrSet<IntrinsicInst*, 32> CatchAllSels; FindAllCleanupSelectors(Sels, CatchAllSels); - if (!DT) - // We require DominatorTree information. - return CleanupSelectors(CatchAllSels); - if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); if (!URoR) return CleanupSelectors(CatchAllSels); @@ -338,7 +261,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { for (SmallPtrSet<InvokeInst*, 32>::iterator UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { const BasicBlock *URoRBB = (*UI)->getParent(); - if (SelBB == URoRBB || DT->dominates(SelBB, URoRBB)) { + if (DT->dominates(SelBB, URoRBB)) { SelsToConvert.insert(*SI); break; } @@ -360,11 +283,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() { for (Value::use_iterator I = ExceptionValueIntrinsic->use_begin(), E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(I); + IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I); if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; - Changed |= PromoteEHPtrStore(EHPtr); - bool URoRInvoke = false; SmallPtrSet<IntrinsicInst*, 8> SelCalls; Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls); @@ -532,11 +453,8 @@ bool DwarfEHPrepare::NormalizeLandingPads() { // Add a fallthrough from NewBB to the original landing pad. BranchInst::Create(LPad, NewBB); - // Now update DominatorTree and DominanceFrontier analysis information. - if (DT) - DT->splitBlock(NewBB); - if (DF) - DF->splitBlock(NewBB); + // Now update DominatorTree analysis information. + DT->splitBlock(NewBB); // Remember the newly constructed landing pad. The original landing pad // LPad is no longer a landing pad now that all unwind edges have been @@ -586,7 +504,7 @@ bool DwarfEHPrepare::LowerUnwinds() { // Create the call... CallInst *CI = CallInst::Create(RewindFunction, - CreateReadOfExceptionValue(TI->getParent()), + CreateExceptionValueCall(TI->getParent()), "", TI); CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // ...followed by an UnreachableInst. @@ -602,9 +520,11 @@ bool DwarfEHPrepare::LowerUnwinds() { } /// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from -/// landing pads by replacing calls outside of landing pads with loads from a -/// stack temporary. Move eh.exception calls inside landing pads to the start -/// of the landing pad (optional, but may make things simpler for later passes). +/// landing pads by replacing calls outside of landing pads with direct use of +/// a register holding the appropriate value; this requires adding calls inside +/// all landing pads to initialize the register. Also, move eh.exception calls +/// inside landing pads to the start of the landing pad (optional, but may make +/// things simpler for later passes). bool DwarfEHPrepare::MoveExceptionValueCalls() { // If the eh.exception intrinsic is not declared in the module then there is // nothing to do. Speed up compilation by checking for this common case. 
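One semantic note on the HandleURoRInvokes hunk above: replacing "SelBB == URoRBB || DT->dominates(SelBB, URoRBB)" with the bare dominates() call is not a behavior change, because dominance in LLVM is reflexive; properlyDominates() is the strict variant. Sketched as a tiny helper (reachesOrIs is a hypothetical name) to make the invariant explicit:

    #include "llvm/Analysis/Dominators.h"
    using namespace llvm;

    static bool reachesOrIs(DominatorTree &DT, BasicBlock *A, BasicBlock *B) {
      // dominates() is reflexive: DT.dominates(A, A) is always true, so a
      // separate A == B test on top of it is redundant.
      return DT.dominates(A, B);
    }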
@@ -614,61 +534,87 @@ bool DwarfEHPrepare::MoveExceptionValueCalls() { bool Changed = false; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) - if (CI->getIntrinsicID() == Intrinsic::eh_exception) { - if (!CI->use_empty()) { - Value *ExceptionValue = CreateReadOfExceptionValue(BB); - if (CI == ExceptionValue) { - // The call was at the start of a landing pad - leave it alone. - assert(LandingPads.count(BB) && - "Created eh.exception call outside landing pad!"); - continue; - } - CI->replaceAllUsesWith(ExceptionValue); - } - CI->eraseFromParent(); - ++NumExceptionValuesMoved; - Changed = true; + // Move calls to eh.exception that are inside a landing pad to the start of + // the landing pad. + for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end(); + LI != LE; ++LI) { + BasicBlock *LP = *LI; + for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end(); + II != IE;) + if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { + // Found a call to eh.exception. + if (!EI->use_empty()) { + // If there is already a call to eh.exception at the start of the + // landing pad, then get hold of it; otherwise create such a call. + Value *CallAtStart = CreateExceptionValueCall(LP); + + // If the call was at the start of a landing pad then leave it alone. + if (EI == CallAtStart) + continue; + EI->replaceAllUsesWith(CallAtStart); } + EI->eraseFromParent(); + ++NumExceptionValuesMoved; + Changed = true; + } } - return Changed; -} - -/// FinishStackTemporaries - If we introduced a stack variable to hold the -/// exception value then initialize it in each landing pad. -bool DwarfEHPrepare::FinishStackTemporaries() { - if (!ExceptionValueVar) - // Nothing to do. - return false; + // Look for calls to eh.exception that are not in a landing pad. If one is + // found, then a register that holds the exception value will be created in + // each landing pad, and the SSAUpdater will be used to compute the values + // returned by eh.exception calls outside of landing pads. + SSAUpdater SSA; + + // Remember where we found the eh.exception call, to avoid rescanning earlier + // basic blocks which we already know contain no eh.exception calls. + bool FoundCallOutsideLandingPad = false; + Function::iterator BB = F->begin(); + for (Function::iterator BE = F->end(); BB != BE; ++BB) { + // Skip over landing pads. + if (LandingPads.count(BB)) + continue; - bool Changed = false; + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE; ++II) + if (isa<EHExceptionInst>(II)) { + SSA.Initialize(II->getType(), II->getName()); + FoundCallOutsideLandingPad = true; + break; + } - // Make sure that there is a store of the exception value at the start of - // each landing pad. - for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) { - Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI); - Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar); - Store->insertAfter(ExceptionValue); - Changed = true; + if (FoundCallOutsideLandingPad) + break; } - return Changed; -} + // If all calls to eh.exception are in landing pads then we are done. + if (!FoundCallOutsideLandingPad) + return Changed; -/// PromoteStackTemporaries - Turn any stack temporaries we introduced into -/// registers if possible. 
-bool DwarfEHPrepare::PromoteStackTemporaries() { - if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) { - // Turn the exception temporary into registers and phi nodes if possible. - std::vector<AllocaInst*> Allocas(1, ExceptionValueVar); - PromoteMemToReg(Allocas, *DT, *DF); - return true; + // Add a call to eh.exception at the start of each landing pad, and tell the + // SSAUpdater that this is the value produced by the landing pad. + for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); + LI != LE; ++LI) + SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI)); + + // Now turn all calls to eh.exception that are not in a landing pad into a use + // of the appropriate register. + for (Function::iterator BE = F->end(); BB != BE; ++BB) { + // Skip over landing pads. + if (LandingPads.count(BB)) + continue; + + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE;) + if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { + // Found a call to eh.exception, replace it with the value from any + // upstream landing pad(s). + EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB)); + EI->eraseFromParent(); + ++NumExceptionValuesMoved; + } } - return false; + + return true; } /// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at @@ -691,36 +637,11 @@ Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); } -/// CreateValueLoad - Insert a load of the exception value stack variable -/// (creating it if necessary) at the start of the basic block (unless -/// there already is a load, in which case the existing load is returned). -Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) { - Instruction *Start = BB->getFirstNonPHIOrDbg(); - // Is this a load of the exception temporary? - if (ExceptionValueVar) - if (LoadInst* LI = dyn_cast<LoadInst>(Start)) - if (LI->getPointerOperand() == ExceptionValueVar) - // Reuse the existing load. - return Start; - - // Create the temporary if we didn't already. - if (!ExceptionValueVar) { - ExceptionValueVar = new AllocaInst(PointerType::getUnqual( - Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin()); - ++NumStackTempsIntroduced; - } - - // Load the value. - return new LoadInst(ExceptionValueVar, "eh.value.load", Start); -} - bool DwarfEHPrepare::runOnFunction(Function &Fn) { bool Changed = false; // Initialize internal state. - DT = getAnalysisIfAvailable<DominatorTree>(); - DF = getAnalysisIfAvailable<DominanceFrontier>(); - ExceptionValueVar = 0; + DT = &getAnalysis<DominatorTree>(); F = &Fn; // Ensure that only unwind edges end at landing pads (a landing pad is a @@ -735,13 +656,6 @@ bool DwarfEHPrepare::runOnFunction(Function &Fn) { // Move eh.exception calls to landing pads. Changed |= MoveExceptionValueCalls(); - // Initialize any stack temporaries we introduced. - Changed |= FinishStackTemporaries(); - - // Turn any stack temporaries into registers if possible. 
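The rewritten MoveExceptionValueCalls above is the heart of the DwarfEHPrepare change: instead of spilling the exception value to a stack temporary and running mem2reg afterwards (which needed DominanceFrontier), it seeds an SSAUpdater with one eh.exception call per landing pad and lets the updater materialize PHI nodes on demand. The core pattern reduces to the following sketch; reachingValue and the Defs container are placeholders, while the SSAUpdater calls mirror the ones used in the diff:

    #include "llvm/BasicBlock.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    #include <utility>
    #include <vector>
    using namespace llvm;

    // Given one defining value per defining block, compute the value that
    // reaches the end of UseBB. SSAUpdater inserts any PHIs required at join
    // points, so no explicit dominance-frontier computation is needed.
    static Value *reachingValue(const Type *Ty, StringRef Name,
                        const std::vector<std::pair<BasicBlock*, Value*> > &Defs,
                        BasicBlock *UseBB) {
      SSAUpdater SSA;
      SSA.Initialize(Ty, Name);
      for (unsigned i = 0, e = Defs.size(); i != e; ++i)
        SSA.AddAvailableValue(Defs[i].first, Defs[i].second);
      return SSA.GetValueAtEndOfBlock(UseBB);
    }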
- if (!CompileFast) - Changed |= PromoteStackTemporaries(); - Changed |= HandleURoRInvokes(); LandingPads.clear(); diff --git a/contrib/llvm/lib/CodeGen/ELF.h b/contrib/llvm/lib/CodeGen/ELF.h index cb5a8c0..fb884c9 100644 --- a/contrib/llvm/lib/CodeGen/ELF.h +++ b/contrib/llvm/lib/CodeGen/ELF.h @@ -22,36 +22,12 @@ #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/Support/ELF.h" #include "llvm/System/DataTypes.h" namespace llvm { class GlobalValue; - // Identification Indexes - enum { - EI_MAG0 = 0, - EI_MAG1 = 1, - EI_MAG2 = 2, - EI_MAG3 = 3 - }; - - // File types - enum { - ET_NONE = 0, // No file type - ET_REL = 1, // Relocatable file - ET_EXEC = 2, // Executable file - ET_DYN = 3, // Shared object file - ET_CORE = 4, // Core file - ET_LOPROC = 0xff00, // Beginning of processor-specific codes - ET_HIPROC = 0xffff // Processor-specific - }; - - // Versioning - enum { - EV_NONE = 0, - EV_CURRENT = 1 - }; - /// ELFSym - This struct contains information about each symbol that is /// added to logical symbol table for the module. This is eventually /// turned into a real symbol table in the file. @@ -108,9 +84,9 @@ namespace llvm { static ELFSym *getExtSym(const char *Ext) { ELFSym *Sym = new ELFSym(); Sym->Source.Ext = Ext; - Sym->setBind(STB_GLOBAL); - Sym->setType(STT_NOTYPE); - Sym->setVisibility(STV_DEFAULT); + Sym->setBind(ELF::STB_GLOBAL); + Sym->setType(ELF::STT_NOTYPE); + Sym->setVisibility(ELF::STV_DEFAULT); Sym->SourceType = isExtSym; return Sym; } @@ -118,9 +94,9 @@ namespace llvm { // getSectionSym - Returns a elf symbol to represent an elf section static ELFSym *getSectionSym() { ELFSym *Sym = new ELFSym(); - Sym->setBind(STB_LOCAL); - Sym->setType(STT_SECTION); - Sym->setVisibility(STV_DEFAULT); + Sym->setBind(ELF::STB_LOCAL); + Sym->setType(ELF::STT_SECTION); + Sym->setVisibility(ELF::STV_DEFAULT); Sym->SourceType = isOther; return Sym; } @@ -128,9 +104,9 @@ namespace llvm { // getFileSym - Returns a elf symbol to represent the module identifier static ELFSym *getFileSym() { ELFSym *Sym = new ELFSym(); - Sym->setBind(STB_LOCAL); - Sym->setType(STT_FILE); - Sym->setVisibility(STV_DEFAULT); + Sym->setBind(ELF::STB_LOCAL); + Sym->setType(ELF::STT_FILE); + Sym->setVisibility(ELF::STV_DEFAULT); Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; Sym->SourceType = isOther; return Sym; @@ -141,8 +117,8 @@ namespace llvm { ELFSym *Sym = new ELFSym(); Sym->Source.GV = GV; Sym->setBind(Bind); - Sym->setType(STT_NOTYPE); - Sym->setVisibility(STV_DEFAULT); + Sym->setType(ELF::STT_NOTYPE); + Sym->setVisibility(ELF::STV_DEFAULT); Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; Sym->SourceType = isGV; return Sym; @@ -159,35 +135,14 @@ namespace llvm { // Symbol index into the Symbol table unsigned SymTabIdx; - enum { - STB_LOCAL = 0, // Local sym, not visible outside obj file containing def - STB_GLOBAL = 1, // Global sym, visible to all object files being combined - STB_WEAK = 2 // Weak symbol, like global but lower-precedence - }; - - enum { - STT_NOTYPE = 0, // Symbol's type is not specified - STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.) - STT_FUNC = 2, // Symbol is executable code (function, etc.) 
- STT_SECTION = 3, // Symbol refers to a section - STT_FILE = 4 // Local, absolute symbol that refers to a file - }; - - enum { - STV_DEFAULT = 0, // Visibility is specified by binding type - STV_INTERNAL = 1, // Defined by processor supplements - STV_HIDDEN = 2, // Not visible to other components - STV_PROTECTED = 3 // Visible in other components but not preemptable - }; - ELFSym() : SourceType(isOther), NameIdx(0), Value(0), - Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0), + Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0), SymTabIdx(0) {} unsigned getBind() const { return (Info >> 4) & 0xf; } unsigned getType() const { return Info & 0xf; } - bool isLocalBind() const { return getBind() == STB_LOCAL; } - bool isFileType() const { return getType() == STT_FILE; } + bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; } + bool isFileType() const { return getType() == ELF::STT_FILE; } void setBind(unsigned X) { assert(X == (X & 0xF) && "Bind value out of range!"); @@ -222,51 +177,6 @@ namespace llvm { unsigned Align; // sh_addralign - Alignment of section. unsigned EntSize; // sh_entsize - Size of entries in the section e - // Section Header Flags - enum { - SHF_WRITE = 1 << 0, // Writable - SHF_ALLOC = 1 << 1, // Mapped into the process addr space - SHF_EXECINSTR = 1 << 2, // Executable - SHF_MERGE = 1 << 4, // Might be merged if equal - SHF_STRINGS = 1 << 5, // Contains null-terminated strings - SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index - SHF_LINK_ORDER = 1 << 7, // Preserve order after combining - SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required - SHF_GROUP = 1 << 9, // Section is a member of a group - SHF_TLS = 1 << 10 // Section holds thread-local data - }; - - // Section Types - enum { - SHT_NULL = 0, // No associated section (inactive entry). - SHT_PROGBITS = 1, // Program-defined contents. - SHT_SYMTAB = 2, // Symbol table. - SHT_STRTAB = 3, // String table. - SHT_RELA = 4, // Relocation entries; explicit addends. - SHT_HASH = 5, // Symbol hash table. - SHT_DYNAMIC = 6, // Information for dynamic linking. - SHT_NOTE = 7, // Information about the file. - SHT_NOBITS = 8, // Data occupies no space in the file. - SHT_REL = 9, // Relocation entries; no explicit addends. - SHT_SHLIB = 10, // Reserved. - SHT_DYNSYM = 11, // Symbol table. - SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type. - SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. - SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. - SHT_HIUSER = 0xffffffff // Highest type reserved for applications. - }; - - // Special section indices. - enum { - SHN_UNDEF = 0, // Undefined, missing, irrelevant - SHN_LORESERVE = 0xff00, // Lowest reserved index - SHN_LOPROC = 0xff00, // Lowest processor-specific index - SHN_HIPROC = 0xff1f, // Highest processor-specific index - SHN_ABS = 0xfff1, // Symbol has absolute value; no relocation - SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables - SHN_HIRESERVE = 0xffff // Highest reserved index - }; - /// SectionIdx - The number of the section in the Section Table. 
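The long enum deletions above (symbol bindings, types and visibilities, followed by the section flags, section types and special section indices) are a de-duplication: the same constants already exist in llvm/Support/ELF.h and are now referenced through the llvm::ELF namespace, as the updated ELFSym helpers show. For reference, the st_info packing that the getBind()/getType() accessors above decode can be written against the shared constants; makeSymbolInfo is a hypothetical helper, the nibble layout follows directly from those accessors:

    #include "llvm/Support/ELF.h"
    using namespace llvm;

    // ELF st_info: binding in the high nibble, type in the low nibble,
    // matching getBind() ((Info >> 4) & 0xf) and getType() (Info & 0xf).
    static unsigned char makeSymbolInfo(unsigned Bind, unsigned Type) {
      return (unsigned char)((Bind << 4) | (Type & 0xf));
    }

    // A defined global function symbol would carry, e.g.:
    //   makeSymbolInfo(ELF::STB_GLOBAL, ELF::STT_FUNC)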
unsigned short SectionIdx; diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp index 36b0e65..3fb087c 100644 --- a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp @@ -71,7 +71,7 @@ void ELFCodeEmitter::startFunction(MachineFunction &MF) { bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { // Add a symbol to represent the function. const Function *F = MF.getFunction(); - ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC, + ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC, EW.getGlobalELFVisibility(F)); FnSym->SectionIdx = ES->SectionIdx; FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.cpp b/contrib/llvm/lib/CodeGen/ELFWriter.cpp index b644ebe..d14728d 100644 --- a/contrib/llvm/lib/CodeGen/ELFWriter.cpp +++ b/contrib/llvm/lib/CodeGen/ELFWriter.cpp @@ -63,7 +63,7 @@ char ELFWriter::ID = 0; //===----------------------------------------------------------------------===// ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) - : MachineFunctionPass(&ID), O(o), TM(tm), + : MachineFunctionPass(ID), O(o), TM(tm), OutContext(*new MCContext(*TM.getMCAsmInfo())), TLOF(TM.getTargetLowering()->getObjFileLowering()), is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), @@ -129,12 +129,12 @@ bool ELFWriter::doInitialization(Module &M) { ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS] ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA] - ElfHdr.emitByte(EV_CURRENT); // e_ident[EI_VERSION] + ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION] ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD] - ElfHdr.emitWord16(ET_REL); // e_type + ElfHdr.emitWord16(ELF::ET_REL); // e_type ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target - ElfHdr.emitWord32(EV_CURRENT); // e_version + ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version ElfHdr.emitWord(0); // e_entry, no entry point in .o file ElfHdr.emitWord(0); // e_phoff, no program header for .o ELFHdr_e_shoff_Offset = ElfHdr.size(); @@ -252,7 +252,7 @@ ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { // is true if the relocation section contains entries with addends. ELFSection &ELFWriter::getRelocSection(ELFSection &S) { unsigned SectionType = TEW->hasRelocationAddend() ? 
- ELFSection::SHT_RELA : ELFSection::SHT_REL; + ELF::SHT_RELA : ELF::SHT_REL; std::string SectionName(".rel"); if (TEW->hasRelocationAddend()) @@ -268,11 +268,11 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { default: llvm_unreachable("unknown visibility type"); case GlobalValue::DefaultVisibility: - return ELFSym::STV_DEFAULT; + return ELF::STV_DEFAULT; case GlobalValue::HiddenVisibility: - return ELFSym::STV_HIDDEN; + return ELF::STV_HIDDEN; case GlobalValue::ProtectedVisibility: - return ELFSym::STV_PROTECTED; + return ELF::STV_PROTECTED; } return 0; } @@ -280,23 +280,23 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { // getGlobalELFBinding - Returns the ELF specific binding type unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) { if (GV->hasInternalLinkage()) - return ELFSym::STB_LOCAL; + return ELF::STB_LOCAL; if (GV->isWeakForLinker() && !GV->hasCommonLinkage()) - return ELFSym::STB_WEAK; + return ELF::STB_WEAK; - return ELFSym::STB_GLOBAL; + return ELF::STB_GLOBAL; } // getGlobalELFType - Returns the ELF specific type for a global unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) { if (GV->isDeclaration()) - return ELFSym::STT_NOTYPE; + return ELF::STT_NOTYPE; if (isa<Function>(GV)) - return ELFSym::STT_FUNC; + return ELF::STT_FUNC; - return ELFSym::STT_OBJECT; + return ELF::STT_OBJECT; } // IsELFUndefSym - True if the global value must be marked as a symbol @@ -364,7 +364,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) { GblSym->Size = Size; if (S->HasCommonSymbols()) { // Symbol must go to a common section - GblSym->SectionIdx = ELFSection::SHN_COMMON; + GblSym->SectionIdx = ELF::SHN_COMMON; // A new linkonce section is created for each global in the // common section, the default alignment is 1 and the symbol diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.h b/contrib/llvm/lib/CodeGen/ELFWriter.h index db66ecc..b8bac55 100644 --- a/contrib/llvm/lib/CodeGen/ELFWriter.h +++ b/contrib/llvm/lib/CodeGen/ELFWriter.h @@ -39,6 +39,7 @@ namespace llvm { class raw_ostream; class SectionKind; class MCContext; + class TargetMachine; typedef std::vector<ELFSym*>::iterator ELFSymIter; typedef std::vector<ELFSection*>::iterator ELFSectionIter; @@ -160,29 +161,29 @@ namespace llvm { SN->SectionIdx = NumSections++; SN->Type = Type; SN->Flags = Flags; - SN->Link = ELFSection::SHN_UNDEF; + SN->Link = ELF::SHN_UNDEF; SN->Align = Align; return *SN; } ELFSection &getNonExecStackSection() { - return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1); + return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1); } ELFSection &getSymbolTableSection() { - return getSection(".symtab", ELFSection::SHT_SYMTAB, 0); + return getSection(".symtab", ELF::SHT_SYMTAB, 0); } ELFSection &getStringTableSection() { - return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1); + return getSection(".strtab", ELF::SHT_STRTAB, 0, 1); } ELFSection &getSectionHeaderStringTableSection() { - return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1); + return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1); } ELFSection &getNullSection() { - return getSection("", ELFSection::SHT_NULL, 0); + return getSection("", ELF::SHT_NULL, 0); } ELFSection &getDataSection(); diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp index ab0a800..0f6e882 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -30,8 +30,8 @@ namespace { raw_ostream &OS; public: - 
Printer() : FunctionPass(&ID), OS(errs()) {} - explicit Printer(raw_ostream &OS) : FunctionPass(&ID), OS(OS) {} + Printer() : FunctionPass(ID), OS(errs()) {} + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} const char *getPassName() const; @@ -55,8 +55,8 @@ namespace { } -static RegisterPass<GCModuleInfo> -X("collector-metadata", "Create Garbage Collector Module Metadata"); +INITIALIZE_PASS(GCModuleInfo, "collector-metadata", + "Create Garbage Collector Module Metadata", false, false); // ----------------------------------------------------------------------------- @@ -70,7 +70,7 @@ GCFunctionInfo::~GCFunctionInfo() {} char GCModuleInfo::ID = 0; GCModuleInfo::GCModuleInfo() - : ImmutablePass(&ID) {} + : ImmutablePass(ID) {} GCModuleInfo::~GCModuleInfo() { clear(); @@ -189,7 +189,7 @@ FunctionPass *llvm::createGCInfoDeleter() { return new Deleter(); } -Deleter::Deleter() : FunctionPass(&ID) {} +Deleter::Deleter() : FunctionPass(ID) {} const char *Deleter::getPassName() const { return "Delete Garbage Collector Information"; diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp index 71506cc..719fa19 100644 --- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -130,7 +130,7 @@ FunctionPass *llvm::createGCLoweringPass() { char LowerIntrinsics::ID = 0; LowerIntrinsics::LowerIntrinsics() - : FunctionPass(&ID) {} + : FunctionPass(ID) {} const char *LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; @@ -260,7 +260,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { bool LowerRd = !S.customReadBarrier(); bool InitRoots = S.initializeRoots(); - SmallVector<AllocaInst*,32> Roots; + SmallVector<AllocaInst*, 32> Roots; bool MadeChange = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -271,7 +271,8 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { case Intrinsic::gcwrite: if (LowerWr) { // Replace a write barrier with a simple store. 
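Just below, the GCStrategy hunk completes the default gcwrite lowering: when the strategy does not request a custom write barrier, the intrinsic collapses to a plain store of the value operand through the slot operand. Isolated as a sketch, assuming CI is the llvm.gcwrite CallInst (lowerGCWrite is a hypothetical wrapper; the body mirrors the diff):

    #include "llvm/Instructions.h"
    using namespace llvm;

    // llvm.gcwrite(%value, %object, %slot) with no custom barrier becomes
    // 'store %value, %slot' inserted in front of the call, after which the
    // call itself is deleted.
    static void lowerGCWrite(CallInst *CI) {
      Value *St = new StoreInst(CI->getArgOperand(0),  // value being written
                                CI->getArgOperand(2),  // slot pointer
                                CI);                   // insert before CI
      CI->replaceAllUsesWith(St);
      CI->eraseFromParent();
    }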
- Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); + Value *St = new StoreInst(CI->getArgOperand(0), + CI->getArgOperand(2), CI); CI->replaceAllUsesWith(St); CI->eraseFromParent(); } @@ -317,7 +318,7 @@ FunctionPass *llvm::createGCMachineCodeAnalysisPass() { char MachineCodeAnalysis::ID = 0; MachineCodeAnalysis::MachineCodeAnalysis() - : MachineFunctionPass(&ID) {} + : MachineFunctionPass(ID) {} const char *MachineCodeAnalysis::getPassName() const { return "Analyze Machine Code For Garbage Collection"; diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 6b445e0..0ea30d7 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -154,7 +154,7 @@ namespace { int FnNum; public: static char ID; - IfConverter() : MachineFunctionPass(&ID), FnNum(-1) {} + IfConverter() : MachineFunctionPass(ID), FnNum(-1) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "If Converter"; } @@ -230,8 +230,7 @@ namespace { char IfConverter::ID = 0; } -static RegisterPass<IfConverter> -X("if-converter", "If Converter"); +INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false); FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 12adcaa..b965bfd 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -14,10 +14,12 @@ #define DEBUG_TYPE "spiller" #include "Spiller.h" +#include "SplitKit.h" #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -28,8 +30,10 @@ using namespace llvm; namespace { class InlineSpiller : public Spiller { + MachineFunctionPass &pass_; MachineFunction &mf_; LiveIntervals &lis_; + MachineLoopInfo &loops_; VirtRegMap &vrm_; MachineFrameInfo &mfi_; MachineRegisterInfo &mri_; @@ -37,9 +41,11 @@ class InlineSpiller : public Spiller { const TargetRegisterInfo &tri_; const BitVector reserved_; + SplitAnalysis splitAnalysis_; + // Variables that are valid during spill(), but used by multiple methods. 
LiveInterval *li_; - std::vector<LiveInterval*> *newIntervals_; + SmallVectorImpl<LiveInterval*> *newIntervals_; const TargetRegisterClass *rc_; int stackSlot_; const SmallVectorImpl<LiveInterval*> *spillIs_; @@ -53,25 +59,34 @@ class InlineSpiller : public Spiller { ~InlineSpiller() {} public: - InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) - : mf_(*mf), lis_(*lis), vrm_(*vrm), - mfi_(*mf->getFrameInfo()), - mri_(mf->getRegInfo()), - tii_(*mf->getTarget().getInstrInfo()), - tri_(*mf->getTarget().getRegisterInfo()), - reserved_(tri_.getReservedRegs(mf_)) {} + InlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) + : pass_(pass), + mf_(mf), + lis_(pass.getAnalysis<LiveIntervals>()), + loops_(pass.getAnalysis<MachineLoopInfo>()), + vrm_(vrm), + mfi_(*mf.getFrameInfo()), + mri_(mf.getRegInfo()), + tii_(*mf.getTarget().getInstrInfo()), + tri_(*mf.getTarget().getRegisterInfo()), + reserved_(tri_.getReservedRegs(mf_)), + splitAnalysis_(mf, lis_, loops_) {} void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestIndex); + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs); private: + bool split(); + bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, SlotIndex UseIdx); bool reMaterializeFor(MachineBasicBlock::iterator MI); void reMaterializeAll(); + bool coalesceStackAccess(MachineInstr *MI); bool foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops); void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); @@ -80,12 +95,43 @@ private: } namespace llvm { -Spiller *createInlineSpiller(MachineFunction *mf, - LiveIntervals *lis, - const MachineLoopInfo *mli, - VirtRegMap *vrm) { - return new InlineSpiller(mf, lis, vrm); +Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { + return new InlineSpiller(pass, mf, vrm); +} } + +/// split - try splitting the current interval into pieces that may allocate +/// separately. Return true if successful. +bool InlineSpiller::split() { + splitAnalysis_.analyze(li_); + + if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) { + // We can split, but li_ may be left intact with fewer uses. + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitAroundLoop(loop)) + return true; + } + + // Try splitting into single block intervals. + SplitAnalysis::BlockPtrSet blocks; + if (splitAnalysis_.getMultiUseBlocks(blocks)) { + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitSingleBlocks(blocks)) + return true; + } + + // Try splitting inside a basic block. + if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) { + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitInsideBlock(MBB)) + return true; + } + + // We may have been able to split out some uses, but the original interval is + // intact, and it should still be spilled. 
+ return false; } /// allUsesAvailableAt - Return true if all registers used by OrigMI at @@ -237,7 +283,7 @@ void InlineSpiller::reMaterializeAll() { lis_.RemoveMachineInstrFromMaps(DefMI); vrm_.RemoveMachineInstrFromMaps(DefMI); DefMI->eraseFromParent(); - li_->removeValNo(VNI); + VNI->setIsDefAccurate(false); anyRemoved = true; } @@ -253,8 +299,8 @@ void InlineSpiller::reMaterializeAll() { MachineBasicBlock::iterator NextMI = MI; ++NextMI; if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { - SlotIndex NearIdx = lis_.getInstructionIndex(NextMI); - if (li_->liveAt(NearIdx)) + VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI)); + if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) continue; } DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); @@ -262,6 +308,24 @@ void InlineSpiller::reMaterializeAll() { } } +/// If MI is a load or store of stackSlot_, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { + int FI = 0; + unsigned reg; + if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) && + !(reg = tii_.isStoreToStackSlot(MI, FI))) + return false; + + // We have a stack access. Is it the right register and slot? + if (reg != li_->reg || FI != stackSlot_) + return false; + + DEBUG(dbgs() << "Coalescing stack access: " << *MI); + lis_.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + return true; +} + /// foldMemoryOperand - Try folding stack slot references in Ops into MI. /// Return true on success, and MI will be erased. bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, @@ -323,9 +387,8 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, } void InlineSpiller::spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestIndex) { + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) { DEBUG(dbgs() << "Inline spilling " << *li << "\n"); assert(li->isSpillable() && "Attempting to spill already spilled value."); assert(!li->isStackSlot() && "Trying to spill a stack slot."); @@ -335,13 +398,18 @@ void InlineSpiller::spill(LiveInterval *li, rc_ = mri_.getRegClass(li->reg); spillIs_ = &spillIs; + if (split()) + return; + reMaterializeAll(); // Remat may handle everything. if (li_->empty()) return; - stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); + stackSlot_ = vrm_.getStackSlot(li->reg); + if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) + stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); // Iterate over instructions using register. for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg); @@ -365,6 +433,10 @@ void InlineSpiller::spill(LiveInterval *li, continue; } + // Stack slot accesses may coalesce away. + if (coalesceStackAccess(MI)) + continue; + // Analyze instruction. bool Reads, Writes; SmallVector<unsigned, 8> Ops; diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 03ae214..3852eba 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -481,7 +481,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { Value *Ops[3]; Ops[0] = CI->getArgOperand(0); // Extend the amount to i32. 
- Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), + Type::getInt32Ty(Context), /* isSigned */ false); Ops[2] = Size; ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index bf3137e..3603802 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -85,7 +85,7 @@ static bool getVerboseAsm() { case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); case cl::BOU_TRUE: return true; case cl::BOU_FALSE: return false; - } + } } // Enable or disable FastISel. Both options are needed, because @@ -139,8 +139,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); // Create a code emitter if asked to show the encoding. - // - // FIXME: These are currently leaked. MCCodeEmitter *MCE = 0; if (ShowMCEncoding) MCE = getTarget().createCodeEmitter(*this, *Context); @@ -154,8 +152,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - // - // FIXME: These are currently leaked. MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); if (MCE == 0 || TAB == 0) @@ -180,12 +176,12 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); if (Printer == 0) return true; - + // If successful, createAsmPrinter took ownership of AsmStreamer. AsmStreamer.take(); - + PM.add(Printer); - + // Make sure the code model is set. setCodeModelForStatic(); PM.add(createGCInfoDeleter()); @@ -204,7 +200,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, bool DisableVerify) { // Make sure the code model is set. setCodeModelForJIT(); - + // Add common CodeGen passes. MCContext *Ctx = 0; if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) @@ -216,19 +212,36 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return false; // success! } +/// addPassesToEmitMC - Add passes to the specified pass manager to get +/// machine code emitted with the MCJIT. This method returns true if machine +/// code is not supported. It fills the MCContext Ctx pointer which can be +/// used to build custom MCStreamer. +/// +bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, + MCContext *&Ctx, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // Add common CodeGen passes. + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + return true; + // Make sure the code model is set. + setCodeModelForJIT(); + + return false; // success! 
+} + static void printNoVerify(PassManagerBase &PM, const char *Banner) { if (PrintMachineCode) PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); } static void printAndVerify(PassManagerBase &PM, - const char *Banner, - bool allowDoubleDefs = false) { + const char *Banner) { if (PrintMachineCode) PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) - PM.add(createMachineVerifierPass(allowDoubleDefs)); + PM.add(createMachineVerifierPass()); } /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both @@ -258,6 +271,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + // Turn exception handling constructs into something the code generators can // handle. switch (getMCAsmInfo()->getExceptionHandlingType()) { @@ -269,26 +287,25 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. PM.add(createSjLjEHPass(getTargetLowering())); - PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); - break; + // FALLTHROUGH case ExceptionHandling::Dwarf: - PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); + PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: PM.add(createLowerInvokePass(getTargetLowering())); + + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); break; } - PM.add(createGCLoweringPass()); - - // Make sure that no unreachable blocks are instruction selected. - PM.add(createUnreachableBlockEliminationPass()); - if (OptLevel != CodeGenOpt::None && !DisableCGP) PM.add(createCodeGenPreparePass(getTargetLowering())); PM.add(createStackProtectorPass(getTargetLowering())); + addPreISel(PM, OptLevel); + if (PrintISelInput) PM.add(createPrintFunctionPass("\n\n" "*** Final LLVM Code input to ISel ***\n", @@ -300,13 +317,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createVerifierPass()); // Standard Lower-Level Passes. - + // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo()); PM.add(MMI); OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. - // Set up a MachineFunction for the rest of CodeGen to work on. PM.add(new MachineFunctionAnalysis(*this, OptLevel)); @@ -321,44 +337,43 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, return true; // Print the instruction selected machine code... - printAndVerify(PM, "After Instruction Selection", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After Instruction Selection"); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. if (OptLevel != CodeGenOpt::None) PM.add(createOptimizePHIsPass()); + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + PM.add(createLocalStackSlotAllocationPass()); + if (OptLevel != CodeGenOpt::None) { // With optimization, dead code should already be eliminated. 
However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After codegen DCE pass"); - PM.add(createOptimizeExtsPass()); + PM.add(createPeepholeOptimizerPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); - printAndVerify(PM, "After Machine LICM, CSE and Sinking passes", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); } // Pre-ra tail duplication. if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); } // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PreRegAlloc passes", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After PreRegAlloc passes"); // Perform register allocation. PM.add(createRegisterAllocator(OptLevel)); diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index ad57284..59f380a 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -166,6 +166,56 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { return I != begin() && (--I)->end > Start; } + +/// ValNo is dead, remove it. If it is the largest value number, just nuke it +/// (and any other deleted values neighboring it), otherwise mark it as ~1U so +/// it can be nuked later. +void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { + if (ValNo->id == getNumValNums()-1) { + do { + valnos.pop_back(); + } while (!valnos.empty() && valnos.back()->isUnused()); + } else { + ValNo->setIsUnused(true); + } +} + +/// RenumberValues - Renumber all values in order of appearance and delete the +/// remaining unused values. +void LiveInterval::RenumberValues(LiveIntervals &lis) { + SmallPtrSet<VNInfo*, 8> Seen; + bool seenPHIDef = false; + valnos.clear(); + for (const_iterator I = begin(), E = end(); I != E; ++I) { + VNInfo *VNI = I->valno; + if (!Seen.insert(VNI)) + continue; + assert(!VNI->isUnused() && "Unused valno used by live range"); + VNI->id = (unsigned)valnos.size(); + valnos.push_back(VNI); + VNI->setHasPHIKill(false); + if (VNI->isPHIDef()) + seenPHIDef = true; + } + + // Recompute phi kill flags. + if (!seenPHIDef) + return; + for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (!VNI->isPHIDef()) + continue; + const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def); + assert(PHIBB && "No basic block for phi-def"); + for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(), + PE = PHIBB->pred_end(); PI != PE; ++PI) { + VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot()); + if (KVNI) + KVNI->setHasPHIKill(true); + } + } +} + /// extendIntervalEndTo - This method is used when we want to extend the range /// specified by I to end at the specified endpoint. To do this, we should /// merge and eliminate all ranges that this will overlap with. 
The iterator is @@ -175,7 +225,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { VNInfo *ValNo = I->valno; // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = next(I); + Ranges::iterator MergeTo = llvm::next(I); for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); } @@ -184,11 +234,11 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { I->end = std::max(NewEnd, prior(MergeTo)->end); // Erase any dead ranges. - ranges.erase(next(I), MergeTo); + ranges.erase(llvm::next(I), MergeTo); // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. - Ranges::iterator Next = next(I); + Ranges::iterator Next = llvm::next(I); if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) { I->end = Next->end; ranges.erase(Next); @@ -227,7 +277,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) { MergeTo->end = I->end; } - ranges.erase(next(MergeTo), next(I)); + ranges.erase(llvm::next(MergeTo), llvm::next(I)); return MergeTo; } @@ -280,7 +330,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } -/// isInOneLiveRange - Return true if the range specified is entirely in +/// isInOneLiveRange - Return true if the range specified is entirely in /// a single LiveRange of the live interval. bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) { Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); @@ -314,16 +364,8 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, break; } if (isDead) { - // Now that ValNo is dead, remove it. If it is the largest value - // number, just nuke it (and any other deleted values neighboring it), - // otherwise mark it as ~1U so it can be nuked later. - if (ValNo->id == getNumValNums()-1) { - do { - valnos.pop_back(); - } while (!valnos.empty() && valnos.back()->isUnused()); - } else { - ValNo->setIsUnused(true); - } + // Now that ValNo is dead, remove it. + markValNoForDeletion(ValNo); } } @@ -345,7 +387,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, I->end = Start; // Trim the old interval. // Insert the new one. - ranges.insert(next(I), LiveRange(End, OldEnd, ValNo)); + ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo)); } /// removeValNo - Remove all the ranges defined by the specified value#. @@ -359,21 +401,13 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { if (I->valno == ValNo) ranges.erase(I); } while (I != E); - // Now that ValNo is dead, remove it. If it is the largest value - // number, just nuke it (and any other deleted values neighboring it), - // otherwise mark it as ~1U so it can be nuked later. - if (ValNo->id == getNumValNums()-1) { - do { - valnos.pop_back(); - } while (!valnos.empty() && valnos.back()->isUnused()); - } else { - ValNo->setIsUnused(true); - } + // Now that ValNo is dead, remove it. + markValNoForDeletion(ValNo); } /// getLiveRangeContaining - Return the live range that contains the /// specified index, or null if there is none. 
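A note on the pervasive next(I) to llvm::next(I) respellings in this file: llvm::next lives in llvm/ADT/STLExtras.h, and the explicit qualification most likely guards against ambiguity with C++0x's std::next, which argument-dependent lookup can find for iterators from namespace std. In sketch form (demo is a hypothetical function):

    #include "llvm/ADT/STLExtras.h"
    #include <list>

    static void demo(std::list<int> &Ranges) {
      std::list<int>::iterator I = Ranges.begin();
      // An unqualified next(I) can be ambiguous between llvm::next and
      // std::next once C++0x headers are in play; qualifying disambiguates.
      std::list<int>::iterator J = llvm::next(I); // advance by one
      (void)J;
    }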
-LiveInterval::const_iterator +LiveInterval::const_iterator LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const { const_iterator It = std::upper_bound(begin(), end(), Idx); if (It != ranges.begin()) { @@ -385,7 +419,7 @@ LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const { return end(); } -LiveInterval::iterator +LiveInterval::iterator LiveInterval::FindLiveRangeContaining(SlotIndex Idx) { iterator It = std::upper_bound(begin(), end(), Idx); if (It != begin()) { @@ -393,7 +427,7 @@ LiveInterval::FindLiveRangeContaining(SlotIndex Idx) { if (It->contains(Idx)) return It; } - + return end(); } @@ -425,11 +459,11 @@ VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const { /// the intervals are not joinable, this aborts. void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, - const int *RHSValNoAssignments, + const int *RHSValNoAssignments, SmallVector<VNInfo*, 16> &NewVNInfo, MachineRegisterInfo *MRI) { // Determine if any of our live range values are mapped. This is uncommon, so - // we want to avoid the interval scan if not. + // we want to avoid the interval scan if not. bool MustMapCurValNos = false; unsigned NumVals = getNumValNums(); unsigned NumNewVals = NewVNInfo.size(); @@ -449,7 +483,7 @@ void LiveInterval::join(LiveInterval &Other, ++OutIt; for (iterator I = OutIt, E = end(); I != E; ++I) { OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]]; - + // If this live range has the same value # as its immediate predecessor, // and if they are neighbors, remove one LiveRange. This happens when we // have [0,3:0)[4,7:1) and map 0/1 onto the same value #. @@ -460,12 +494,12 @@ void LiveInterval::join(LiveInterval &Other, OutIt->start = I->start; OutIt->end = I->end; } - + // Didn't merge, on to the next one. ++OutIt; } } - + // If we merge some live ranges, chop off the end. ranges.erase(OutIt, end()); } @@ -483,7 +517,7 @@ void LiveInterval::join(LiveInterval &Other, if (VNI) { if (NumValNos >= NumVals) valnos.push_back(VNI); - else + else valnos[NumValNos] = VNI; VNI->id = NumValNos++; // Renumber val#. } @@ -502,25 +536,13 @@ void LiveInterval::join(LiveInterval &Other, } ComputeJoinedWeight(Other); - - // Update regalloc hint if currently there isn't one. - if (TargetRegisterInfo::isVirtualRegister(reg) && - TargetRegisterInfo::isVirtualRegister(Other.reg)) { - std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg); - if (Hint.first == 0 && Hint.second == 0) { - std::pair<unsigned, unsigned> OtherHint = - MRI->getRegAllocationHint(Other.reg); - if (OtherHint.first || OtherHint.second) - MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second); - } - } } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live /// interval as the specified value number. The LiveRanges in RHS are /// allowed to overlap with LiveRanges in the current interval, but only if /// the overlapping LiveRanges have the specified value number. -void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, +void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, VNInfo *LHSValNo) { // TODO: Make this more efficient. iterator InsertPos = begin(); @@ -569,7 +591,7 @@ void LiveInterval::MergeValueInAsValue( // If this trimmed away the whole range, ignore it. if (Start == End) continue; } - + // Map the valno in the other live range to the current live range. 
IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP); } @@ -584,18 +606,10 @@ void LiveInterval::MergeValueInAsValue( if (I->valno == V1) { isDead = false; break; - } - if (isDead) { - // Now that V1 is dead, remove it. If it is the largest value number, - // just nuke it (and any other deleted values neighboring it), otherwise - // mark it as ~1U so it can be nuked later. - if (V1->id == getNumValNums()-1) { - do { - valnos.pop_back(); - } while (!valnos.empty() && valnos.back()->isUnused()); - } else { - V1->setIsUnused(true); } + if (isDead) { + // Now that V1 is dead, remove it. + markValNoForDeletion(V1); } } } @@ -609,7 +623,7 @@ void LiveInterval::MergeInClobberRanges(LiveIntervals &li_, const LiveInterval &Clobbers, VNInfo::Allocator &VNInfoAllocator) { if (Clobbers.empty()) return; - + DenseMap<VNInfo*, VNInfo*> ValNoMaps; VNInfo *UnusedValNo = 0; iterator IP = begin(); @@ -679,10 +693,10 @@ void LiveInterval::MergeInClobberRange(LiveIntervals &li_, // for unknown values, use it. VNInfo *ClobberValNo = getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator); - + iterator IP = begin(); IP = std::upper_bound(IP, end(), Start); - + // If the start of this range overlaps with an existing liverange, trim it. if (IP != begin() && IP[-1].end > Start) { Start = IP[-1].end; @@ -695,7 +709,7 @@ void LiveInterval::MergeInClobberRange(LiveIntervals &li_, // If this trimmed away the whole range, ignore it. if (Start == End) return; } - + // Insert the clobber interval. addRangeFrom(LiveRange(Start, End, ClobberValNo), IP); } @@ -722,7 +736,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { for (iterator I = begin(); I != end(); ) { iterator LR = I++; if (LR->valno != V1) continue; // Not a V1 LiveRange. - + // Okay, we found a V1 live range. If it had a previous, touching, V2 live // range, extend it. if (LR != begin()) { @@ -736,11 +750,11 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { LR = Prev; } } - + // Okay, now we have a V1 or V2 live range that is maximally merged forward. // Ensure that it is a V2 live-range. LR->valno = V2; - + // If we can merge it into later V2 live ranges, do so now. We ignore any // following V1 live ranges, as they will be merged in subsequent iterations // of the loop. @@ -752,18 +766,10 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } } - - // Now that V1 is dead, remove it. If it is the largest value number, just - // nuke it (and any other deleted values neighboring it), otherwise mark it as - // ~1U so it can be nuked later. - if (V1->id == getNumValNums()-1) { - do { - valnos.pop_back(); - } while (valnos.back()->isUnused()); - } else { - V1->setIsUnused(true); - } - + + // Now that V1 is dead, remove it. + markValNoForDeletion(V1); + return V2; } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index 194d03d..2726fc3 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -47,7 +47,7 @@ using namespace llvm; // Hidden options for help debugging. 
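Before moving into LiveIntervalAnalysis.cpp, note that the MergeValueNumberInto hunk above also quietly fixes a latent bug: its copy of the "nuke the largest value number or mark it unused" block popped valnos in a do/while guarded only by valnos.back()->isUnused(), with no empty() check, whereas the shared markValNoForDeletion helper that now replaces all four copies (removeRange, removeValNo, MergeValueInAsValue, MergeValueNumberInto) adds that guard. The helper's invariant, restated from the diff with explanatory comments added:

    // VNInfo ids are dense indices into valnos, so a dead value can only be
    // physically removed when it is the last entry (taking any trailing
    // already-unused values with it); otherwise it is merely flagged unused
    // and reclaimed by a later renumbering such as RenumberValues.
    void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
      if (ValNo->id == getNumValNums()-1) {
        do {
          valnos.pop_back();
        } while (!valnos.empty() && valnos.back()->isUnused());
      } else {
        ValNo->setIsUnused(true);
      }
    }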
-static cl::opt<bool> DisableReMat("disable-rematerialization", +static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); STATISTIC(numIntervals , "Number of original intervals"); @@ -55,22 +55,24 @@ STATISTIC(numFolds , "Number of loads/stores folded into instructions"); STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; -static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis"); +INITIALIZE_PASS(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false); void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<LiveVariables>(); AU.addRequired<LiveVariables>(); - AU.addPreservedID(MachineLoopInfoID); + AU.addPreserved<LiveVariables>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); AU.addPreservedID(MachineDominatorsID); - + if (!StrongPHIElim) { AU.addPreservedID(PHIEliminationID); AU.addRequiredID(PHIEliminationID); } - + AU.addRequiredID(TwoAddressInstructionPassID); AU.addPreserved<ProcessImplicitDefs>(); AU.addRequired<ProcessImplicitDefs>(); @@ -84,7 +86,7 @@ void LiveIntervals::releaseMemory() { for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(), E = r2iMap_.end(); I != E; ++I) delete I->second; - + r2iMap_.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. @@ -188,10 +190,6 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, const MachineInstr &MI = *I; // Allow copies to and from li.reg - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) - if (SrcReg == li.reg || DstReg == li.reg) - continue; if (MI.isCopy()) if (MI.getOperand(0).getReg() == li.reg || MI.getOperand(1).getReg() == li.reg) @@ -278,7 +276,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { /// isPartialRedef - Return true if the specified def at the specific index is /// partially re-defining the specified live interval. A common case of this is -/// a definition of the sub-register. +/// a definition of the sub-register. bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, LiveInterval &interval) { if (!MO.getSubReg() || MO.isEarlyClobber()) @@ -324,9 +322,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, mi->addRegisterDefined(interval.reg); MachineInstr *CopyMI = NULL; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isCopyLike() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) { + if (mi->isCopyLike()) { CopyMI = mi; } @@ -420,8 +416,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // def-and-use register operand. // It may also be partial redef like this: - // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0 - // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0 + // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0 + // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0 bool PartReDef = isPartialRedef(MIIdx, MO, interval); if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) { // If this is a two-address definition, then we have already processed @@ -454,11 +450,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, OldValNo->setCopy(0); // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ... 
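The removals that follow retire the target hook TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg) in favor of asking the instruction directly. A sketch of the replacement idiom (inspectCopy is a hypothetical name; the operand positions are the ones the conflictsWithPhysReg hunk above relies on):

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    static void inspectCopy(const MachineInstr &MI) {
      // isCopy() matches the target-independent COPY instruction, with
      // operand 0 the destination and operand 1 the source; isCopyLike()
      // widens the test to related pseudo-copies (e.g. SUBREG_TO_REG) at
      // this revision.
      if (MI.isCopy()) {
        unsigned DstReg = MI.getOperand(0).getReg();
        unsigned SrcReg = MI.getOperand(1).getReg();
        (void)DstReg; (void)SrcReg; // replaces the old hook's out-parameters
      }
    }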
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (PartReDef && (mi->isCopyLike() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))) + if (PartReDef && mi->isCopyLike()) OldValNo->setCopy(&*mi); - + // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); DEBUG(dbgs() << " replace range with " << LR); @@ -485,12 +479,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo; MachineInstr *CopyMI = NULL; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isCopyLike() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (mi->isCopyLike()) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); - + SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); @@ -567,10 +559,10 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, goto exit; } } - + baseIndex = baseIndex.getNextIndex(); } - + // The only case we should have a dead physreg here without a killing or // instruction where we know it's dead is if it is live-in to the function // and never used. Another possible case is the implicit use of the @@ -602,9 +594,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, getOrCreateInterval(MO.getReg())); else if (allocatableRegs_[MO.getReg()]) { MachineInstr *CopyMI = NULL; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (MI->isCopyLike() || - tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (MI->isCopyLike()) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(MO.getReg()), CopyMI); @@ -696,7 +686,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, /// registers. for some ordering of the machine instructions [1,N] a /// live interval is an interval [i, j) where 1 <= i <= j < N for /// which a variable is live -void LiveIntervals::computeIntervals() { +void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); @@ -723,11 +713,11 @@ void LiveIntervals::computeIntervals() { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), true); } - + // Skip over empty initial indices. if (getInstructionFromIndex(MIIndex) == 0) MIIndex = indexes_->getNextNonNullIndex(MIIndex); - + for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); MI != miEnd; ++MI) { DEBUG(dbgs() << MIIndex << "\t" << *MI); @@ -746,7 +736,7 @@ void LiveIntervals::computeIntervals() { else if (MO.isUndef()) UndefUses.push_back(MO.getReg()); } - + // Move to the next instr slot. MIIndex = indexes_->getNextNonNullIndex(MIIndex); } @@ -791,7 +781,7 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, unsigned Reg = MO.getReg(); if (Reg == 0 || Reg == li.reg) continue; - + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !allocatableRegs_[Reg]) continue; @@ -810,7 +800,7 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, /// which reaches the given instruction also reaches the specified use index. 
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, SlotIndex UseIdx) const { - SlotIndex Index = getInstructionIndex(MI); + SlotIndex Index = getInstructionIndex(MI); VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno; LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx); return UI != li.end() && UI->valno == ValNo; @@ -915,7 +905,7 @@ static bool FilterFoldedOps(MachineInstr *MI, } return false; } - + /// tryFoldMemoryOperand - Attempts to fold either a spill / restore from /// slot / to reg or any rematerialized load into ith operand of specified @@ -1035,7 +1025,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, /// for addIntervalsForSpills to rewrite uses / defs for the given live range. bool LiveIntervals:: rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, SlotIndex index, SlotIndex end, + bool TrySplit, SlotIndex index, SlotIndex end, MachineInstr *MI, MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, unsigned Slot, int LdSlot, @@ -1094,7 +1084,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // keep the src/dst regs pinned. // // Keep track of whether we replace a use and/or def so that we can - // create the spill interval with the appropriate range. + // create the spill interval with the appropriate range. SmallVector<unsigned, 2> Ops; tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); @@ -1156,7 +1146,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (mopj.isImplicit()) rewriteImplicitOps(li, MI, NewVReg, vrm); } - + if (CreatedNewVReg) { if (DefIsReMat) { vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); @@ -1696,7 +1686,7 @@ addIntervalsForSpills(const LiveInterval &li, if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) { if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT) Slot = vrm.assignVirt2StackSlot(li.reg); - + // This case only occurs when the prealloc splitter has already assigned // a stack slot to this vreg. else @@ -1753,7 +1743,7 @@ addIntervalsForSpills(const LiveInterval &li, Ops.push_back(j); if (MO.isDef()) continue; - if (isReMat || + if (isReMat || (!FoundUse && !alsoFoldARestore(Id, index, VReg, RestoreMBBs, RestoreIdxes))) { // MI has two-address uses of the same register. If the use @@ -1866,7 +1856,6 @@ addIntervalsForSpills(const LiveInterval &li, for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { LiveInterval *LI = NewLIs[i]; if (!LI->empty()) { - LI->weight /= SlotIndex::NUM * getApproximateInstructionCount(*LI); if (!AddedKill.count(LI)) { LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; SlotIndex LastUseIdx = LR->end.getBaseIndex(); @@ -1899,7 +1888,7 @@ bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const { /// getRepresentativeReg - Find the largest super register of the specified /// physical register. unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const { - // Find the largest super-register that is allocatable. + // Find the largest super-register that is allocatable. 
unsigned BestReg = Reg; for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) { unsigned SuperReg = *AS; @@ -2013,7 +2002,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, SlotIndex(getInstructionIndex(startInst).getDefIndex()), getMBBEndIdx(startInst->getParent()), VN); Interval.addRange(LR); - + return LR; } diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp index 709e2c6..b5c385f 100644 --- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -25,7 +25,8 @@ using namespace llvm; char LiveStacks::ID = 0; -static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis"); +INITIALIZE_PASS(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false); void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 41b891d..375307b 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -42,7 +42,8 @@ using namespace llvm; char LiveVariables::ID = 0; -static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis"); +INITIALIZE_PASS(LiveVariables, "livevars", + "Live Variable Analysis", false, false); void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { @@ -482,21 +483,6 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, } } -namespace { - struct RegSorter { - const TargetRegisterInfo *TRI; - - RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { } - bool operator()(unsigned A, unsigned B) { - if (TRI->isSubRegister(A, B)) - return true; - else if (TRI->isSubRegister(B, A)) - return false; - return A < B; - } - }; -} - bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp new file mode 100644 index 0000000..7e366f0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -0,0 +1,354 @@ +//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass assigns local frame indices to stack slots relative to one another +// and allocates additional base registers to access them when the target +// estimates they are likely to be out of range of stack pointer and frame +// pointer relative addressing.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "localstackalloc" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" + +using namespace llvm; + +STATISTIC(NumAllocations, "Number of frame indices allocated into local block"); +STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated"); +STATISTIC(NumReplacements, "Number of frame indices references replaced"); + +namespace { + class FrameRef { + MachineBasicBlock::iterator MI; // Instr referencing the frame + int64_t LocalOffset; // Local offset of the frame idx referenced + public: + FrameRef(MachineBasicBlock::iterator I, int64_t Offset) : + MI(I), LocalOffset(Offset) {} + bool operator<(const FrameRef &RHS) const { + return LocalOffset < RHS.LocalOffset; + } + MachineBasicBlock::iterator getMachineInstr() { return MI; } + }; + + class LocalStackSlotPass: public MachineFunctionPass { + SmallVector<int64_t,16> LocalOffsets; + + void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, + bool StackGrowsDown, unsigned &MaxAlign); + void calculateFrameObjectOffsets(MachineFunction &Fn); + bool insertFrameReferenceRegisters(MachineFunction &Fn); + public: + static char ID; // Pass identification, replacement for typeid + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { } + bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const { + return "Local Stack Slot Allocation"; + } + + private: + }; +} // end anonymous namespace + +char LocalStackSlotPass::ID = 0; + +FunctionPass *llvm::createLocalStackSlotAllocationPass() { + return new LocalStackSlotPass(); +} + +bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + unsigned LocalObjectCount = MFI->getObjectIndexEnd(); + + // If the target doesn't want/need this pass, or if there are no locals + // to consider, early exit. + if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0) + return true; + + // Make sure we have enough space to store the local offsets. + LocalOffsets.resize(MFI->getObjectIndexEnd()); + + // Lay out the local blob. + calculateFrameObjectOffsets(MF); + + // Insert virtual base registers to resolve frame index references. + bool UsedBaseRegs = insertFrameReferenceRegisters(MF); + + // Tell MFI whether any base registers were allocated. PEI will only + // want to use the local block allocations from this pass if there were any. 
+ // Otherwise, PEI can do a somewhat better job of getting the alignment right + // without a hole at the start since it knows the alignment of the stack + // at the start of local allocation, and this pass doesn't. + MFI->setUseLocalStackAllocationBlock(UsedBaseRegs); + + return true; +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, + int FrameIdx, int64_t &Offset, + bool StackGrowsDown, + unsigned &MaxAlign) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + int64_t LocalOffset = StackGrowsDown ? -Offset : Offset; + DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " + << LocalOffset << "\n"); + // Keep the offset available for base register allocation + LocalOffsets[FrameIdx] = LocalOffset; + // And tell MFI about it for PEI to use later + MFI->mapLocalFrameObject(FrameIdx, LocalOffset); + + if (!StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + ++NumAllocations; +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + int64_t Offset = 0; + unsigned MaxAlign = 0; + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet<int, 16> LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, + StackGrowsDown, MaxAlign); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + LargeStackObjs.insert(i); + } + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (LargeStackObjs.count(i)) + continue; + + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + } + + // Remember how big this blob of stack space is + MFI->setLocalFrameSize(Offset); + MFI->setLocalFrameMaxAlign(MaxAlign); +} + +static inline bool +lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs, + std::pair<unsigned, int64_t> &RegOffset, + int64_t FrameSizeAdjust, + int64_t LocalFrameOffset, + const MachineInstr *MI, + const TargetRegisterInfo *TRI) { + unsigned e = Regs.size(); + for (unsigned i = 0; i < e; ++i) { + RegOffset = Regs[i]; + // Check if the relative offset from where the base register points + // to the target address is in range for the instruction. + int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second; + if (TRI->isFrameOffsetLegal(MI, Offset)) + return true; + } + return false; +} + +bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { + // Scan the function's instructions looking for frame index references. + // For each, ask the target if it wants a virtual base register for it + // based on what we can tell it about where the local will end up in the + // stack frame. If it wants one, re-use a suitable one we've previously + // allocated, or if there isn't one that fits the bill, allocate a new one + // and ask the target to create a defining instruction for it. + bool UsedBaseReg = false; + + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin(); + + // Collect all of the instructions in the block that reference + // a frame index. Also store the frame index referenced to ease later + // lookup. (For any insn that has more than one FI reference, we arbitrarily + // choose the first one). + SmallVector<FrameRef, 64> FrameReferenceInsns; + // A base register definition is a register+offset pair. + SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters; + + + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + MachineInstr *MI = I; + // Debug value instructions can't be out of range, so they don't need + // any updates. + if (MI->isDebugValue()) + continue; + // For now, allocate the base register(s) within the basic block + // where they're used, and don't try to keep them around outside + // of that. It may be beneficial to try sharing them more broadly + // than that, but the increased register pressure makes that a + // tricky thing to balance. Investigate if re-materializing these + // becomes an issue. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + // Consider replacing all frame index operands that reference + // an object allocated in the local block. + if (MI->getOperand(i).isFI()) { + // Don't try this with values not in the local block. + if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex())) + break; + FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()])); + break; + } + } + } + } + // Sort the frame references by local offset + array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + + + // Loop through the frame references and allocate for them as necessary + for (int ref = 0, e = FrameReferenceInsns.size(); ref < e; ++ref) { + MachineBasicBlock::iterator I = + FrameReferenceInsns[ref].getMachineInstr(); + MachineInstr *MI = I; + for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) { + // Consider replacing all frame index operands that reference + // an object allocated in the local block. + if (MI->getOperand(idx).isFI()) { + int FrameIdx = MI->getOperand(idx).getIndex(); + + assert(MFI->isObjectPreAllocated(FrameIdx) && + "Only pre-allocated locals expected!"); + + DEBUG(dbgs() << "Considering: " << *MI); + if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) { + unsigned BaseReg = 0; + int64_t Offset = 0; + int64_t FrameSizeAdjust = + StackGrowsDown ? MFI->getLocalFrameSize() : 0; + + DEBUG(dbgs() << " Replacing FI in: " << *MI); + + // If we have a suitable base register available, use it; otherwise + // create a new one. Note that any offset encoded in the + // instruction itself will be taken into account by the target, + // so we don't have to adjust for it here when reusing a base + // register. + std::pair<unsigned, int64_t> RegOffset; + if (lookupCandidateBaseReg(BaseRegisters, RegOffset, + FrameSizeAdjust, + LocalOffsets[FrameIdx], + MI, TRI)) { + DEBUG(dbgs() << " Reusing base register " << + RegOffset.first << "\n"); + // We found a register to reuse. + BaseReg = RegOffset.first; + Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] - + RegOffset.second; + } else { + // No previously defined register was in range, so create a + // new one. + int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); + const TargetRegisterClass *RC = TRI->getPointerRegClass(); + BaseReg = Fn.getRegInfo().createVirtualRegister(RC); + + DEBUG(dbgs() << " Materializing base register " << BaseReg << + " at frame local offset " << + LocalOffsets[FrameIdx] + InstrOffset << "\n"); + // Tell the target to insert the instruction to initialize + // the base register. + TRI->materializeFrameBaseRegister(InsertionPt, BaseReg, + FrameIdx, InstrOffset); + + // The base register already includes any offset specified + // by the instruction, so account for that so it doesn't get + // applied twice. + Offset = -InstrOffset; + + int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] + + InstrOffset; + BaseRegisters.push_back( + std::pair<unsigned, int64_t>(BaseReg, BaseOffset)); + ++NumBaseRegisters; + UsedBaseReg = true; + } + assert(BaseReg != 0 && "Unable to allocate virtual base register!"); + + // Modify the instruction to use the new base register rather + // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; + } + } + } + } + return UsedBaseReg; +} diff --git a/contrib/llvm/lib/CodeGen/LowerSubregs.cpp b/contrib/llvm/lib/CodeGen/LowerSubregs.cpp index dfd4eae..ad1c537 100644 --- a/contrib/llvm/lib/CodeGen/LowerSubregs.cpp +++ b/contrib/llvm/lib/CodeGen/LowerSubregs.cpp @@ -36,7 +36,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {} + LowerSubregsInstructionPass() : MachineFunctionPass(ID) {} const char *getPassName() const { return "Subregister lowering instruction pass"; @@ -58,9 +58,6 @@ namespace { void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, const TargetRegisterInfo *TRI); - void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, - const TargetRegisterInfo *TRI, - bool AddIfNotFound = false); void TransferImplicitDefs(MachineInstr *MI); }; @@ -87,23 +84,6 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, } } -/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed, -/// and the lowered replacement instructions immediately precede it. -/// Mark the replacement instructions with the kill flag. -void -LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI, - unsigned SrcReg, - const TargetRegisterInfo *TRI, - bool AddIfNotFound) { - for (MachineBasicBlock::iterator MII = - prior(MachineBasicBlock::iterator(MI)); ; --MII) { - if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound)) - break; - assert(MII != MI->getParent()->begin() && - "copyPhysReg output doesn't reference source register!"); - } -} - /// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered /// replacement instructions immediately precede it. Copy any implicit-def /// operands from MI to the replacement instruction. diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index a27ee47..50f3f67 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -441,7 +441,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); - DEBUG(dbgs() << "PHIElimination splitting critical edge:" + DEBUG(dbgs() << "Splitting critical edge:" " BB#" << getNumber() << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); @@ -468,11 +468,33 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LV->addNewBlock(NMBB, this, Succ); if (MachineDominatorTree *MDT = - P->getAnalysisIfAvailable<MachineDominatorTree>()) - MDT->addNewBlock(NMBB, this); + P->getAnalysisIfAvailable<MachineDominatorTree>()) { + // Update dominator information. + MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ); + + bool IsNewIDom = true; + for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end(); + PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + if (PredBB == NMBB) + continue; + if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) { + IsNewIDom = false; + break; + } + } + + // We know "this" dominates the newly created basic block. + MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this); + + // If all the other predecessors of "Succ" are dominated by "Succ" itself + // then the new block is the new immediate dominator of "Succ". Otherwise, + // the new block doesn't dominate anything. 
+ if (IsNewIDom) + MDT->changeImmediateDominator(SucccDTNode, NewDTNode); + } - if (MachineLoopInfo *MLI = - P->getAnalysisIfAvailable<MachineLoopInfo>()) + if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>()) if (MachineLoop *TIL = MLI->getLoopFor(this)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 833cc00..92e2299 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -41,7 +41,7 @@ namespace { MachineRegisterInfo *MRI; public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {} + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -49,6 +49,7 @@ namespace { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); + AU.addPreservedID(MachineLoopInfoID); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); } @@ -85,8 +86,8 @@ namespace { } // end anonymous namespace char MachineCSE::ID = 0; -static RegisterPass<MachineCSE> -X("machine-cse", "Machine Common Subexpression Elimination"); +INITIALIZE_PASS(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false); FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } @@ -107,29 +108,9 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, MachineInstr *DefMI = MRI->getVRegDef(Reg); if (DefMI->getParent() != MBB) continue; - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - TargetRegisterInfo::isVirtualRegister(SrcReg) && - !SrcSubIdx && !DstSubIdx) { - const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); - if (!NewRC) - continue; - DEBUG(dbgs() << "Coalescing: " << *DefMI); - DEBUG(dbgs() << "*** to: " << *MI); - MO.setReg(SrcReg); - MRI->clearKillFlags(SrcReg); - if (NewRC != SRC) - MRI->setRegClass(SrcReg, NewRC); - DefMI->eraseFromParent(); - ++NumCoalesces; - Changed = true; - } - if (!DefMI->isCopy()) continue; - SrcReg = DefMI->getOperand(1).getReg(); + unsigned SrcReg = DefMI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) continue; if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) @@ -261,19 +242,13 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, return false; } -static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - return MI->isCopyLike() || - TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); -} - bool MachineCSE::isCSECandidate(MachineInstr *MI) { if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) return false; // Ignore copies. - if (isCopy(MI, TII)) + if (MI->isCopyLike()) return false; // Ignore stuff that we obviously can't move. @@ -329,7 +304,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, E = MRI->use_nodbg_end(); I != E; ++I) { MachineInstr *Use = &*I; // Ignore copies. 
- if (!isCopy(Use, TII)) { + if (!Use->isCopyLike()) { HasNonCopyUse = true; break; } @@ -385,7 +360,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Look for trivial copy coalescing opportunities. if (PerformTrivialCoalescing(MI, MBB)) { // After coalescing MI itself may become a copy. - if (isCopy(MI, TII)) + if (MI->isCopyLike()) continue; FoundCSE = VNT.count(MI); } diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp index b5f8fbb..3c67478 100644 --- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp +++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp @@ -24,10 +24,10 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); char MachineDominatorTree::ID = 0; -static RegisterPass<MachineDominatorTree> -E("machinedomtree", "MachineDominator Tree Construction", true); +INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", + "MachineDominator Tree Construction", true, true); -const PassInfo *const llvm::MachineDominatorsID = &E; +char &llvm::MachineDominatorsID = MachineDominatorTree::ID; void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -41,7 +41,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { } MachineDominatorTree::MachineDominatorTree() - : MachineFunctionPass(&ID) { + : MachineFunctionPass(ID) { DT = new DominatorTreeBase<MachineBasicBlock>(false); } diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index 666120f..0171700 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -397,7 +397,6 @@ void MachineFunction::viewCFGOnly() const /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, const TargetRegisterClass *RC) { - assert(RC->contains(PReg) && "Not the correct regclass!"); MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { @@ -447,7 +446,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/false)); + /*isSS*/false, false)); return -++NumFixedObjects; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp index 07a0f45..4f84b95 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -20,14 +20,14 @@ using namespace llvm; // a default constructor. 
static PassInfo X("Machine Function Analysis", "machine-function-analysis", - intptr_t(&MachineFunctionAnalysis::ID), 0, + &MachineFunctionAnalysis::ID, 0, /*CFGOnly=*/false, /*is_analysis=*/true); char MachineFunctionAnalysis::ID = 0; MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, CodeGenOpt::Level OL) : - FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) { + FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { } MachineFunctionAnalysis::~MachineFunctionAnalysis() { diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 547c4fe..2aaa798 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -29,7 +29,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { const std::string Banner; MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) - : MachineFunctionPass(&ID), OS(os), Banner(banner) {} + : MachineFunctionPass(ID), OS(os), Banner(banner) {} const char *getPassName() const { return "MachineFunction Printer"; } diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 6b2e985..446e461 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -1236,12 +1236,18 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. const MachineFunction *MF = 0; + const MachineRegisterInfo *MRI = 0; if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); if (!TM && MF) TM = &MF->getTarget(); + if (MF) + MRI = &MF->getRegInfo(); } + // Save a list of virtual registers. + SmallVector<unsigned, 8> VirtRegs; + // Print explicitly defined operands on the left of an assignment syntax. unsigned StartOp = 0, e = getNumOperands(); for (; StartOp < e && getOperand(StartOp).isReg() && @@ -1250,6 +1256,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { ++StartOp) { if (StartOp != 0) OS << ", "; getOperand(StartOp).print(OS, TM); + unsigned Reg = getOperand(StartOp).getReg(); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) + VirtRegs.push_back(Reg); } if (StartOp != 0) @@ -1264,6 +1273,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + VirtRegs.push_back(MO.getReg()); + // Omit call-clobbered registers which aren't used anywhere. This makes // call instructions much less noisy on targets where calls clobber lots // of registers. Don't rely on MO.isDead() because we may be called before @@ -1325,11 +1338,29 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); i != e; ++i) { OS << **i; - if (next(i) != e) + if (llvm::next(i) != e) OS << " "; } } + // Print the regclass of any virtual registers encountered. 
+ if (MRI && !VirtRegs.empty()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + for (unsigned i = 0; i != VirtRegs.size(); ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); + OS << " " << RC->getName() << ":%reg" << VirtRegs[i]; + for (unsigned j = i+1; j != VirtRegs.size();) { + if (MRI->getRegClass(VirtRegs[j]) != RC) { + ++j; + continue; + } + if (VirtRegs[i] != VirtRegs[j]) + OS << "," << VirtRegs[j]; + VirtRegs.erase(VirtRegs.begin()+j); + } + } + } + if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index 4c054f5..1a74b74 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -68,16 +68,16 @@ namespace { BitVector AllocatableSet; - // For each opcode, keep a list of potentail CSE instructions. + // For each opcode, keep a list of potential CSE instructions. DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; public: static char ID; // Pass identification, replacement for typeid MachineLICM() : - MachineFunctionPass(&ID), PreRegAlloc(true) {} + MachineFunctionPass(ID), PreRegAlloc(true) {} explicit MachineLICM(bool PreRA) : - MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} + MachineFunctionPass(ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -189,8 +189,8 @@ namespace { } // end anonymous namespace char MachineLICM::ID = 0; -static RegisterPass<MachineLICM> -X("machinelicm", "Machine Loop Invariant Code Motion"); +INITIALIZE_PASS(MachineLICM, "machinelicm", + "Machine Loop Invariant Code Motion", false, false); FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { return new MachineLICM(PreRegAlloc); @@ -488,9 +488,14 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { MII = NextMII; } - const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) - HoistRegion(Children[I]); + // Don't hoist things out of a large switch statement. This often causes + // code to be hoisted that wasn't going to be executed, and increases + // register pressure in a situation where it's likely to matter. 
+ if (BB->succ_size() < 25) { + const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); + for (unsigned I = 0, E = Children.size(); I != E; ++I) + HoistRegion(Children[I]); + } } /// IsLICMCandidate - Returns true if the instruction may be a suitable diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index 269538b..bca4b0c 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -30,10 +30,10 @@ TEMPLATE_INSTANTIATION(MLIB); } char MachineLoopInfo::ID = 0; -static RegisterPass<MachineLoopInfo> -X("machine-loops", "Machine Natural Loop Construction", true); +INITIALIZE_PASS(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true); -const PassInfo *const llvm::MachineLoopInfoID = &X; +char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 15778b4..b647a4d 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -28,8 +28,8 @@ using namespace llvm; using namespace llvm::dwarf; // Handle the Pass registration stuff necessary to use TargetData's. -static RegisterPass<MachineModuleInfo> -X("machinemoduleinfo", "Machine Module Information"); +INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false); char MachineModuleInfo::ID = 0; // Out of line virtual method. @@ -254,7 +254,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { //===----------------------------------------------------------------------===// MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) -: ImmutablePass(&ID), Context(MAI), +: ImmutablePass(ID), Context(MAI), ObjFileMMI(0), CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){ // Always emit some info, by default "no personality" info. 
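A pattern repeated through nearly every file in this commit is visible again just above: each file-scope "static RegisterPass<Foo> X(...)" registration object becomes INITIALIZE_PASS(Foo, "arg", "name", cfg, analysis), and pass constructors stop taking the pass ID by address (MachineFunctionPass(&ID) becomes MachineFunctionPass(ID)). As a minimal sketch of what the macro amounted to at this stage (stated from memory, not from the patch; the authoritative definition lives in include/llvm/PassSupport.h and was reworked in later revisions), it was roughly a thin wrapper over the old registration object:

    // Approximate contemporary definition; a sketch, not quoted from the patch.
    // Funneling every registration through one macro gives the project a single
    // point at which the mechanism can later be replaced (for example, by
    // generated initializeFooPass() entry points) without another tree-wide edit.
    #define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \
      static RegisterPass<passName> passName ## _info(arg, name, cfg, analysis)
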
@@ -264,7 +264,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) } MachineModuleInfo::MachineModuleInfo() -: ImmutablePass(&ID), Context(*(MCAsmInfo*)0) { +: ImmutablePass(ID), Context(*(MCAsmInfo*)0) { assert(0 && "This MachineModuleInfo constructor should never be called, MMI " "should always be explicitly constructed by LLVMTargetMachine"); abort(); @@ -579,10 +579,3 @@ namespace { } }; } - -MachineModuleInfo::VariableDbgInfoMapTy & -MachineModuleInfo::getVariableDbgInfo() { - std::stable_sort(VariableDbgInfo.begin(), VariableDbgInfo.end(), - VariableDebugSorter()); - return VariableDbgInfo; -} diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 61334fc..c8f8faf 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -26,11 +26,21 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -STATISTIC(NumSunk, "Number of machine instructions sunk"); +static cl::opt<bool> +SplitEdges("machine-sink-split", + cl::desc("Split critical edges during machine sinking"), + cl::init(false), cl::Hidden); +static cl::opt<unsigned> +SplitLimit("split-limit", + cl::init(~0u), cl::Hidden); + +STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumSplit, "Number of critical edges split"); namespace { class MachineSinking : public MachineFunctionPass { @@ -44,7 +54,7 @@ namespace { public: static char ID; // Pass identification - MachineSinking() : MachineFunctionPass(&ID) {} + MachineSinking() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -59,21 +69,28 @@ namespace { } private: bool ProcessBlock(MachineBasicBlock &MBB); + MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From, + MachineBasicBlock *To); bool SinkInstruction(MachineInstr *MI, bool &SawStore); - bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const; + bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, bool &LocalUse) const; }; } // end anonymous namespace char MachineSinking::ID = 0; -static RegisterPass<MachineSinking> -X("machine-sink", "Machine code sinking"); +INITIALIZE_PASS(MachineSinking, "machine-sink", + "Machine code sinking", false, false); FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } /// AllUsesDominatedByBlock - Return true if all uses of the specified register -/// occur in blocks dominated by the specified block. +/// occur in blocks dominated by the specified block. If any use is in the +/// definition block, then return false since it is never legal to move def +/// after uses. bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, - MachineBasicBlock *MBB) const { + MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &LocalUse) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); // Ignoring debug uses is necessary so debug info doesn't affect the code. @@ -91,6 +108,9 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); + } else if (UseBlock == DefMBB) { + LocalUse = true; + return false; } // Check that it dominates. 
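The hunk that follows builds on the LocalUse flag just added to AllUsesDominatedByBlock: an instruction may sink out of its defining block into a successor only if that successor dominates every use of the value, and any use inside the defining block forbids sinking entirely, since a def can never legally move below one of its own uses. Here is a self-contained sketch of that rule with illustrative names rather than real LLVM API (PHI uses, which the actual pass attributes to the predecessor block, are ignored for brevity):

    #include <cstddef>
    #include <vector>

    struct Block { int id; };
    struct Use { const Block *BB; };  // the block containing each use

    // Dom(A, B) answers "does A dominate B?"; in a real compiler this query
    // is backed by a dominator tree.
    typedef bool (*DomFn)(const Block *A, const Block *B);

    bool canSinkTo(const std::vector<Use> &Uses, const Block *DefBB,
                   const Block *SuccBB, DomFn Dom, bool &LocalUse) {
      LocalUse = false;
      for (std::size_t i = 0; i != Uses.size(); ++i) {
        if (Uses[i].BB == DefBB) {
          LocalUse = true;   // used in its own defining block: never sinkable
          return false;
        }
        if (!Dom(SuccBB, Uses[i].BB))
          return false;      // some path reaches this use without SuccBB
      }
      return true;
    }

When the only obstacle is a critical edge, the new SplitCriticalEdge logic in the next hunk (guarded by the -machine-sink-split option added above) tries to manufacture a block that does satisfy the dominance test rather than giving up.
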
@@ -166,6 +186,66 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { return MadeChange; } +MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB) { + // Avoid breaking back edge. From == To means backedge for single BB loop. + if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB) + return 0; + + // Check for more "complex" loops. + if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) || + !LI->isLoopHeader(ToBB)) { + // It's not always legal to break critical edges and sink the computation + // to the edge. + // + // BB#1: + // v1024 + // Beq BB#3 + // <fallthrough> + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // + // BB#1: + // ... + // Bne BB#2 + // BB#4: + // v1024 = + // B BB#3 + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // flow. We need to ensure the new basic block where the computation is + // sunk to dominates all the uses. + // It's only legal to break critical edge and sink the computation to the + // new block if all the predecessors of "To", except for "From", are + // not dominated by "From". Given SSA property, this means these + // predecessors are dominated by "To". + for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(), + E = ToBB->pred_end(); PI != E; ++PI) { + if (*PI == FromBB) + continue; + if (!DT->dominates(ToBB, *PI)) + return 0; + } + + // FIXME: Determine if it's cost effective to break this edge. + return FromBB->SplitCriticalEdge(ToBB, this); + } + + return 0; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { @@ -246,7 +326,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (SuccToSinkTo) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse)) return false; continue; @@ -256,10 +337,14 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // we should sink to. for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), E = ParentBlock->succ_end(); SI != E; ++SI) { - if (AllUsesDominatedByBlock(Reg, *SI)) { + bool LocalUse = false; + if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) { SuccToSinkTo = *SI; break; } + if (LocalUse) + // Def is used locally, it's never safe to move this def. + return false; } // If we couldn't find a block to sink to, ignore this instruction. @@ -303,27 +388,44 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. + bool TryBreak = false; bool store = true; if (!MI->isSafeToMove(TII, AA, store)) { - DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); - return false; + DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); + TryBreak = true; } // We don't want to sink across a critical edge if we don't dominate the // successor. We could be introducing calculations to new code paths. 
- if (!DT->dominates(ParentBlock, SuccToSinkTo)) { - DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); - return false; + if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); + TryBreak = true; } // Don't sink instructions into a loop. - if (LI->isLoopHeader(SuccToSinkTo)) { - DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); - return false; + if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + TryBreak = true; } // Otherwise we are OK with sinking along a critical edge. - DEBUG(dbgs() << "Sinking along critical edge.\n"); + if (!TryBreak) + DEBUG(dbgs() << "Sinking along critical edge.\n"); + else { + MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo); + if (!NewSucc) { + DEBUG(dbgs() << + " *** PUNTING: Not legal or profitable to break critical edge\n"); + return false; + } else { + DEBUG(dbgs() << " *** Splitting critical edge:" + " BB#" << ParentBlock->getNumber() + << " -- BB#" << NewSucc->getNumber() + << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); + SuccToSinkTo = NewSucc; + ++NumSplit; + } + } } // Determine where to insert into. Skip phi nodes. diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 2297c90..1e88562 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1,4 +1,4 @@ -//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===// +//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===// // // The LLVM Compiler Infrastructure // @@ -24,6 +24,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -44,19 +45,14 @@ using namespace llvm; namespace { struct MachineVerifier { - MachineVerifier(Pass *pass, bool allowDoubleDefs) : + MachineVerifier(Pass *pass) : PASS(pass), - allowVirtDoubleDefs(allowDoubleDefs), - allowPhysDoubleDefs(true), OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) {} bool runOnMachineFunction(MachineFunction &MF); Pass *const PASS; - const bool allowVirtDoubleDefs; - const bool allowPhysDoubleDefs; - const char *const OutFileName; raw_ostream *OS; const MachineFunction *MF; @@ -91,10 +87,6 @@ namespace { // defined. Map value is the user. RegMap vregsLiveIn; - // Vregs that must be dead in because they are defined without being - // killed first. Map value is the defining instruction. - RegMap vregsDeadIn; - // Regs killed in MBB. They may be defined again, and will then be in both // regsKilled and regsLiveOut. 
RegSet regsKilled; @@ -175,6 +167,7 @@ namespace { // Analysis information if available LiveVariables *LiveVars; + const LiveIntervals *LiveInts; void visitMachineFunctionBefore(); void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); @@ -195,15 +188,14 @@ namespace { void calcRegsRequired(); void verifyLiveVariables(); + void verifyLiveIntervals(); }; struct MachineVerifierPass : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid - bool AllowDoubleDefs; - explicit MachineVerifierPass(bool allowDoubleDefs = false) - : MachineFunctionPass(&ID), - AllowDoubleDefs(allowDoubleDefs) {} + MachineVerifierPass() + : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -211,7 +203,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) { - MF.verify(this, AllowDoubleDefs); + MF.verify(this); return false; } }; @@ -219,17 +211,15 @@ namespace { } char MachineVerifierPass::ID = 0; -static RegisterPass<MachineVerifierPass> -MachineVer("machineverifier", "Verify generated machine code"); -static const PassInfo *const MachineVerifyID = &MachineVer; +INITIALIZE_PASS(MachineVerifierPass, "machineverifier", + "Verify generated machine code", false, false); -FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) { - return new MachineVerifierPass(allowPhysDoubleDefs); +FunctionPass *llvm::createMachineVerifierPass() { + return new MachineVerifierPass(); } -void MachineFunction::verify(Pass *p, bool allowDoubleDefs) const { - MachineVerifier(p, allowDoubleDefs) - .runOnMachineFunction(const_cast<MachineFunction&>(*this)); +void MachineFunction::verify(Pass *p) const { + MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this)); } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { @@ -255,10 +245,13 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { TRI = TM->getRegisterInfo(); MRI = &MF.getRegInfo(); + LiveVars = NULL; + LiveInts = NULL; if (PASS) { - LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); - } else { - LiveVars = NULL; + LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); + // We don't want to verify LiveVariables if LiveIntervals is available. + if (!LiveInts) + LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); } visitMachineFunctionBefore(); @@ -512,6 +505,20 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if ((*I)->isStore() && !TI.mayStore()) report("Missing mayStore flag", MI); } + + // Debug values must not have a slot index. + // Other instructions must have one. + if (LiveInts) { + bool mapped = !LiveInts->isNotInMIMap(MI); + if (MI->isDebugValue()) { + if (mapped) + report("Debug instruction has a slot index", MI); + } else { + if (!mapped) + report("Missing slot index", MI); + } + } + } void @@ -570,15 +577,30 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } else isKill = MO->isKill(); - if (isKill) { + if (isKill) addRegWithSubRegs(regsKilled, Reg); - // Check that LiveVars knows this kill - if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg)) { - LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), - VI.Kills.end(), MI) == VI.Kills.end()) - report("Kill missing from LiveVariables", MO, MONum); + // Check that LiveVars knows this kill. 
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), + VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (!LI.liveAt(UseIdx)) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // TODO: Verify isKill == LI.killedAt. + } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + report("Virtual register has no Live interval", MO, MONum); } } @@ -607,6 +629,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { addRegWithSubRegs(regsDead, Reg); else addRegWithSubRegs(regsDefined, Reg); + + // Check LiveInts for a live range, but only for virtual registers. + if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) { + assert(LR->valno && "NULL valno is not allowed"); + if (LR->valno->def != DefIdx) { + report("Inconsistent valno->def", MO, MONum); + *OS << "Valno " << LR->valno->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live range at def", MO, MONum); + *OS << DefIdx << " is not live in " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } } // Check register classes. @@ -670,40 +714,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); - set_subtract(regsLive, regsKilled); - regsKilled.clear(); - - // Verify that both <def> and <def,dead> operands refer to dead registers. - RegVector defs(regsDefined); - defs.append(regsDead.begin(), regsDead.end()); - - for (RegVector::const_iterator I = defs.begin(), E = defs.end(); - I != E; ++I) { - if (regsLive.count(*I)) { - if (TargetRegisterInfo::isPhysicalRegister(*I)) { - if (!allowPhysDoubleDefs && !isReserved(*I) && - !regsLiveInButUnused.count(*I)) { - report("Redefining a live physical register", MI); - *OS << "Register " << TRI->getName(*I) - << " was defined but already live.\n"; - } - } else { - if (!allowVirtDoubleDefs) { - report("Redefining a live virtual register", MI); - *OS << "Virtual register %reg" << *I - << " was defined but already live.\n"; - } - } - } else if (TargetRegisterInfo::isVirtualRegister(*I) && - !MInfo.regsKilled.count(*I)) { - // Virtual register defined without being killed first must be dead on - // entry. 
- MInfo.vregsDeadIn.insert(std::make_pair(*I, MI)); - } - } - - set_subtract(regsLive, regsDead); regsDead.clear(); - set_union(regsLive, regsDefined); regsDefined.clear(); + set_subtract(regsLive, regsKilled); regsKilled.clear(); + set_subtract(regsLive, regsDead); regsDead.clear(); + set_union(regsLive, regsDefined); regsDefined.clear(); } void @@ -828,35 +841,15 @@ void MachineVerifier::visitMachineFunctionAfter() { continue; checkPHIOps(MFI); - - // Verify dead-in virtual registers. - if (!allowVirtDoubleDefs) { - for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), - PrE = MFI->pred_end(); PrI != PrE; ++PrI) { - BBInfo &PrInfo = MBBInfoMap[*PrI]; - if (!PrInfo.reachable) - continue; - - for (RegMap::iterator I = MInfo.vregsDeadIn.begin(), - E = MInfo.vregsDeadIn.end(); I != E; ++I) { - // DeadIn register must be in neither regsLiveOut or vregsPassed of - // any predecessor. - if (PrInfo.isLiveOut(I->first)) { - report("Live-in virtual register redefined", I->second); - *OS << "Register %reg" << I->first - << " was live-out from predecessor MBB #" - << (*PrI)->getNumber() << ".\n"; - } - } - } - } } - // Now check LiveVariables info if available - if (LiveVars) { + // Now check liveness info if available + if (LiveVars || LiveInts) calcRegsRequired(); + if (LiveVars) verifyLiveVariables(); - } + if (LiveInts) + verifyLiveIntervals(); } void MachineVerifier::verifyLiveVariables() { @@ -886,4 +879,55 @@ void MachineVerifier::verifyLiveVariables() { } } +void MachineVerifier::verifyLiveIntervals() { + assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); + for (LiveIntervals::const_iterator LVI = LiveInts->begin(), + LVE = LiveInts->end(); LVI != LVE; ++LVI) { + const LiveInterval &LI = *LVI->second; + assert(LVI->first == LI.reg && "Invalid reg to interval mapping"); + + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I!=E; ++I) { + VNInfo *VNI = *I; + const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def); + + if (!DefLR) { + if (!VNI->isUnused()) { + report("Valno not live at def and not marked unused", MF); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } + continue; + } + + if (VNI->isUnused()) + continue; + + if (DefLR->valno != VNI) { + report("Live range at def has different valno", MF); + DefLR->print(*OS); + *OS << " should use valno #" << VNI->id << " in " << LI << '\n'; + } + + } + + for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { + const LiveRange &LR = *I; + assert(LR.valno && "Live range has no valno"); + + if (LR.valno->id >= LI.getNumValNums() || + LR.valno != LI.getValNumInfo(LR.valno->id)) { + report("Foreign valno in live range", MF); + LR.print(*OS); + *OS << " has a valno not in " << LI << '\n'; + } + + if (LR.valno->isUnused()) { + report("Live range valno is marked unused", MF); + LR.print(*OS); + *OS << " in " << LI << '\n'; + } + + } + } +} diff --git a/contrib/llvm/lib/CodeGen/OptimizeExts.cpp b/contrib/llvm/lib/CodeGen/OptimizeExts.cpp deleted file mode 100644 index dcdc243..0000000 --- a/contrib/llvm/lib/CodeGen/OptimizeExts.cpp +++ /dev/null @@ -1,220 +0,0 @@ -//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This pass performs optimization of sign / zero extension instructions. It -// may be extended to handle other instructions of similar property. -// -// On some targets, some instructions, e.g. X86 sign / zero extension, may -// leave the source value in the lower part of the result. This pass will -// replace (some) uses of the pre-extension value with uses of the sub-register -// of the results. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ext-opt" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden, - cl::desc("Aggressive extension optimization")); - -STATISTIC(NumReuse, "Number of extension results reused"); - -namespace { - class OptimizeExts : public MachineFunctionPass { - const TargetMachine *TM; - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - MachineDominatorTree *DT; // Machine dominator tree - - public: - static char ID; // Pass identification - OptimizeExts() : MachineFunctionPass(&ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - if (Aggressive) { - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - } - } - - private: - bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &LocalMIs); - }; -} - -char OptimizeExts::ID = 0; -static RegisterPass<OptimizeExts> -X("opt-exts", "Optimize sign / zero extensions"); - -FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); } - -/// OptimizeInstr - If instruction is a copy-like instruction, i.e. it reads -/// a single register and writes a single register and it does not modify -/// the source, and if the source value is preserved as a sub-register of -/// the result, then replace all reachable uses of the source with the subreg -/// of the result. -/// Do not generate an EXTRACT that is used only in a debug use, as this -/// changes the code. Since this code does not currently share EXTRACTs, just -/// ignore all debug uses. -bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &LocalMIs) { - bool Changed = false; - LocalMIs.insert(MI); - - unsigned SrcReg, DstReg, SubIdx; - if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) { - if (TargetRegisterInfo::isPhysicalRegister(DstReg) || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) - return false; - - MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); - if (++UI == MRI->use_nodbg_end()) - // No other uses. - return false; - - // Ok, the source has other uses. See if we can replace the other uses - // with use of the result of the extension. 
- SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; - UI = MRI->use_nodbg_begin(DstReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); - UI != UE; ++UI) - ReachedBBs.insert(UI->getParent()); - - bool ExtendLife = true; - // Uses that are in the same BB of uses of the result of the instruction. - SmallVector<MachineOperand*, 8> Uses; - // Uses that the result of the instruction can reach. - SmallVector<MachineOperand*, 8> ExtendedUses; - - UI = MRI->use_nodbg_begin(SrcReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); - UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; - if (UseMI == MI) - continue; - if (UseMI->isPHI()) { - ExtendLife = false; - continue; - } - - // It's an error to translate this: - // - // %reg1025 = <sext> %reg1024 - // ... - // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 - // - // into this: - // - // %reg1025 = <sext> %reg1024 - // ... - // %reg1027 = COPY %reg1025:4 - // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 - // - // The problem here is that SUBREG_TO_REG is there to assert that an - // implicit zext occurs. It doesn't insert a zext instruction. If we allow - // the COPY here, it will give us the value after the <sext>, - // not the original value of %reg1024 before <sext>. - if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) - continue; - - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { - // Local uses that come after the extension. - if (!LocalMIs.count(UseMI)) - Uses.push_back(&UseMO); - } else if (ReachedBBs.count(UseMBB)) - // Non-local uses where the result of extension is used. Always - // replace these unless it's a PHI. - Uses.push_back(&UseMO); - else if (Aggressive && DT->dominates(MBB, UseMBB)) - // We may want to extend live range of the extension result in order - // to replace these uses. - ExtendedUses.push_back(&UseMO); - else { - // Both will be live out of the def MBB anyway. Don't extend live - // range of the extension result. - ExtendLife = false; - break; - } - } - - if (ExtendLife && !ExtendedUses.empty()) - // Ok, we'll extend the liveness of the extension result. - std::copy(ExtendedUses.begin(), ExtendedUses.end(), - std::back_inserter(Uses)); - - // Now replace all uses. - if (!Uses.empty()) { - SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; - // Look for PHI uses of the extended result, we don't want to extend the - // liveness of a PHI input. It breaks all kinds of assumptions down - // stream. A PHI use is expected to be the kill of its source values. - UI = MRI->use_nodbg_begin(DstReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); - UI != UE; ++UI) - if (UI->isPHI()) - PHIBBs.insert(UI->getParent()); - - const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - for (unsigned i = 0, e = Uses.size(); i != e; ++i) { - MachineOperand *UseMO = Uses[i]; - MachineInstr *UseMI = UseMO->getParent(); - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (PHIBBs.count(UseMBB)) - continue; - unsigned NewVR = MRI->createVirtualRegister(RC); - BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) - .addReg(DstReg, 0, SubIdx); - UseMO->setReg(NewVR); - ++NumReuse; - Changed = true; - } - } - } - - return Changed; -} - -bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) { - TM = &MF.getTarget(); - TII = TM->getInstrInfo(); - MRI = &MF.getRegInfo(); - DT = Aggressive ? 
&getAnalysis<MachineDominatorTree>() : 0; - - bool Changed = false; - - SmallPtrSet<MachineInstr*, 8> LocalMIs; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - LocalMIs.clear(); - for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME; - ++MII) { - MachineInstr *MI = &*MII; - Changed |= OptimizeInstr(MI, MBB, LocalMIs); - } - } - - return Changed; -} diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index 1613fe2..edb4eea 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -33,7 +33,7 @@ namespace { public: static char ID; // Pass identification - OptimizePHIs() : MachineFunctionPass(&ID) {} + OptimizePHIs() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -54,8 +54,8 @@ namespace { } char OptimizePHIs::ID = 0; -static RegisterPass<OptimizePHIs> -X("opt-phis", "Optimize machine instruction PHIs"); +INITIALIZE_PASS(OptimizePHIs, "opt-phis", + "Optimize machine instruction PHIs", false, false); FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } @@ -101,16 +101,10 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); // Skip over register-to-register moves. - unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx; - if (SrcMI && - TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) && - SrcSubIdx == 0 && DstSubIdx == 0 && - TargetRegisterInfo::isVirtualRegister(MvSrcReg)) - SrcMI = MRI->getVRegDef(MvSrcReg); - else if (SrcMI && SrcMI->isCopy() && - !SrcMI->getOperand(0).getSubReg() && - !SrcMI->getOperand(1).getSubReg() && - TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); if (!SrcMI) return false; diff --git a/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h b/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h index 3bb24e1..791c227 100644 --- a/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h +++ b/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h @@ -173,9 +173,13 @@ namespace PBQP { bool finished = false; while (!finished) { - if (!optimalReduce()) - if (!impl().heuristicReduce()) + if (!optimalReduce()) { + if (impl().heuristicReduce()) { + getSolver().recordRN(); + } else { finished = true; + } + } } } diff --git a/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h b/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h index 02938df..35514f9 100644 --- a/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h +++ b/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h @@ -226,6 +226,8 @@ namespace PBQP { // Nothing to do. Just push the node onto the reduction stack. pushToStack(nItr); + + s.recordR0(); } /// \brief Apply rule R1. @@ -274,6 +276,7 @@ namespace PBQP { assert(nd.getSolverDegree() == 0 && "Degree 1 with edge removed should be 0."); pushToStack(xnItr); + s.recordR1(); } /// \brief Apply rule R2. @@ -378,8 +381,14 @@ namespace PBQP { removeSolverEdge(zxeItr); pushToStack(xnItr); + s.recordR2(); } + /// \brief Record an application of the RN rule. + /// + /// For use by the HeuristicBase. 
+ void recordRN() { s.recordRN(); } + private: NodeData& getSolverNodeData(Graph::NodeItr nItr) { diff --git a/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h b/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h index 4c1ce11..18eaf7c 100644 --- a/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ b/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -52,9 +52,7 @@ namespace PBQP { bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr)) return true; - if (s->getSolverDegree(n1Itr) < s->getSolverDegree(n2Itr)) - return false; - return (&*n1Itr < &*n2Itr); + return false; } private: HeuristicSolverImpl<Briggs> *s; @@ -69,9 +67,7 @@ namespace PBQP { cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr); if (cost1 < cost2) return true; - if (cost1 > cost2) - return false; - return (&*n1Itr < &*n2Itr); + return false; } private: diff --git a/contrib/llvm/lib/CodeGen/PBQP/Solution.h b/contrib/llvm/lib/CodeGen/PBQP/Solution.h index 294b537..047fd04 100644 --- a/contrib/llvm/lib/CodeGen/PBQP/Solution.h +++ b/contrib/llvm/lib/CodeGen/PBQP/Solution.h @@ -26,15 +26,46 @@ namespace PBQP { /// To get the selection for each node in the problem use the getSelection method. class Solution { private: + typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap; SelectionsMap selections; + unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions; + public: /// \brief Number of nodes for which selections have been made. /// @return Number of nodes for which selections have been made. unsigned numNodes() const { return selections.size(); } + /// \brief Records a reduction via the R0 rule. Should be called from the + /// solver only. + void recordR0() { ++r0Reductions; } + + /// \brief Returns the number of R0 reductions applied to solve the problem. + unsigned numR0Reductions() const { return r0Reductions; } + + /// \brief Records a reduction via the R1 rule. Should be called from the + /// solver only. + void recordR1() { ++r1Reductions; } + + /// \brief Returns the number of R1 reductions applied to solve the problem. + unsigned numR1Reductions() const { return r1Reductions; } + + /// \brief Records a reduction via the R2 rule. Should be called from the + /// solver only. + void recordR2() { ++r2Reductions; } + + /// \brief Returns the number of R2 reductions applied to solve the problem. + unsigned numR2Reductions() const { return r2Reductions; } + + /// \brief Records a reduction via the RN rule. Should be called from the + /// solver only. + void recordRN() { ++rNReductions; } + + /// \brief Returns the number of RN reductions applied to solve the problem. + unsigned numRNReductions() const { return rNReductions; } + /// \brief Set the selection for a given node. /// @param nItr Node iterator. /// @param selection Selection for nItr.
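The reduction counters added to PBQP::Solution let a client see how much of a problem the optimal R0/R1/R2 rules handled versus the heuristic RN rule; a high RN count suggests a more approximate solution. A minimal sketch of how a caller might surface them, assuming the HeuristicSolver<Briggs>::solve() entry point used by the PBQP register allocator (includes relative to lib/CodeGen) and assuming the counters are zero-initialized by a Solution constructor not visible in this hunk:

// Hypothetical helper, not part of this import: solve a PBQP graph with the
// Briggs heuristic and print the reduction counts recorded through
// recordR0()/recordR1()/recordR2()/recordRN().
#include "PBQP/HeuristicSolver.h"
#include "PBQP/Heuristics/Briggs.h"
#include "llvm/Support/raw_ostream.h"

static PBQP::Solution solveAndReport(PBQP::Graph &G) {
  PBQP::Solution S =
    PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(G);
  llvm::errs() << "PBQP reductions: R0=" << S.numR0Reductions()
               << " R1=" << S.numR1Reductions()
               << " R2=" << S.numR2Reductions()
               << " RN=" << S.numRNReductions() << '\n';
  return S;
}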
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index ea6b094..d4df4c5 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Function.h" @@ -37,16 +38,15 @@ STATISTIC(NumAtomic, "Number of atomic phis lowered"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; -static RegisterPass<PHIElimination> -X("phi-node-elimination", "Eliminate PHI nodes for register allocation"); +INITIALIZE_PASS(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", false, false); -const PassInfo *const llvm::PHIEliminationID = &X; +char &llvm::PHIEliminationID = PHIElimination::ID; void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveVariables>(); AU.addPreserved<MachineDominatorTree>(); - // rdar://7401784 This would be nice: - // AU.addPreservedID(MachineLoopInfoID); + AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -56,9 +56,11 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; // Split critical edges to help the coalescer - if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) + if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= SplitPHIEdges(MF, *I, *LV); + Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + } // Populate VRegPHIUseCount analyzePHINodes(MF); @@ -179,6 +181,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; unsigned DestReg = MPhi->getOperand(0).getReg(); + assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs"); bool isDead = MPhi->getOperand(0).isDead(); // Create a new register for the incoming PHI arguments. @@ -265,6 +268,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto; for (int i = NumSrcs - 1; i >= 0; --i) { unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); + unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); @@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Insert the copy. if (!reusedIncoming && IncomingReg) BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg); + TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; @@ -378,10 +383,12 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV) { + LiveVariables &LV, + MachineLoopInfo *MLI) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. 
+ bool Changed = false; for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { @@ -390,8 +397,15 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, // We break edges when registers are live out from the predecessor block // (not considering PHI nodes). If the register is live in to this block // anyway, we would gain nothing from splitting. - if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) - PreMBB->SplitCriticalEdge(&MBB, this); + // Avoid splitting backedges of loops. It would introduce small + // out-of-line blocks into the loop which is very bad for code placement. + if (PreMBB != &MBB && + !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) { + if (!MLI || + !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) && + MLI->isLoopHeader(&MBB))) + Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0; + } } } return true; diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.h b/contrib/llvm/lib/CodeGen/PHIElimination.h index 7dedf03..45a9718 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.h +++ b/contrib/llvm/lib/CodeGen/PHIElimination.h @@ -13,19 +13,21 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { class LiveVariables; + class MachineRegisterInfo; + class MachineLoopInfo; /// Lower PHI instructions to copies. class PHIElimination : public MachineFunctionPass { - MachineRegisterInfo *MRI; // Machine register information + MachineRegisterInfo *MRI; // Machine register information public: static char ID; // Pass identification, replacement for typeid - PHIElimination() : MachineFunctionPass(&ID) {} + PHIElimination() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -49,7 +51,7 @@ namespace llvm { /// Split critical edges where necessary for good coalescer performance. bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV); + LiveVariables &LV, MachineLoopInfo *MLI); /// SplitCriticalEdge - Split a critical edge from A to B by /// inserting a new MBB. Update branches in A and PHI instructions diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp new file mode 100644 index 0000000..17cee46 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -0,0 +1,287 @@ +//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Perform peephole optimizations on the machine code: +// +// - Optimize Extensions +// +// Optimization of sign / zero extension instructions. It may be extended to +// handle other instructions with similar properties. +// +// On some targets, some instructions, e.g. X86 sign / zero extension, may +// leave the source value in the lower part of the result. This optimization +// will replace some uses of the pre-extension value with uses of the +// sub-register of the results. +// +// - Optimize Comparisons +// +// Optimization of comparison instructions. 
For instance, in this code: +// +// sub r1, 1 +// cmp r1, 0 +// bz L1 +// +// If the "sub" instruction already sets (or could be modified to set) the +// same flag that the "cmp" instruction sets and that "bz" uses, then we can +// eliminate the "cmp" instruction. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "peephole-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +// Optimize Extensions +static cl::opt<bool> +Aggressive("aggressive-ext-opt", cl::Hidden, + cl::desc("Aggressive extension optimization")); + +STATISTIC(NumReuse, "Number of extension results reused"); +STATISTIC(NumEliminated, "Number of compares eliminated"); + +namespace { + class PeepholeOptimizer : public MachineFunctionPass { + const TargetMachine *TM; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + MachineDominatorTree *DT; // Machine dominator tree + + public: + static char ID; // Pass identification + PeepholeOptimizer() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + if (Aggressive) { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + } + + private: + bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); + bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &LocalMIs); + }; +} + +char PeepholeOptimizer::ID = 0; +INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false); + +FunctionPass *llvm::createPeepholeOptimizerPass() { + return new PeepholeOptimizer(); +} + +/// OptimizeExtInstr - If the instruction is a copy-like instruction, i.e. it reads +/// a single register and writes a single register and it does not modify the +/// source, and if the source value is preserved as a sub-register of the +/// result, then replace all reachable uses of the source with the subreg of the +/// result. +/// +/// Do not generate an EXTRACT that is used only in a debug use, as this changes +/// the code. Since this code does not currently share EXTRACTs, just ignore all +/// debug uses. +bool PeepholeOptimizer:: +OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &LocalMIs) { + LocalMIs.insert(MI); + + unsigned SrcReg, DstReg, SubIdx; + if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) + return false; + + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) + return false; + + MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); + if (++UI == MRI->use_nodbg_end()) + // No other uses. + return false; + + // The source has other uses. See if we can replace the other uses with use of + // the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; + UI = MRI->use_nodbg_begin(DstReg); + for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + UI != UE; ++UI) + ReachedBBs.insert(UI->getParent()); + + // Uses that are in the same BB of uses of the result of the instruction. + SmallVector<MachineOperand*, 8> Uses; + + // Uses that the result of the instruction can reach. + SmallVector<MachineOperand*, 8> ExtendedUses; + + bool ExtendLife = true; + UI = MRI->use_nodbg_begin(SrcReg); + for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + if (UseMI == MI) + continue; + + if (UseMI->isPHI()) { + ExtendLife = false; + continue; + } + + // It's an error to translate this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 + // + // into this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1027 = COPY %reg1025:4 + // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 + // + // The problem here is that SUBREG_TO_REG is there to assert that an + // implicit zext occurs. It doesn't insert a zext instruction. If we allow + // the COPY here, it will give us the value after the <sext>, not the + // original value of %reg1024 before <sext>. + if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) + continue; + + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (UseMBB == MBB) { + // Local uses that come after the extension. + if (!LocalMIs.count(UseMI)) + Uses.push_back(&UseMO); + } else if (ReachedBBs.count(UseMBB)) { + // Non-local uses where the result of the extension is used. Always + // replace these unless it's a PHI. + Uses.push_back(&UseMO); + } else if (Aggressive && DT->dominates(MBB, UseMBB)) { + // We may want to extend the live range of the extension result in order + // to replace these uses. + ExtendedUses.push_back(&UseMO); + } else { + // Both will be live out of the def MBB anyway. Don't extend live range of + // the extension result. + ExtendLife = false; + break; + } + } + + if (ExtendLife && !ExtendedUses.empty()) + // Extend the liveness of the extension result. + std::copy(ExtendedUses.begin(), ExtendedUses.end(), + std::back_inserter(Uses)); + + // Now replace all uses. + bool Changed = false; + if (!Uses.empty()) { + SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; + + // Look for PHI uses of the extended result, we don't want to extend the + // liveness of a PHI input. It breaks all kinds of assumptions down + // stream. A PHI use is expected to be the kill of its source values. 
+ UI = MRI->use_nodbg_begin(DstReg); + for (MachineRegisterInfo::use_nodbg_iterator + UE = MRI->use_nodbg_end(); UI != UE; ++UI) + if (UI->isPHI()) + PHIBBs.insert(UI->getParent()); + + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + for (unsigned i = 0, e = Uses.size(); i != e; ++i) { + MachineOperand *UseMO = Uses[i]; + MachineInstr *UseMI = UseMO->getParent(); + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (PHIBBs.count(UseMBB)) + continue; + + unsigned NewVR = MRI->createVirtualRegister(RC); + BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) + .addReg(DstReg, 0, SubIdx); + + UseMO->setReg(NewVR); + ++NumReuse; + Changed = true; + } + } + + return Changed; +} + +/// OptimizeCmpInstr - If the instruction is a compare and the previous +/// instruction it's comparing against already sets (or could be modified to +/// set) the same flag as the compare, then we can remove the comparison and use +/// the flag from the previous instruction. +bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, + MachineBasicBlock *MBB) { + // If this instruction is a comparison against zero and isn't comparing a + // physical register, we can try to optimize it. + unsigned SrcReg; + int CmpValue; + if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) || + TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0) + return false; + + MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); + if (llvm::next(DI) != MRI->def_end()) + // Only support one definition. + return false; + + // Attempt to convert the defining instruction to set the "zero" flag. + if (TII->ConvertToSetZeroFlag(&*DI, MI)) { + ++NumEliminated; + return true; + } + + return false; +} + +bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + MRI = &MF.getRegInfo(); + DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0; + + bool Changed = false; + + SmallPtrSet<MachineInstr*, 8> LocalMIs; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + LocalMIs.clear(); + + for (MachineBasicBlock::iterator + MII = I->begin(), ME = I->end(); MII != ME; ) { + MachineInstr *MI = &*MII; + + if (MI->getDesc().isCompare() && + !MI->getDesc().hasUnmodeledSideEffects()) { + ++MII; // The iterator may become invalid if the compare is deleted. + Changed |= OptimizeCmpInstr(MI, MBB); + } else { + Changed |= OptimizeExtInstr(MI, MBB, LocalMIs); + ++MII; + } + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 4af8e07..f0bd6d1 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -85,7 +85,7 @@ namespace { public: static char ID; PostRAScheduler(CodeGenOpt::Level ol) : - MachineFunctionPass(&ID), OptLevel(ol) {} + MachineFunctionPass(ID), OptLevel(ol) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -130,7 +130,7 @@ namespace { /// KillIndices - The index of the most recent kill (proceeding bottom-up), /// or ~0u if the register is not live.
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> KillIndices; public: SchedulePostRATDList(MachineFunction &MF, @@ -140,7 +140,8 @@ namespace { AntiDepBreaker *ADB, AliasAnalysis *aa) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), - HazardRec(HR), AntiDepBreak(ADB), AA(aa) {} + HazardRec(HR), AntiDepBreak(ADB), AA(aa), + KillIndices(TRI->getNumRegs()) {} ~SchedulePostRATDList() { } diff --git a/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp b/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp index fb2f909..cd9d83e 100644 --- a/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp +++ b/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp @@ -92,7 +92,7 @@ namespace { public: static char ID; PreAllocSplitting() - : MachineFunctionPass(&ID) {} + : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -203,10 +203,11 @@ namespace { char PreAllocSplitting::ID = 0; -static RegisterPass<PreAllocSplitting> -X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting"); +INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting", + "Pre-Register Allocation Live Interval Splitting", + false, false); -const PassInfo *const llvm::PreAllocSplittingID = &X; +char &llvm::PreAllocSplittingID = PreAllocSplitting::ID; /// findSpillPoint - Find a gap as far away from the given MI that's suitable /// for spilling the current live interval. The index must be before any @@ -676,11 +677,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); // If the def is a move, set the copy field. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (DstReg == LI->reg) - NewVN->setCopy(&*DI); - } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) + if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) NewVN->setCopy(&*DI); NewVNs[&*DI] = NewVN; diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 2e31908..b8831db 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -26,8 +26,8 @@ using namespace llvm; char ProcessImplicitDefs::ID = 0; -static RegisterPass<ProcessImplicitDefs> X("processimpdefs", - "Process Implicit Definitions."); +INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions.", false, false); void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -46,12 +46,6 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, unsigned OpIdx, const TargetInstrInfo *tii_, SmallSet<unsigned, 8> &ImpDefRegs) { - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && - (DstSubReg == 0 || ImpDefRegs.count(DstReg))) - return true; - switch(OpIdx) { case 1: return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || @@ -75,14 +69,6 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg, return true; return false; } - - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) { - if (Reg != SrcReg) - return false; - if (DstSubReg == 0 || ImpDefRegs.count(DstReg)) - return true; - } return false; } diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 
3843b25..e2802c1 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -19,6 +19,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "pei" #include "PrologEpilogInserter.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -32,7 +33,10 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <climits> @@ -40,8 +44,11 @@ using namespace llvm; char PEI::ID = 0; -static RegisterPass<PEI> -X("prologepilog", "Prologue/Epilogue Insertion"); +INITIALIZE_PASS(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false); + +STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); +STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. @@ -56,7 +63,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); - FrameConstantRegMap.clear(); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo @@ -72,10 +78,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: - // - with shrink wrapping, place spills and restores to tightly + // - With shrink wrapping, place spills and restores to tightly // enclose regions in the Machine CFG of the function where - // they are used. Without shrink wrapping - // - default (no shrink wrapping), place all spills in the + // they are used. + // - Without shrink wrapping (default), place all spills in the // entry block, all restores in return blocks. placeCSRSpillsAndRestores(Fn); @@ -461,8 +467,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, Offset = (Offset + Align - 1) / Align * Align; if (StackGrowsDown) { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); MFI->setObjectOffset(FrameIdx, Offset); Offset += MFI->getObjectSize(FrameIdx); } @@ -547,15 +555,66 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } + // FIXME: Once this is working, the enable flag will change to a target + // check for whether the frame is large enough to want to use virtual + // frame index registers. Functions which don't want/need this optimization + // will continue to use the existing code path. + if (MFI->getUseLocalStackAllocationBlock()) { + unsigned Align = MFI->getLocalFrameMaxAlign(); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + + // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { + std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); + int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; + DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << + FIOffset << "]\n"); + MFI->setObjectOffset(Entry.first, FIOffset); + } + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + MaxAlign = std::max(Align, MaxAlign); + } + // Make sure that the stack protector comes before the local variables on the // stack. - if (MFI->getStackProtectorIndex() >= 0) + SmallSet<int, 16> LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && (int)i == RS->getScavengingFrameIndex()) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + LargeStackObjs.insert(i); + } + } + // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) @@ -564,6 +623,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; + if (LargeStackObjs.count(i)) + continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } @@ -694,16 +755,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - TargetRegisterInfo::FrameIndexValue Value; - unsigned VReg = - TRI.eliminateFrameIndex(MI, SPAdj, &Value, + TRI.eliminateFrameIndex(MI, SPAdj, FrameIndexVirtualScavenging ? NULL : RS); - if (VReg) { - assert (FrameIndexVirtualScavenging && - "Not scavenging, but virtual returned from " - "eliminateFrameIndex()!"); - FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); - } // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -731,38 +784,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { } } -/// findLastUseReg - find the killing use of the specified register within -/// the instruciton range. Return the operand number of the kill in Operand. -static MachineBasicBlock::iterator -findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, - unsigned Reg) { - // Scan forward to find the last use of this virtual register - for (++I; I != ME; ++I) { - MachineInstr *MI = I; - bool isDefInsn = false; - bool isKillInsn = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) - if (MI->getOperand(i).isReg()) { - unsigned OpReg = MI->getOperand(i).getReg(); - if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg)) - continue; - assert (OpReg == Reg - && "overlapping use of scavenged index register!"); - // If this is the killing use, we have a candidate. 
- if (MI->getOperand(i).isKill()) - isKillInsn = true; - else if (MI->getOperand(i).isDef()) - isDefInsn = true; - } - if (isKillInsn && !isDefInsn) - return I; - } - // If we hit the end of the basic block, there was no kill of - // the virtual register, which is wrong. - assert (0 && "scavenged index register never killed!"); - return ME; -} - /// scavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. @@ -772,27 +793,14 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); - // FIXME: The logic flow in this function is still too convoluted. - // It needs a cleanup refactoring. Do that in preparation for tracking - // more than one scratch register value and using ranges to find - // available scratch registers. - unsigned CurrentVirtReg = 0; - unsigned CurrentScratchReg = 0; - bool havePrevValue = false; - TargetRegisterInfo::FrameIndexValue PrevValue(0,0); - TargetRegisterInfo::FrameIndexValue Value(0,0); - MachineInstr *PrevLastUseMI = NULL; - unsigned PrevLastUseOp = 0; - bool trackingCurrentValue = false; + unsigned VirtReg = 0; + unsigned ScratchReg = 0; int SPAdj = 0; // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; - bool isDefInsn = false; - bool isKillInsn = false; - bool clobbersScratchReg = false; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { @@ -800,121 +808,30 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { unsigned Reg = MO.getReg(); if (Reg == 0) continue; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // If we have a previous scratch reg, check and see if anything - // here kills whatever value is in there. - if (Reg == CurrentScratchReg) { - if (MO.isUse()) { - // Two-address operands implicitly kill - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) - clobbersScratchReg = true; - } else { - assert (MO.isDef()); - clobbersScratchReg = true; - } - } + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - } - // If this is a def, remember that this insn defines the value. - // This lets us properly consider insns which re-use the scratch - // register, such as r2 = sub r2, #imm, in the middle of the - // scratch range. - if (MO.isDef()) - isDefInsn = true; + + ++NumVirtualFrameRegs; // Have we already allocated a scratch register for this virtual? - if (Reg != CurrentVirtReg) { + if (Reg != VirtReg) { // When we first encounter a new virtual register, it // must be a definition. assert(MI->getOperand(i).isDef() && "frame index virtual missing def!"); - // We can't have nested virtual register live ranges because - // there's only a guarantee of one scavenged register at a time. - assert (CurrentVirtReg == 0 && - "overlapping frame index virtual registers!"); - - // If the target gave us information about what's in the register, - // we can use that to re-use scratch regs. 
- DenseMap<unsigned, FrameConstantEntry>::iterator Entry = - FrameConstantRegMap.find(Reg); - trackingCurrentValue = Entry != FrameConstantRegMap.end(); - if (trackingCurrentValue) { - SPAdj = (*Entry).second.second; - Value = (*Entry).second.first; - } else { - SPAdj = 0; - Value.first = 0; - Value.second = 0; - } - - // If the scratch register from the last allocation is still - // available, see if the value matches. If it does, just re-use it. - if (trackingCurrentValue && havePrevValue && PrevValue == Value) { - // FIXME: This assumes that the instructions in the live range - // for the virtual register are exclusively for the purpose - // of populating the value in the register. That's reasonable - // for these frame index registers, but it's still a very, very - // strong assumption. rdar://7322732. Better would be to - // explicitly check each instruction in the range for references - // to the virtual register. Only delete those insns that - // touch the virtual register. - - // Find the last use of the new virtual register. Remove all - // instruction between here and there, and update the current - // instruction to reference the last use insn instead. - MachineBasicBlock::iterator LastUseMI = - findLastUseReg(I, BB->end(), Reg); - - // Remove all instructions up 'til the last use, since they're - // just calculating the value we already have. - BB->erase(I, LastUseMI); - I = LastUseMI; - - // Extend the live range of the scratch register - PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); - RS->setUsed(CurrentScratchReg); - CurrentVirtReg = Reg; - - // We deleted the instruction we were scanning the operands of. - // Jump back to the instruction iterator loop. Don't increment - // past this instruction since we updated the iterator already. - DoIncr = false; - break; - } - // Scavenge a new scratch register - CurrentVirtReg = Reg; + VirtReg = Reg; const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); - PrevValue = Value; + ScratchReg = RS->scavengeRegister(RC, I, SPAdj); + ++NumScavengedRegs; } // replace this reference to the virtual register with the // scratch register. - assert (CurrentScratchReg && "Missing scratch register!"); - MI->getOperand(i).setReg(CurrentScratchReg); + assert (ScratchReg && "Missing scratch register!"); + MI->getOperand(i).setReg(ScratchReg); - if (MI->getOperand(i).isKill()) { - isKillInsn = true; - PrevLastUseOp = i; - PrevLastUseMI = MI; - } } } - // If this is the last use of the scratch, stop tracking it. The - // last use will be a kill operand in an instruction that does - // not also define the scratch register. - if (isKillInsn && !isDefInsn) { - CurrentVirtReg = 0; - havePrevValue = trackingCurrentValue; - } - // Similarly, notice if instruction clobbered the value in the - // register we're tracking for possible later reuse. This is noted - // above, but enforced here since the value is still live while we - // process the rest of the operands of the instruction. 
- if (clobbersScratchReg) { - havePrevValue = false; - CurrentScratchReg = 0; - } if (DoIncr) { RS->forward(I); ++I; diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h index aa95773..d575124 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h @@ -36,7 +36,7 @@ namespace llvm { class PEI : public MachineFunctionPass { public: static char ID; - PEI() : MachineFunctionPass(&ID) {} + PEI() : MachineFunctionPass(ID) {} const char *getPassName() const { return "Prolog/Epilog Insertion & Frame Finalization"; @@ -99,13 +99,6 @@ namespace llvm { // TRI->requiresFrameIndexScavenging() for the current function. bool FrameIndexVirtualScavenging; - // When using the scavenger post-pass to resolve frame reference - // materialization registers, maintain a map of the registers to - // the constant value and SP adjustment associated with it. - typedef std::pair<TargetRegisterInfo::FrameIndexValue, int> - FrameConstantEntry; - DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap; - #ifndef NDEBUG // Machine function handle. MachineFunction* MF; diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index f44478e..fc150d5 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -16,6 +16,7 @@ #include "llvm/BasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -46,7 +47,7 @@ namespace { class RAFast : public MachineFunctionPass { public: static char ID; - RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1), + RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), isBulkSpilling(false) {} private: const TargetMachine *TM; @@ -80,6 +81,8 @@ namespace { // that is currently available in a physical register. LiveRegMap LiveVirtRegs; + DenseMap<unsigned, MachineInstr *> LiveDbgValueMap; + // RegState - Track the state of a physical register. enum RegState { // A disabled register is not available for allocation, but an alias may @@ -110,9 +113,9 @@ namespace { // Allocatable - vector of allocatable physical registers. BitVector Allocatable; - // SkippedInstrs - Descriptors of instructions whose clobber list was ignored - // because all registers were spilled. It is still necessary to mark all the - // clobbered registers as used by the function. + // SkippedInstrs - Descriptors of instructions whose clobber list was + // ignored because all registers were spilled. It is still necessary to + // mark all the clobbered registers as used by the function. SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs; // isBulkSpilling - This flag is set when LiveRegMap will be cleared @@ -236,8 +239,7 @@ void RAFast::killVirtReg(unsigned VirtReg) { } /// spillVirtReg - This method spills the value specified by VirtReg into the -/// corresponding stack slot if needed. If isKill is set, the register is also -/// killed. +/// corresponding stack slot if needed.
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); @@ -265,6 +267,31 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); ++NumStores; // Update statistics + // If this register is used by a DBG_VALUE then insert a new DBG_VALUE to + // identify the spilled location as the place to find the corresponding + // variable's value. + if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) { + const MDNode *MDPtr = + DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); + int64_t Offset = 0; + if (DBG->getOperand(1).isImm()) + Offset = DBG->getOperand(1).getImm(); + DebugLoc DL; + if (MI == MBB->end()) { + // If MI is at the basic block end, use the last instruction's location. + MachineBasicBlock::iterator EI = MI; + DL = (--EI)->getDebugLoc(); + } + else + DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) { + MachineBasicBlock *MBB = DBG->getParent(); + MBB->insert(MI, NewDV); + DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); + LiveDbgValueMap[LRI->first] = NewDV; + } + } if (SpillKill) LR.LastUse = 0; // Don't kill register again } @@ -471,7 +498,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { // First try to find a completely free register. for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg) && + Allocatable.test(PhysReg)) return assignVirtToPhysReg(LRE, PhysReg); } @@ -480,6 +508,8 @@ unsigned BestReg = 0, BestCost = spillImpossible; for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + if (!Allocatable.test(*I)) + continue; unsigned Cost = calcSpillCost(*I); // Cost is 0 when all aliases are already disabled. if (Cost == 0) @@ -520,12 +550,9 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; // It's a copy, use the destination register as a hint. if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); - else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) - Hint = DstReg; } allocVirtReg(MI, *LRI, Hint); } else if (LR.LastUse) { @@ -712,7 +739,8 @@ void RAFast::AllocateBasicBlock() { // Add live-in registers as live. for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - definePhysReg(MII, *I, regReserved); + if (Allocatable.test(*I)) + definePhysReg(MII, *I, regReserved); SmallVector<unsigned, 8> VirtDead; SmallVector<MachineInstr*, 32> Coalesced; @@ -756,31 +784,43 @@ void RAFast::AllocateBasicBlock() { // Debug values are not allowed to change codegen in any way.
if (MI->isDebugValue()) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); - if (LRI != LiveVirtRegs.end()) - setPhysReg(MI, i, LRI->second.PhysReg); - else { - int SS = StackSlotForVirtReg[Reg]; - if (SS == -1) - MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + bool ScanDbgValue = true; + while (ScanDbgValue) { + ScanDbgValue = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + LiveDbgValueMap[Reg] = MI; + LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(MI, i, LRI->second.PhysReg); else { - // Modify DBG_VALUE now that the value is in a spill slot. - uint64_t Offset = MI->getOperand(1).getImm(); - const MDNode *MDPtr = - MI->getOperand(MI->getNumOperands()-1).getMetadata(); - DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - } else - MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) + // We can't allocate a physreg for a DebugValue, sorry! + MO.setReg(0); + else { + // Modify DBG_VALUE now that the value is in a spill slot. + int64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << + "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; + } else + // We can't allocate a physreg for a DebugValue; sorry! + MO.setReg(0); + } } } } @@ -789,14 +829,13 @@ void RAFast::AllocateBasicBlock() { } // If this is a copy, we may be able to coalesce. - unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub; + unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0; if (MI->isCopy()) { CopyDst = MI->getOperand(0).getReg(); CopySrc = MI->getOperand(1).getReg(); CopyDstSub = MI->getOperand(0).getSubReg(); CopySrcSub = MI->getOperand(1).getSubReg(); - } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) - CopySrc = CopyDst = 0; + } // Track registers used by instruction. UsedInInstr.reset(); @@ -843,13 +882,18 @@ void RAFast::AllocateBasicBlock() { // operands. If there are also physical defs, these registers must avoid // both physical defs and uses, making them more constrained than normal // operands. + // Similarly, if there are multiple defs and tied operands, we must make + // sure the same register is allocated to uses and defs. // We didn't detect inline asm tied operands above, so just make this extra // pass for all inline asm. 
if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && hasPhysDefs)) { + (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. CopyDst = 0; + // Pretend we have early clobbers so the use operands get marked below. + // This is not necessary for the common case of a single tied use. + hasEarlyClobbers = true; } // Second scan. @@ -870,14 +914,17 @@ void RAFast::AllocateBasicBlock() { MRI->addPhysRegsUsed(UsedInInstr); - // Track registers defined by instruction - early clobbers at this point. + // Track registers defined by instruction - early clobbers and tied uses at + // this point. UsedInInstr.reset(); if (hasEarlyClobbers) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) continue; + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + // Look for physreg defs and tied uses. + if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; UsedInInstr.set(Reg); for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) UsedInInstr.set(*AS); @@ -887,9 +934,9 @@ void RAFast::AllocateBasicBlock() { unsigned DefOpEnd = MI->getNumOperands(); if (TID.isCall()) { // Spill all virtregs before a call. This serves two purposes: 1. If an - // exception is thrown, the landing pad is going to expect to find registers - // in their spill slots, and 2. we don't have to wade through all the - // <imp-def> operands on the call instruction. + // exception is thrown, the landing pad is going to expect to find + // registers in their spill slots, and 2. we don't have to wade through + // all the <imp-def> operands on the call instruction. DefOpEnd = VirtOpEnd; DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); @@ -992,6 +1039,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { SkippedInstrs.clear(); StackSlotForVirtReg.clear(); + LiveDbgValueMap.clear(); return true; } diff --git a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp index 044672d..5c62354 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp @@ -87,10 +87,10 @@ namespace { "to skip."), cl::init(0), cl::Hidden); - + struct RALinScan : public MachineFunctionPass { static char ID; - RALinScan() : MachineFunctionPass(&ID) { + RALinScan() : MachineFunctionPass(ID) { // Initialize the queue to record recently-used registers. if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); @@ -125,9 +125,10 @@ namespace { const TargetRegisterInfo* tri_; const TargetInstrInfo* tii_; BitVector allocatableRegs_; + BitVector reservedRegs_; LiveIntervals* li_; LiveStacks* ls_; - const MachineLoopInfo *loopInfo; + MachineLoopInfo *loopInfo; /// handled_ - Intervals are added to the handled_ set in the order of their /// start value. This is uses for backtracking. @@ -255,9 +256,9 @@ namespace { SmallVector<LiveInterval*, 8> &SpillIntervals); /// attemptTrivialCoalescing - If a simple interval is defined by a copy, - /// try allocate the definition the same register as the source register - /// if the register is not defined during live time of the interval. This - /// eliminate a copy. 
This is used to coalesce copies which were not + /// try to allocate the definition to the same register as the source, + /// if the register is not defined during the life time of the interval. + /// This eliminates a copy, and is used to coalesce copies which were not /// coalesced away before allocation either due to dest and src being in /// different register classes or because the coalescer was overly /// conservative. @@ -335,6 +336,17 @@ namespace { SmallVector<unsigned, 256> &inactiveCounts, bool SkipDGRegs); + /// getFirstNonReservedPhysReg - return the first non-reserved physical + /// register in the register class. + unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { + TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_); + TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_); + while (i != aoe && reservedRegs_.test(*i)) + ++i; + assert(i != aoe && "All registers reserved?!"); + return *i; + } + void ComputeRelatedRegClasses(); template <typename ItTy> @@ -358,8 +370,8 @@ namespace { char RALinScan::ID = 0; } -static RegisterPass<RALinScan> -X("linearscan-regalloc", "Linear Scan Register Allocator"); +INITIALIZE_PASS(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false); void RALinScan::ComputeRelatedRegClasses() { // First pass, add all reg classes to the union, and determine at least one @@ -371,7 +383,7 @@ void RALinScan::ComputeRelatedRegClasses() { for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); I != E; ++I) { HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; - + const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; if (PRC) { // Already processed this register. Just make sure we know that @@ -382,7 +394,7 @@ void RALinScan::ComputeRelatedRegClasses() { } } } - + // Second pass, now that we know conservatively what register classes each reg // belongs to, add info about aliases. We don't need to do this for targets // without register aliases. @@ -419,20 +431,15 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned CandReg; { MachineInstr *CopyMI; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (vni->def != SlotIndex() && vni->isDefAccurate() && - (CopyMI = li_->getInstructionFromIndex(vni->def)) && - (CopyMI->isCopy() || - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))) + (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) // Defined by a copy, try to extend SrcReg forward - CandReg = CopyMI->isCopy() ? 
CopyMI->getOperand(1).getReg() : SrcReg; + CandReg = CopyMI->getOperand(1).getReg(); else if (TrivCoalesceEnds && - (CopyMI = - li_->getInstructionFromIndex(range.end.getBaseIndex())) && - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - cur.reg == SrcReg) + (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && + CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg()) // Only used by a copy, try to extend DstReg backwards - CandReg = DstReg; + CandReg = CopyMI->getOperand(0).getReg(); else return Reg; } @@ -469,6 +476,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { tri_ = tm_->getRegisterInfo(); tii_ = tm_->getInstrInfo(); allocatableRegs_ = tri_->getAllocatableSet(fn); + reservedRegs_ = tri_->getReservedRegs(fn); li_ = &getAnalysis<LiveIntervals>(); ls_ = &getAnalysis<LiveStacks>(); loopInfo = &getAnalysis<MachineLoopInfo>(); @@ -487,9 +495,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { vrm_ = &getAnalysis<VirtRegMap>(); if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - - spiller_.reset(createSpiller(mf_, li_, loopInfo, vrm_)); - + + spiller_.reset(createSpiller(*this, *mf_, *vrm_)); + initIntervalSets(); linearScan(); @@ -543,7 +551,7 @@ void RALinScan::linearScan() { // linear scan algorithm DEBUG({ dbgs() << "********** LINEAR SCAN **********\n" - << "********** Function: " + << "********** Function: " << mf_->getFunction()->getName() << '\n'; printIntervals("fixed", fixed_.begin(), fixed_.end()); }); @@ -765,7 +773,8 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { return IP.end(); } -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, SlotIndex Point){ +static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, + SlotIndex Point){ for (unsigned i = 0, e = V.size(); i != e; ++i) { RALinScan::IntervalPtr &IP = V[i]; LiveInterval::iterator I = std::upper_bound(IP.first->begin(), @@ -804,7 +813,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, static float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, MachineRegisterInfo *mri_, - const MachineLoopInfo *loopInfo) { + MachineLoopInfo *loopInfo) { float Conflicts = 0; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), E = mri_->reg_end(); I != E; ++I) { @@ -837,7 +846,7 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, dbgs() << tri_->getName(Candidates[i].first) << " "; dbgs() << "\n"; }); - + // Calculate the number of conflicts of each candidate. for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { unsigned Reg = i->first->reg; @@ -955,7 +964,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (cur->empty()) { unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) - physReg = *RC->allocation_order_begin(*mf_); + physReg = getFirstNonReservedPhysReg(RC); DEBUG(dbgs() << tri_->getName(physReg) << '\n'); // Note the register is not really in use. 
vrm_->assignVirt2Phys(cur->reg, physReg); @@ -978,27 +987,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if ((vni->def != SlotIndex()) && !vni->isUnused() && vni->isDefAccurate()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (CopyMI && - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) { - unsigned Reg = 0; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - Reg = SrcReg; - else if (vrm_->isAssignedReg(SrcReg)) - Reg = vrm_->getPhys(SrcReg); - if (Reg) { - if (SrcSubReg) - Reg = tri_->getSubReg(Reg, SrcSubReg); - if (DstSubReg) - Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); - if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - mri_->setRegAllocationHint(cur->reg, 0, Reg); - } - } else if (CopyMI && CopyMI->isCopy()) { - DstReg = CopyMI->getOperand(0).getReg(); - DstSubReg = CopyMI->getOperand(0).getSubReg(); - SrcReg = CopyMI->getOperand(1).getReg(); - SrcSubReg = CopyMI->getOperand(1).getSubReg(); + if (CopyMI && CopyMI->isCopy()) { + unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); + unsigned SrcReg = CopyMI->getOperand(1).getReg(); + unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); unsigned Reg = 0; if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) Reg = SrcReg; @@ -1024,7 +1016,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only allocate virtual registers!"); const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); - // If this is not in a related reg class to the register we're allocating, + // If this is not in a related reg class to the register we're allocating, // don't check it. if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && cur->overlapsFrom(*i->first, i->second-1)) { @@ -1033,7 +1025,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); } } - + // Speculatively check to see if we can get a register right now. If not, // we know we won't be able to by adding more constraints. If so, we can // check to see if it is valid. Doing an exhaustive search of the fixed_ list @@ -1048,7 +1040,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { SmallSet<unsigned, 8> RegAliases; for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) RegAliases.insert(*AS); - + bool ConflictsWithFixed = false; for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { IntervalPtr &IP = fixed_[i]; @@ -1068,7 +1060,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { } } } - + // Okay, the register picked by our speculative getFreePhysReg call turned // out to be in use. Actually add all of the conflicting fixed registers to // regUse_ so we can do an accurate query. @@ -1080,7 +1072,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { LiveInterval *I = IP.first; const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; @@ -1099,11 +1091,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { physReg = getFreePhysReg(cur); } } - + // Restore the physical register tracker, removing information about the // future. 
restoreRegUses(); - + // If we find a free register, we are done: assign this virtual to // the free physical register and add this interval to the active // list. @@ -1118,7 +1110,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { UpgradeRegister(physReg); if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { // "Downgrade" physReg to try to keep physReg from being allocated until - // the next reload from the same SS is allocated. + // the next reload from the same SS is allocated. mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); DowngradeRegister(cur, physReg); } @@ -1131,7 +1123,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { for (std::vector<std::pair<unsigned, float> >::iterator I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) updateSpillWeights(SpillWeights, I->first, I->second, RC); - + // for each interval in active, update spill weights. for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); i != e; ++i) { @@ -1141,7 +1133,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { reg = vrm_->getPhys(reg); updateSpillWeights(SpillWeights, reg, i->first->weight, RC); } - + DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); // Find a register to spill. @@ -1155,17 +1147,22 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { e = RC->allocation_order_end(*mf_); i != e; ++i) { unsigned reg = *i; float regWeight = SpillWeights[reg]; - // Skip recently allocated registers. + // Don't even consider reserved regs. + if (reservedRegs_.test(reg)) + continue; + // Skip recently allocated registers and reserved registers. if (minWeight > regWeight && !isRecentlyUsed(reg)) Found = true; RegsWeights.push_back(std::make_pair(reg, regWeight)); } - + // If we didn't find a register that is spillable, try aliases? if (!Found) { for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), e = RC->allocation_order_end(*mf_); i != e; ++i) { unsigned reg = *i; + if (reservedRegs_.test(reg)) + continue; // No need to worry about if the alias register size < regsize of RC. // We are going to spill all registers that alias it anyway. for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) @@ -1179,7 +1176,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { minWeight = RegsWeights[0].second; if (minWeight == HUGE_VALF) { // All registers must have inf weight. Just grab one! - minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_); + minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC); if (cur->weight == HUGE_VALF || li_->getApproximateInstructionCount(*cur) == 0) { // Spill a physical register around defs and uses. @@ -1224,8 +1221,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // linearscan. 
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector<LiveInterval*, 8> spillIs; - std::vector<LiveInterval*> added; + SmallVector<LiveInterval*, 8> spillIs, added; spiller_->spill(cur, added, spillIs); std::sort(added.begin(), added.end(), LISorter()); @@ -1288,27 +1284,33 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // The earliest start of a Spilled interval indicates up to where // in handled we need to roll back - assert(!spillIs.empty() && "No spill intervals?"); + assert(!spillIs.empty() && "No spill intervals?"); SlotIndex earliestStart = spillIs[0]->beginIndex(); - + // Spill live intervals of virtual regs mapped to the physical register we // want to clear (and its aliases). We only spill those that overlap with the // current interval as the rest do not affect its allocation. we also keep // track of the earliest start of all spilled live intervals since this will // mark our rollback point. - std::vector<LiveInterval*> added; + SmallVector<LiveInterval*, 8> added; while (!spillIs.empty()) { LiveInterval *sli = spillIs.back(); spillIs.pop_back(); DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); - - spiller_->spill(sli, added, spillIs, &earliestStart); + spiller_->spill(sli, added, spillIs); addStackInterval(sli, ls_, li_, mri_, *vrm_); spilled.insert(sli->reg); } + // Include any added intervals in earliestStart. + for (unsigned i = 0, e = added.size(); i != e; ++i) { + SlotIndex SI = added[i]->beginIndex(); + if (SI < earliestStart) + earliestStart = SI; + } + DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a @@ -1431,6 +1433,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; + // Skip reserved registers. + if (reservedRegs_.test(Reg)) + continue; // Skip recently allocated registers. if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { FreeReg = Reg; @@ -1459,6 +1464,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; + // Skip reserved registers. + if (reservedRegs_.test(Reg)) + continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { FreeReg = Reg; @@ -1479,17 +1487,17 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { SmallVector<unsigned, 256> inactiveCounts; unsigned MaxInactiveCount = 0; - + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - + for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); i != e; ++i) { unsigned reg = i->first->reg; assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); - // If this is not in a related reg class to the register we're allocating, + // If this is not in a related reg class to the register we're allocating, // don't check it. 
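The rollback point above is the smallest begin index over the spilled intervals, lowered further by any intervals the spiller itself added. The same computation in isolation, with plain ints standing in for SlotIndex (Interval and rollbackPoint are invented names):

    #include <algorithm>
    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct Interval { int begin; };

    // Earliest start among spilled intervals and spiller-added intervals.
    int rollbackPoint(const std::vector<Interval> &spills,
                      const std::vector<Interval> &added) {
      assert(!spills.empty() && "No spill intervals?");
      int earliest = spills[0].begin;
      for (const Interval &i : spills) earliest = std::min(earliest, i.begin);
      for (const Interval &i : added)  earliest = std::min(earliest, i.begin);
      return earliest;
    }

    int main() {
      std::vector<Interval> spills = {{40}, {25}, {60}};
      std::vector<Interval> added  = {{30}, {12}};
      std::printf("roll back to %d\n", rollbackPoint(spills, added)); // 12
    }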
   const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
   if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
@@ -1506,7 +1514,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
   unsigned Preference = vrm_->getRegAllocPref(cur->reg);
   if (Preference) {
     DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
-    if (isRegAvail(Preference) && 
+    if (isRegAvail(Preference) &&
         RC->contains(Preference))
       return Preference;
   }
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 7e61a12..61f337b 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,6 +34,8 @@
 #include "PBQP/HeuristicSolver.h"
 #include "PBQP/Graph.h"
 #include "PBQP/Heuristics/Briggs.h"
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
 #include "VirtRegMap.h"
 #include "VirtRegRewriter.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
@@ -65,6 +67,11 @@
 pbqpCoalescing("pbqp-coalescing",
                 cl::desc("Attempt coalescing during PBQP register allocation."),
                 cl::init(false), cl::Hidden);
 
+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+                 cl::desc("Pre-split before PBQP register allocation."),
+                 cl::init(false), cl::Hidden);
+
 namespace {
 
   ///
@@ -77,7 +84,7 @@ namespace {
     static char ID;
 
     /// Construct a PBQP register allocator.
-    PBQPRegAlloc() : MachineFunctionPass(&ID) {}
+    PBQPRegAlloc() : MachineFunctionPass(ID) {}
 
     /// Return the pass name.
     virtual const char* getPassName() const {
@@ -96,7 +103,10 @@ namespace {
       au.addPreserved<LiveStacks>();
       au.addRequired<MachineLoopInfo>();
       au.addPreserved<MachineLoopInfo>();
+      if (pbqpPreSplitting)
+        au.addRequired<LoopSplitter>();
       au.addRequired<VirtRegMap>();
+      au.addRequired<RenderMachineFunction>();
       MachineFunctionPass::getAnalysisUsage(au);
     }
 
@@ -104,7 +114,15 @@ namespace {
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
   private:
+
+    class LIOrdering {
+    public:
+      bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+        return li1->reg < li2->reg;
+      }
+    };
+
+    typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
     typedef std::vector<const LiveInterval*> Node2LIMap;
     typedef std::vector<unsigned> AllowedSet;
     typedef std::vector<AllowedSet> AllowedSetMap;
@@ -112,7 +130,7 @@ namespace {
     typedef std::pair<unsigned, unsigned> RegPair;
     typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
 
-    typedef std::set<LiveInterval*> LiveIntervalSet;
+    typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;
 
     typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
 
@@ -122,6 +140,7 @@ namespace {
     const TargetInstrInfo *tii;
     const MachineLoopInfo *loopInfo;
     MachineRegisterInfo *mri;
+    RenderMachineFunction *rmf;
 
     LiveIntervals *lis;
     LiveStacks *lss;
@@ -379,12 +398,14 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
          iItr != iEnd; ++iItr) {
 
       const MachineInstr *instr = &*iItr;
-      unsigned srcReg, dstReg, srcSubReg, dstSubReg;
 
       // If this isn't a copy then continue to the next instruction.
-      if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg))
+      if (!instr->isCopy())
         continue;
 
+      unsigned srcReg = instr->getOperand(1).getReg();
+      unsigned dstReg = instr->getOperand(0).getReg();
+
       // If the registers are already the same our job is nice and easy.
       if (dstReg == srcReg)
         continue;
@@ -567,6 +588,8 @@ PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
 
   // Resize allowedSets container appropriately.
allowedSets.resize(vregIntervalsToAlloc.size()); + BitVector ReservedRegs = tri->getReservedRegs(*mf); + // Iterate over virtual register intervals to compute allowed sets... for (unsigned node = 0; node < node2LI.size(); ++node) { @@ -575,8 +598,12 @@ PBQP::Graph PBQPRegAlloc::constructPBQPProblem() { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); // Start by assuming all allocable registers in the class are allowed... - RegVector liAllowed(liRC->allocation_order_begin(*mf), - liRC->allocation_order_end(*mf)); + RegVector liAllowed; + TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf); + TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf); + for (TargetRegisterClass::iterator it = aob; it != aoe; ++it) + if (!ReservedRegs.test(*it)) + liAllowed.push_back(*it); // Eliminate the physical registers which overlap with this range, along // with all their aliases. @@ -735,9 +762,11 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { const LiveInterval *spillInterval = node2LI[node]; double oldSpillWeight = spillInterval->weight; SmallVector<LiveInterval*, 8> spillIs; + rmf->rememberUseDefs(spillInterval); std::vector<LiveInterval*> newSpills = lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); addStackInterval(spillInterval, mri); + rmf->rememberSpills(spillInterval, newSpills); (void) oldSpillWeight; DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: " @@ -845,9 +874,11 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); loopInfo = &getAnalysis<MachineLoopInfo>(); + rmf = &getAnalysis<RenderMachineFunction>(); vrm = &getAnalysis<VirtRegMap>(); + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); // Allocator main loop: @@ -884,6 +915,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { // Finalise allocation, allocate empty ranges. 
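The liAllowed loop above and linear scan's getFirstNonReservedPhysReg share one shape: walk the class's allocation order and drop anything the target has reserved. A minimal sketch of that filter, with std::vector<bool> standing in for BitVector (allowedRegs and the register numbers are invented):

    #include <cstdio>
    #include <vector>

    // Copy the allocation order, skipping registers in the reserved set.
    std::vector<unsigned> allowedRegs(const std::vector<unsigned> &allocOrder,
                                      const std::vector<bool> &reserved) {
      std::vector<unsigned> allowed;
      for (unsigned reg : allocOrder)
        if (!reserved[reg])
          allowed.push_back(reg);
      return allowed;
    }

    int main() {
      std::vector<unsigned> order = {0, 1, 2, 3};
      std::vector<bool> reserved = {false, true, false, true}; // e.g. SP, FP
      for (unsigned r : allowedRegs(order, reserved)) std::printf("r%u ", r);
      std::printf("\n");  // prints: r0 r2
    }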
finalizeAlloc(); + rmf->renderMachineFunction("After PBQP register allocation.", vrm); + vregIntervalsToAlloc.clear(); emptyVRegIntervals.clear(); li2Node.clear(); diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index ab0bc2d..02b5539 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -54,9 +54,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI, DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); Src = MI->getOperand(2).getReg(); SrcSub = MI->getOperand(2).getSubReg(); - } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) { + } else return false; - } return true; } diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 43b3fb6..a2580b8 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -21,7 +21,9 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -226,19 +228,14 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { used = ~RegsAvailable & ~ReservedRegs; } -/// CreateRegClassMask - Set the bits that represent the registers in the -/// TargetRegisterClass. -static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) { - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; - ++I) - Mask.set(*I); -} - unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (!isAliasUsed(*I)) + if (!isAliasUsed(*I)) { + DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) << + "\n"); return *I; + } return 0; } @@ -325,11 +322,9 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { - // Mask off the registers which are not in the TargetRegisterClass. - BitVector Candidates(NumPhysRegs, false); - CreateRegClassMask(RC, Candidates); - // Do not include reserved registers. - Candidates ^= ReservedRegs & Candidates; + // Consider all allocatable registers in the register class initially + BitVector Candidates = + TRI->getAllocatableSet(*I->getParent()->getParent(), RC); // Exclude all the registers being used by the instruction. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { @@ -349,8 +344,10 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); // If we found an unused register there is no reason to spill it. 
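scavengeRegister above starts from the allocatable set for the class and then clears every register the current instruction references before looking for a survivor. Roughly, the masking step looks like this (a toy model with 64 physregs; scavengeCandidates is an invented name):

    #include <bitset>
    #include <cstdio>

    // Candidates = allocatable set of the class, minus regs the instruction uses.
    std::bitset<64> scavengeCandidates(std::bitset<64> allocatable,
                                       const std::bitset<64> &usedByInstr) {
      allocatable &= ~usedByInstr;  // exclude regs referenced by the instruction
      return allocatable;           // a survivor is then picked (or spilled)
    }

    int main() {
      std::bitset<64> alloc, used;
      alloc.set(0); alloc.set(1); alloc.set(2);
      used.set(1);  // the instruction already uses reg 1
      std::printf("candidates: %zu\n", scavengeCandidates(alloc, used).count()); // 2
    }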
-  if (!isAliasUsed(SReg))
+  if (!isAliasUsed(SReg)) {
+    DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
     return SReg;
+  }
 
   assert(ScavengedReg == 0 &&
          "Scavenger slot is live, unable to scavenge another register!");
@@ -366,12 +363,12 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
            "Cannot scavenge register without an emergency spill slot!");
     TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
     MachineBasicBlock::iterator II = prior(I);
-    TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+    TRI->eliminateFrameIndex(II, SPAdj, this);
 
     // Restore the scavenged register before its use (or first terminator).
     TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
     II = prior(UseMI);
-    TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+    TRI->eliminateFrameIndex(II, SPAdj, this);
   }
 
   ScavengeRestore = prior(UseMI);
@@ -380,5 +377,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
   //
   ScavengedReg = SReg;
   ScavengedRC = RC;
 
+  DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+        "\n");
+
   return SReg;
 }
diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
new file mode 100644
index 0000000..93426ee
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
@@ -0,0 +1,1014 @@
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rendermf"
+
+#include "RenderMachineFunction.h"
+
+#include "VirtRegMap.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+char RenderMachineFunction::ID = 0;
+INITIALIZE_PASS(RenderMachineFunction, "rendermf",
+                "Render machine functions (and related info) to HTML pages",
+                false, false);
+
+static cl::opt<std::string>
+outputFileSuffix("rmf-file-suffix",
+                 cl::desc("Appended to function name to get output file name "
+                          "(default: \".html\")"),
+                 cl::init(".html"), cl::Hidden);
+
+static cl::opt<std::string>
+machineFuncsToRender("rmf-funcs",
+                     cl::desc("Comma separated list of functions to render"
+                              ", or \"*\"."),
+                     cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+pressureClasses("rmf-classes",
+                cl::desc("Register classes to render pressure for."),
+                cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+showIntervals("rmf-intervals",
+              cl::desc("Live intervals to show alongside code."),
+              cl::init(""), cl::Hidden);
+
+static cl::opt<bool>
+filterEmpty("rmf-filter-empty-intervals",
+            cl::desc("Don't display empty intervals."),
+            cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+showEmptyIndexes("rmf-empty-indexes",
+                 cl::desc("Render indexes not associated with instructions or "
+                          "MBB starts."),
+                 cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+useFancyVerticals("rmf-fancy-verts",
+                  cl::desc("Use SVG for vertical text."),
+                  cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+prettyHTML("rmf-pretty-html",
+           cl::desc("Pretty print HTML. For debugging the renderer only."),
+           cl::init(false), cl::Hidden);
+
+
+namespace llvm {
+
+  bool MFRenderingOptions::renderingOptionsProcessed;
+  std::set<std::string> MFRenderingOptions::mfNamesToRender;
+  bool MFRenderingOptions::renderAllMFs = false;
+
+  std::set<std::string> MFRenderingOptions::classNamesToRender;
+  bool MFRenderingOptions::renderAllClasses = false;
+
+  std::set<std::pair<unsigned, unsigned> >
+    MFRenderingOptions::intervalNumsToRender;
+  unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly;
+
+  template <typename OutputItr>
+  void MFRenderingOptions::splitComaSeperatedList(const std::string &s,
+                                                  OutputItr outItr) {
+    std::string::const_iterator curPos = s.begin();
+    std::string::const_iterator nextComa = std::find(curPos, s.end(), ',');
+    while (nextComa != s.end()) {
+      std::string elem;
+      std::copy(curPos, nextComa, std::back_inserter(elem));
+      *outItr = elem;
+      ++outItr;
+      curPos = llvm::next(nextComa);
+      nextComa = std::find(curPos, s.end(), ',');
+    }
+
+    if (curPos != s.end()) {
+      std::string elem;
+      std::copy(curPos, s.end(), std::back_inserter(elem));
+      *outItr = elem;
+      ++outItr;
+    }
+  }
+
+  void MFRenderingOptions::processOptions() {
+    if (!renderingOptionsProcessed) {
+      processFuncNames();
+      processRegClassNames();
+      processIntervalNumbers();
+      renderingOptionsProcessed = true;
+    }
+  }
+
+  void MFRenderingOptions::processFuncNames() {
+    if (machineFuncsToRender == "*") {
+      renderAllMFs = true;
+    } else {
+      splitComaSeperatedList(machineFuncsToRender,
+                             std::inserter(mfNamesToRender,
+                                           mfNamesToRender.begin()));
+    }
+  }
+
+  void MFRenderingOptions::processRegClassNames() {
+    if (pressureClasses == "*") {
+      renderAllClasses = true;
+    } else {
+      splitComaSeperatedList(pressureClasses,
+                             std::inserter(classNamesToRender,
+                                           classNamesToRender.begin()));
+    }
+  }
+
+  void MFRenderingOptions::processIntervalNumbers() {
+    std::set<std::string> intervalRanges;
+    splitComaSeperatedList(showIntervals,
+                           std::inserter(intervalRanges,
+                                         intervalRanges.begin()));
+    std::for_each(intervalRanges.begin(), intervalRanges.end(),
+                  processIntervalRange);
+  }
+
+  void MFRenderingOptions::processIntervalRange(
+                                          const std::string &intervalRangeStr) {
+    if (intervalRangeStr == "*") {
+      intervalTypesToRender |= All;
+    } else if (intervalRangeStr == "virt-nospills*") {
+      intervalTypesToRender |= VirtNoSpills;
+    } else if (intervalRangeStr == "spills*") {
+      intervalTypesToRender |= VirtSpills;
+    } else if (intervalRangeStr == "virt*") {
+      intervalTypesToRender |= AllVirt;
+    } else if (intervalRangeStr == "phys*") {
+      intervalTypesToRender |= AllPhys;
+    } else {
+      std::istringstream iss(intervalRangeStr);
+      unsigned reg1, reg2;
+      if ((iss >> reg1 >> std::ws)) {
+        if (iss.eof()) {
+          intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1));
+        } else {
+          char c;
+          iss >> c;
+          if (c == '-' && (iss >> reg2)) {
+            intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1));
+          } else {
+            dbgs() << "Warning: Invalid interval range \""
+                   << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+          }
+        }
+      } else {
+        dbgs() << "Warning: Invalid interval number \""
+               << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+      }
+    }
+  }
+
+  void MFRenderingOptions::setup(MachineFunction *mf,
+                                 const TargetRegisterInfo *tri,
+                                 LiveIntervals *lis,
+                                 const RenderMachineFunction *rmf) {
+    this->mf = mf;
+    this->tri = tri;
+    this->lis = lis;
+    this->rmf = rmf;
+
+    clear();
+  }
+
+  void MFRenderingOptions::clear() {
+    regClassesTranslatedToCurrentFunction = false;
+    regClassSet.clear();
+
+    intervalsTranslatedToCurrentFunction = false;
+    intervalSet.clear();
+  }
+
+  void MFRenderingOptions::resetRenderSpecificOptions() {
+    intervalSet.clear();
+    intervalsTranslatedToCurrentFunction = false;
+  }
+
+  bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const {
+    processOptions();
+
+    return (renderAllMFs ||
+            mfNamesToRender.find(mf->getFunction()->getName()) !=
+              mfNamesToRender.end());
+  }
+
+  const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{
+    translateRegClassNamesToCurrentFunction();
+    return regClassSet;
+  }
+
+  const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const {
+    translateIntervalNumbersToCurrentFunction();
+    return intervalSet;
+  }
+
+  bool MFRenderingOptions::renderEmptyIndexes() const {
+    return showEmptyIndexes;
+  }
+
+  bool MFRenderingOptions::fancyVerticals() const {
+    return useFancyVerticals;
+  }
+
+  void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const {
+    if (!regClassesTranslatedToCurrentFunction) {
+      processOptions();
+      for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+                                                 rcEnd = tri->regclass_end();
+           rcItr != rcEnd; ++rcItr) {
+        const TargetRegisterClass *trc = *rcItr;
+        if (renderAllClasses ||
+            classNamesToRender.find(trc->getName()) !=
+              classNamesToRender.end()) {
+          regClassSet.insert(trc);
+        }
+      }
+      regClassesTranslatedToCurrentFunction = true;
+    }
+  }
+
+  void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const {
+    if (!intervalsTranslatedToCurrentFunction) {
+      processOptions();
+
+      // If we're not just doing explicit then do a copy over all matching
+      // types.
+      if (intervalTypesToRender != ExplicitOnly) {
+        for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+             liItr != liEnd; ++liItr) {
+          LiveInterval *li = liItr->second;
+
+          if (filterEmpty && li->empty())
+            continue;
+
+          if ((TargetRegisterInfo::isPhysicalRegister(li->reg) &&
+               (intervalTypesToRender & AllPhys))) {
+            intervalSet.insert(li);
+          } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) {
+            if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) ||
+                ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) {
+              intervalSet.insert(li);
+            }
+          }
+        }
+      }
+
+      // If we need to process the explicit list...
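processIntervalRange above accepts either a single register number or a "reg1-reg2" range and stores a half-open pair. A standalone version of just that parse, without the stream subtleties of the original (parseRange is an invented name; trailing garbage after a valid range is ignored in this sketch):

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <utility>

    bool parseRange(const std::string &s, std::pair<unsigned, unsigned> &out) {
      std::istringstream iss(s);
      unsigned lo, hi;
      char dash;
      if (!(iss >> lo))
        return false;                      // not a number at all
      if (iss >> dash) {                   // more input: expect "-hi"
        if (dash != '-' || !(iss >> hi))
          return false;
        out = std::make_pair(lo, hi + 1);  // half-open [lo, hi+1)
      } else {
        out = std::make_pair(lo, lo + 1);  // single number "reg"
      }
      return true;
    }

    int main() {
      std::pair<unsigned, unsigned> r;
      if (parseRange("3-7", r))
        std::cout << "[" << r.first << "," << r.second << ")\n";  // [3,8)
      if (parseRange("5", r))
        std::cout << "[" << r.first << "," << r.second << ")\n";  // [5,6)
    }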
+ if (intervalTypesToRender != All) { + for (std::set<std::pair<unsigned, unsigned> >::const_iterator + regRangeItr = intervalNumsToRender.begin(), + regRangeEnd = intervalNumsToRender.end(); + regRangeItr != regRangeEnd; ++regRangeItr) { + const std::pair<unsigned, unsigned> &range = *regRangeItr; + for (unsigned reg = range.first; reg != range.second; ++reg) { + if (lis->hasInterval(reg)) { + intervalSet.insert(&lis->getInterval(reg)); + } + } + } + } + + intervalsTranslatedToCurrentFunction = true; + } + } + + // ---------- TargetRegisterExtraInformation implementation ---------- + + TargetRegisterExtraInfo::TargetRegisterExtraInfo() + : mapsPopulated(false) { + } + + void TargetRegisterExtraInfo::setup(MachineFunction *mf, + MachineRegisterInfo *mri, + const TargetRegisterInfo *tri, + LiveIntervals *lis) { + this->mf = mf; + this->mri = mri; + this->tri = tri; + this->lis = lis; + } + + void TargetRegisterExtraInfo::reset() { + if (!mapsPopulated) { + initWorst(); + //initBounds(); + initCapacity(); + mapsPopulated = true; + } + + resetPressureAndLiveStates(); + } + + void TargetRegisterExtraInfo::clear() { + prWorst.clear(); + vrWorst.clear(); + capacityMap.clear(); + pressureMap.clear(); + //liveStatesMap.clear(); + mapsPopulated = false; + } + + void TargetRegisterExtraInfo::initWorst() { + assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() && + "Worst map already initialised?"); + + // Start with the physical registers. + for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) { + WorstMapLine &pregLine = prWorst[preg]; + + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + + unsigned numOverlaps = 0; + for (TargetRegisterClass::iterator rItr = trc->begin(), + rEnd = trc->end(); + rItr != rEnd; ++rItr) { + unsigned trcPReg = *rItr; + if (tri->regsOverlap(preg, trcPReg)) + ++numOverlaps; + } + + pregLine[trc] = numOverlaps; + } + } + + // Now the register classes. 
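The "worst" tables populated above record, for a register against each class, how many registers of that class can overlap it; the class-vs-class loop that follows takes the maximum of this count over the first class. The per-register count in isolation, with a toy aliasing rule in place of TargetRegisterInfo::regsOverlap:

    #include <cstdio>
    #include <vector>

    // Toy aliasing rule: registers overlap iff they share a pair slot,
    // e.g. {0,1} alias, {2,3} alias, and so on.
    static bool regsOverlap(unsigned a, unsigned b) {
      return a == b || a / 2 == b / 2;
    }

    // Count how many registers of regClass can overlap reg.
    unsigned worstOverlap(unsigned reg, const std::vector<unsigned> &regClass) {
      unsigned n = 0;
      for (unsigned r : regClass)
        if (regsOverlap(reg, r))
          ++n;
      return n;
    }

    int main() {
      std::vector<unsigned> classA = {0, 1, 2, 3};
      std::printf("worst overlap with r0: %u\n", worstOverlap(0, classA)); // 2
    }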
+    for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
+                                                rcEnd = tri->regclass_end();
+         rc1Itr != rcEnd; ++rc1Itr) {
+      const TargetRegisterClass *trc1 = *rc1Itr;
+      WorstMapLine &classLine = vrWorst[trc1];
+
+      for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
+           rc2Itr != rcEnd; ++rc2Itr) {
+        const TargetRegisterClass *trc2 = *rc2Itr;
+
+        unsigned worst = 0;
+
+        for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
+                                           trc1End = trc1->end();
+             trc1Itr != trc1End; ++trc1Itr) {
+          unsigned trc1Reg = *trc1Itr;
+          unsigned trc1RegWorst = 0;
+
+          for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
+                                             trc2End = trc2->end();
+               trc2Itr != trc2End; ++trc2Itr) {
+            unsigned trc2Reg = *trc2Itr;
+            if (tri->regsOverlap(trc1Reg, trc2Reg))
+              ++trc1RegWorst;
+          }
+          if (trc1RegWorst > worst) {
+            worst = trc1RegWorst;
+          }
+        }
+
+        if (worst != 0) {
+          classLine[trc2] = worst;
+        }
+      }
+    }
+  }
+
+  unsigned TargetRegisterExtraInfo::getWorst(
+                                        unsigned reg,
+                                        const TargetRegisterClass *trc) const {
+    const WorstMapLine *wml = 0;
+    if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+      PRWorstMap::const_iterator prwItr = prWorst.find(reg);
+      assert(prwItr != prWorst.end() && "Missing prWorst entry.");
+      wml = &prwItr->second;
+    } else {
+      const TargetRegisterClass *regTRC = mri->getRegClass(reg);
+      VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
+      assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
+      wml = &vrwItr->second;
+    }
+
+    WorstMapLine::const_iterator wmlItr = wml->find(trc);
+    if (wmlItr == wml->end())
+      return 0;
+
+    return wmlItr->second;
+  }
+
+  void TargetRegisterExtraInfo::initCapacity() {
+    assert(!mapsPopulated && capacityMap.empty() &&
+           "Capacity map already initialised?");
+
+    for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+                                               rcEnd = tri->regclass_end();
+         rcItr != rcEnd; ++rcItr) {
+      const TargetRegisterClass *trc = *rcItr;
+      unsigned capacity = std::distance(trc->allocation_order_begin(*mf),
+                                        trc->allocation_order_end(*mf));
+
+      if (capacity != 0)
+        capacityMap[trc] = capacity;
+    }
+  }
+
+  unsigned TargetRegisterExtraInfo::getCapacity(
+                                         const TargetRegisterClass *trc) const {
+    CapacityMap::const_iterator cmItr = capacityMap.find(trc);
+    assert(cmItr != capacityMap.end() &&
+           "vreg with unallocable register class");
+    return cmItr->second;
+  }
+
+  void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
+    pressureMap.clear();
+    //liveStatesMap.clear();
+
+    // Iterate over all slots.
+
+
+    // Iterate over all live intervals.
+    for (LiveIntervals::iterator liItr = lis->begin(),
+                                 liEnd = lis->end();
+         liItr != liEnd; ++liItr) {
+      LiveInterval *li = liItr->second;
+
+      const TargetRegisterClass *liTRC;
+
+      if (TargetRegisterInfo::isPhysicalRegister(li->reg))
+        continue;
+
+      liTRC = mri->getRegClass(li->reg);
+
+
+      // For all ranges in the current interval.
+      for (LiveInterval::iterator lrItr = li->begin(),
+                                  lrEnd = li->end();
+           lrItr != lrEnd; ++lrItr) {
+        LiveRange *lr = &*lrItr;
+
+        // For all slots in the current range.
+        for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
+
+          // Record increased pressure at index for all overlapping classes.
+          for (TargetRegisterInfo::regclass_iterator
+                 rcItr = tri->regclass_begin(),
+                 rcEnd = tri->regclass_end();
+               rcItr != rcEnd; ++rcItr) {
+            const TargetRegisterClass *trc = *rcItr;
+
+            if (trc->allocation_order_begin(*mf) ==
+                trc->allocation_order_end(*mf))
+              continue;
+
+            unsigned worstAtI = getWorst(li->reg, trc);
+
+            if (worstAtI != 0) {
+              pressureMap[i][trc] += worstAtI;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  unsigned TargetRegisterExtraInfo::getPressureAtSlot(
+                                                 const TargetRegisterClass *trc,
+                                                 SlotIndex i) const {
+    PressureMap::const_iterator pmItr = pressureMap.find(i);
+    if (pmItr == pressureMap.end())
+      return 0;
+    const PressureMapLine &pmLine = pmItr->second;
+    PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
+    if (pmlItr == pmLine.end())
+      return 0;
+    return pmlItr->second;
+  }
+
+  bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
+                                                 const TargetRegisterClass *trc,
+                                                 SlotIndex i) const {
+    return (getPressureAtSlot(trc, i) > getCapacity(trc));
+  }
+
+  // ---------- MachineFunctionRenderer implementation ----------
+
+  void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
+    if (!prettyHTML)
+      return;
+    for (unsigned i = 0; i < ns; ++i) {
+      os << " ";
+    }
+  }
+
+  RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
+    return Spacer(ns);
+  }
+
+  raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) {
+    s.print(os);
+    return os;
+  }
+
+  template <typename Iterator>
+  std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const {
+    std::string r;
+
+    for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
+      char c = *sItr;
+
+      switch (c) {
+        case '<': r.append("&lt;"); break;
+        case '>': r.append("&gt;"); break;
+        case '&': r.append("&amp;"); break;
+        case ' ': r.append("&nbsp;"); break;
+        case '\"': r.append("&quot;"); break;
+        default: r.push_back(c); break;
+      }
+    }
+
+    return r;
+  }
+
+  RenderMachineFunction::LiveState
+  RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
+                                        SlotIndex i) const {
+    const MachineInstr *mi = sis->getInstructionFromIndex(i);
+
+    // For uses/defs recorded use/def indexes override current liveness and
+    // instruction operands (Only for the interval which records the indexes).
+    if (i.isUse() || i.isDef()) {
+      UseDefs::const_iterator udItr = useDefs.find(li);
+      if (udItr != useDefs.end()) {
+        const SlotSet &slotSet = udItr->second;
+        if (slotSet.count(i)) {
+          if (i.isUse()) {
+            return Used;
+          }
+          // else
+          return Defined;
+        }
+      }
+    }
+
+    // If the slot is a load/store, or there's no info in the use/def set then
+    // use liveness and instruction operand info.
+    if (li->liveAt(i)) {
+
+      if (mi == 0) {
+        if (vrm == 0 ||
+            (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+          return AliveReg;
+        } else {
+          return AliveStack;
+        }
+      } else {
+        if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+          return Defined;
+        } else if (i.isUse() && mi->readsRegister(li->reg)) {
+          return Used;
+        } else {
+          if (vrm == 0 ||
+              (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+            return AliveReg;
+          } else {
+            return AliveStack;
+          }
+        }
+      }
+    }
+    return Dead;
+  }
+
+  RenderMachineFunction::PressureState
+  RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
+                                            SlotIndex i) const {
+    if (trei.getPressureAtSlot(trc, i) == 0) {
+      return Zero;
+    } else if (trei.classOverCapacityAtSlot(trc, i)){
+      return High;
+    }
+    return Low;
+  }
+
+  /// \brief Render a machine instruction.
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os, + const MachineInstr *mi) const { + std::string s; + raw_string_ostream oss(s); + oss << *mi; + + os << escapeChars(oss.str()); + } + + template <typename T> + void RenderMachineFunction::renderVertical(const Spacer &indent, + raw_ostream &os, + const T &t) const { + if (ro.fancyVerticals()) { + os << indent << "<object\n" + << indent + s(2) << "class=\"obj\"\n" + << indent + s(2) << "type=\"image/svg+xml\"\n" + << indent + s(2) << "width=\"14px\"\n" + << indent + s(2) << "height=\"55px\"\n" + << indent + s(2) << "data=\"data:image/svg+xml,\n" + << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n" + << indent + s(6) << "<text x='-55' y='10' " + "font-family='Courier' font-size='12' " + "transform='rotate(-90)' " + "text-rendering='optimizeSpeed' " + "fill='#000'>" << t << "</text>\n" + << indent + s(4) << "</svg>\">\n" + << indent << "</object>\n"; + } else { + std::ostringstream oss; + oss << t; + std::string tStr(oss.str()); + + os << indent; + for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end(); + tStrItr != tStrEnd; ++tStrItr) { + os << *tStrItr << "<br/>"; + } + os << "\n"; + } + } + + void RenderMachineFunction::insertCSS(const Spacer &indent, + raw_ostream &os) const { + os << indent << "<style type=\"text/css\">\n" + << indent + s(2) << "body { font-color: black; }\n" + << indent + s(2) << "table.code td { font-family: monospace; " + "border-width: 0px; border-style: solid; " + "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n" + << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n" + << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n" + << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n" + << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n" + << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n" + << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n" + << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n" + << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n" + << indent + s(2) << "table.code th { border-width: 0px; " + "border-style: solid; }\n" + << indent << "</style>\n"; + } + + void RenderMachineFunction::renderFunctionSummary( + const Spacer &indent, raw_ostream &os, + const char * const renderContextStr) const { + os << indent << "<h1>Function: " << mf->getFunction()->getName() + << "</h1>\n" + << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n"; + } + + + void RenderMachineFunction::renderPressureTableLegend( + const Spacer &indent, + raw_ostream &os) const { + os << indent << "<h2>Rendering Pressure Legend:</h2>\n" + << indent << "<table class=\"code\">\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<th>Pressure</th><th>Description</th>" + "<th>Appearance</th>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>No Pressure</td>" + "<td>No physical registers of this class requested.</td>" + "<td class=\"p-z\"> </td>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>Low Pressure</td>" + "<td>Sufficient physical registers to meet demand.</td>" + "<td class=\"p-l\"> </td>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>High Pressure</td>" + "<td>Potentially insufficient physical registers to meet demand.</td>" + "<td class=\"p-h\"> </td>\n" + << indent + 
s(2) << "</tr>\n" + << indent << "</table>\n"; + } + + template <typename CellType> + void RenderMachineFunction::renderCellsWithRLE( + const Spacer &indent, raw_ostream &os, + const std::pair<CellType, unsigned> &rleAccumulator, + const std::map<CellType, std::string> &cellTypeStrs) const { + + if (rleAccumulator.second == 0) + return; + + typename std::map<CellType, std::string>::const_iterator ctsItr = + cellTypeStrs.find(rleAccumulator.first); + + assert(ctsItr != cellTypeStrs.end() && "No string for given cell type."); + + os << indent + s(4) << "<td class=\"" << ctsItr->second << "\""; + if (rleAccumulator.second > 1) + os << " colspan=" << rleAccumulator.second; + os << "></td>\n"; + } + + + void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent, + raw_ostream &os) const { + + std::map<LiveState, std::string> lsStrs; + lsStrs[Dead] = "l-n"; + lsStrs[Defined] = "l-d"; + lsStrs[Used] = "l-u"; + lsStrs[AliveReg] = "l-r"; + lsStrs[AliveStack] = "l-s"; + + std::map<PressureState, std::string> psStrs; + psStrs[Zero] = "p-z"; + psStrs[Low] = "p-l"; + psStrs[High] = "p-h"; + + // Open the table... + + os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n" + << indent + s(2) << "<tr>\n"; + + // Render the header row... + + os << indent + s(4) << "<th>index</th>\n" + << indent + s(4) << "<th>instr</th>\n"; + + // Render class names if necessary... + if (!ro.regClasses().empty()) { + for (MFRenderingOptions::RegClassSet::const_iterator + rcItr = ro.regClasses().begin(), + rcEnd = ro.regClasses().end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + os << indent + s(4) << "<th>\n"; + renderVertical(indent + s(6), os, trc->getName()); + os << indent + s(4) << "</th>\n"; + } + } + + // FIXME: Is there a nicer way to insert space between columns in HTML? + if (!ro.regClasses().empty() && !ro.intervals().empty()) + os << indent + s(4) << "<th> </th>\n"; + + // Render interval numbers if necessary... + if (!ro.intervals().empty()) { + for (MFRenderingOptions::IntervalSet::const_iterator + liItr = ro.intervals().begin(), + liEnd = ro.intervals().end(); + liItr != liEnd; ++liItr) { + + const LiveInterval *li = *liItr; + os << indent + s(4) << "<th>\n"; + renderVertical(indent + s(6), os, li->reg); + os << indent + s(4) << "</th>\n"; + } + } + + os << indent + s(2) << "</tr>\n"; + + // End header row, start with the data rows... + + MachineInstr *mi = 0; + + // Data rows: + for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex(); + i = i.getNextSlot()) { + + // Render the slot column. + os << indent + s(2) << "<tr height=6ex>\n"; + + // Render the code column. + if (i.isLoad()) { + MachineBasicBlock *mbb = sis->getMBBFromIndex(i); + mi = sis->getInstructionFromIndex(i); + + if (i == sis->getMBBStartIdx(mbb) || mi != 0 || + ro.renderEmptyIndexes()) { + os << indent + s(4) << "<td rowspan=4>" << i << " </td>\n" + << indent + s(4) << "<td rowspan=4>\n"; + + if (i == sis->getMBBStartIdx(mbb)) { + os << indent + s(6) << "BB#" << mbb->getNumber() << ": \n"; + } else if (mi != 0) { + os << indent + s(6) << " "; + renderMachineInstr(os, mi); + } else { + // Empty interval - leave blank. + } + os << indent + s(4) << "</td>\n"; + } else { + i = i.getStoreIndex(); // <- Will be incremented to the next index. + continue; + } + } + + // Render the class columns. 
+ if (!ro.regClasses().empty()) { + std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0); + for (MFRenderingOptions::RegClassSet::const_iterator + rcItr = ro.regClasses().begin(), + rcEnd = ro.regClasses().end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + PressureState newPressure = getPressureStateAt(trc, i); + + if (newPressure == psRLEAccumulator.first) { + ++psRLEAccumulator.second; + } else { + renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); + psRLEAccumulator.first = newPressure; + psRLEAccumulator.second = 1; + } + } + renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); + } + + // FIXME: Is there a nicer way to insert space between columns in HTML? + if (!ro.regClasses().empty() && !ro.intervals().empty()) + os << indent + s(4) << "<td width=2em></td>\n"; + + if (!ro.intervals().empty()) { + std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0); + for (MFRenderingOptions::IntervalSet::const_iterator + liItr = ro.intervals().begin(), + liEnd = ro.intervals().end(); + liItr != liEnd; ++liItr) { + const LiveInterval *li = *liItr; + LiveState newLiveness = getLiveStateAt(li, i); + + if (newLiveness == lsRLEAccumulator.first) { + ++lsRLEAccumulator.second; + } else { + renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); + lsRLEAccumulator.first = newLiveness; + lsRLEAccumulator.second = 1; + } + } + renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); + } + os << indent + s(2) << "</tr>\n"; + } + + os << indent << "</table>\n"; + + if (!ro.regClasses().empty()) + renderPressureTableLegend(indent, os); + } + + void RenderMachineFunction::renderFunctionPage( + raw_ostream &os, + const char * const renderContextStr) const { + os << "<html>\n" + << s(2) << "<head>\n" + << s(4) << "<title>" << fqn << "</title>\n"; + + insertCSS(s(4), os); + + os << s(2) << "<head>\n" + << s(2) << "<body >\n"; + + renderFunctionSummary(s(4), os, renderContextStr); + + os << s(4) << "<br/><br/><br/>\n"; + + //renderLiveIntervalInfoTable(" ", os); + + os << s(4) << "<br/><br/><br/>\n"; + + renderCodeTablePlusPI(s(4), os); + + os << s(2) << "</body>\n" + << "</html>\n"; + } + + void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); + } + + bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) { + + mf = &fn; + mri = &mf->getRegInfo(); + tri = mf->getTarget().getRegisterInfo(); + lis = &getAnalysis<LiveIntervals>(); + sis = &getAnalysis<SlotIndexes>(); + + trei.setup(mf, mri, tri, lis); + ro.setup(mf, tri, lis, this); + spillIntervals.clear(); + spillFor.clear(); + useDefs.clear(); + + fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." 
+ + mf->getFunction()->getName().str(); + + return false; + } + + void RenderMachineFunction::releaseMemory() { + trei.clear(); + ro.clear(); + spillIntervals.clear(); + spillFor.clear(); + useDefs.clear(); + } + + void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) { + + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg), + rEnd = mri->reg_end(); + rItr != rEnd; ++rItr) { + const MachineInstr *mi = &*rItr; + if (mi->readsRegister(li->reg)) { + useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + } + if (mi->definesRegister(li->reg)) { + useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + } + } + } + + void RenderMachineFunction::rememberSpills( + const LiveInterval *li, + const std::vector<LiveInterval*> &spills) { + + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(), + siEnd = spills.end(); + siItr != siEnd; ++siItr) { + const LiveInterval *spill = *siItr; + spillIntervals[li].insert(spill); + spillFor[spill] = li; + } + } + + bool RenderMachineFunction::isSpill(const LiveInterval *li) const { + SpillForMap::const_iterator sfItr = spillFor.find(li); + if (sfItr == spillFor.end()) + return false; + return true; + } + + void RenderMachineFunction::renderMachineFunction( + const char *renderContextStr, + const VirtRegMap *vrm, + const char *renderSuffix) { + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + this->vrm = vrm; + trei.reset(); + + std::string rpFileName(mf->getFunction()->getName().str() + + (renderSuffix ? renderSuffix : "") + + outputFileSuffix); + + std::string errMsg; + raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary); + + renderFunctionPage(outFile, renderContextStr); + + ro.resetRenderSpecificOptions(); + } + + std::string RenderMachineFunction::escapeChars(const std::string &s) const { + return escapeChars(s.begin(), s.end()); + } + +} diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.h b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h new file mode 100644 index 0000000..8d56a82 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h @@ -0,0 +1,336 @@ +//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H +#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <algorithm> +#include <map> +#include <set> +#include <string> + +namespace llvm { + + class LiveInterval; + class LiveIntervals; + class MachineInstr; + class MachineRegisterInfo; + class RenderMachineFunction; + class TargetRegisterClass; + class TargetRegisterInfo; + class VirtRegMap; + class raw_ostream; + + /// \brief Helper class to process rendering options. Tries to be as lazy as + /// possible. 
+ class MFRenderingOptions { + public: + + struct RegClassComp { + bool operator()(const TargetRegisterClass *trc1, + const TargetRegisterClass *trc2) const { + std::string trc1Name(trc1->getName()), trc2Name(trc2->getName()); + return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(), + trc2Name.begin(), trc2Name.end()); + } + }; + + typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet; + + struct IntervalComp { + bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { + return li1->reg < li2->reg; + } + }; + + typedef std::set<const LiveInterval*, IntervalComp> IntervalSet; + + /// Initialise the rendering options. + void setup(MachineFunction *mf, const TargetRegisterInfo *tri, + LiveIntervals *lis, const RenderMachineFunction *rmf); + + /// Clear translations of options to the current function. + void clear(); + + /// Reset any options computed for this specific rendering. + void resetRenderSpecificOptions(); + + /// Should we render the current function. + bool shouldRenderCurrentMachineFunction() const; + + /// Return the set of register classes to render pressure for. + const RegClassSet& regClasses() const; + + /// Return the set of live intervals to render liveness for. + const IntervalSet& intervals() const; + + /// Render indexes which are not associated with instructions / MBB starts. + bool renderEmptyIndexes() const; + + /// Return whether or not to render using SVG for fancy vertical text. + bool fancyVerticals() const; + + private: + + static bool renderingOptionsProcessed; + static std::set<std::string> mfNamesToRender; + static bool renderAllMFs; + + static std::set<std::string> classNamesToRender; + static bool renderAllClasses; + + + static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender; + typedef enum { ExplicitOnly = 0, + AllPhys = 1, + VirtNoSpills = 2, + VirtSpills = 4, + AllVirt = 6, + All = 7 } + IntervalTypesToRender; + static unsigned intervalTypesToRender; + + template <typename OutputItr> + static void splitComaSeperatedList(const std::string &s, OutputItr outItr); + + static void processOptions(); + + static void processFuncNames(); + static void processRegClassNames(); + static void processIntervalNumbers(); + + static void processIntervalRange(const std::string &intervalRangeStr); + + MachineFunction *mf; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + const RenderMachineFunction *rmf; + + mutable bool regClassesTranslatedToCurrentFunction; + mutable RegClassSet regClassSet; + + mutable bool intervalsTranslatedToCurrentFunction; + mutable IntervalSet intervalSet; + + void translateRegClassNamesToCurrentFunction() const; + + void translateIntervalNumbersToCurrentFunction() const; + }; + + /// \brief Provide extra information about the physical and virtual registers + /// in the function being compiled. + class TargetRegisterExtraInfo { + public: + TargetRegisterExtraInfo(); + + /// \brief Set up TargetRegisterExtraInfo with pointers to necessary + /// sources of information. + void setup(MachineFunction *mf, MachineRegisterInfo *mri, + const TargetRegisterInfo *tri, LiveIntervals *lis); + + /// \brief Recompute tables for changed function. + void reset(); + + /// \brief Free all tables in TargetRegisterExtraInfo. + void clear(); + + /// \brief Maximum number of registers from trc which alias reg. + unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const; + + /// \brief Returns the number of allocable registers in trc. 
+ unsigned getCapacity(const TargetRegisterClass *trc) const; + + /// \brief Return the number of registers of class trc that may be + /// needed at slot i. + unsigned getPressureAtSlot(const TargetRegisterClass *trc, + SlotIndex i) const; + + /// \brief Return true if the number of registers of type trc that may be + /// needed at slot i is greater than the capacity of trc. + bool classOverCapacityAtSlot(const TargetRegisterClass *trc, + SlotIndex i) const; + + private: + + MachineFunction *mf; + MachineRegisterInfo *mri; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + + typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine; + typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap; + VRWorstMap vrWorst; + + typedef std::map<unsigned, WorstMapLine> PRWorstMap; + PRWorstMap prWorst; + + typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap; + CapacityMap capacityMap; + + typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine; + typedef std::map<SlotIndex, PressureMapLine> PressureMap; + PressureMap pressureMap; + + bool mapsPopulated; + + /// \brief Initialise the 'worst' table. + void initWorst(); + + /// \brief Initialise the 'capacity' table. + void initCapacity(); + + /// \brief Initialise/Reset the 'pressure' and live states tables. + void resetPressureAndLiveStates(); + }; + + /// \brief Render MachineFunction objects and related information to an HTML + /// page. + class RenderMachineFunction : public MachineFunctionPass { + public: + static char ID; + + RenderMachineFunction() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); + + virtual void releaseMemory(); + + void rememberUseDefs(const LiveInterval *li); + + void rememberSpills(const LiveInterval *li, + const std::vector<LiveInterval*> &spills); + + bool isSpill(const LiveInterval *li) const; + + /// \brief Render this machine function to HTML. + /// + /// @param renderContextStr This parameter will be included in the top of + /// the html file to explain where (in the + /// codegen pipeline) this function was rendered + /// from. Set it to something like + /// "Pre-register-allocation". + /// @param vrm If non-null the VRM will be queried to determine + /// whether a virtual register was allocated to a + /// physical register or spilled. + /// @param renderSuffix This string will be appended to the function + /// name (before the output file suffix) to enable + /// multiple renderings from the same function. + void renderMachineFunction(const char *renderContextStr, + const VirtRegMap *vrm = 0, + const char *renderSuffix = 0); + + private: + class Spacer; + friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s); + + std::string fqn; + + MachineFunction *mf; + MachineRegisterInfo *mri; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + SlotIndexes *sis; + const VirtRegMap *vrm; + + TargetRegisterExtraInfo trei; + MFRenderingOptions ro; + + + + // Utilities.
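classOverCapacityAtSlot compares per-slot demand against the per-class capacity table. The header does not show the implementation; a sketch of the relation the two queries plausibly satisfy, with stand-in key types (RC and Slot are not the LLVM classes):

#include <map>

struct RC {};           // stand-in for TargetRegisterClass
typedef unsigned Slot;  // stand-in for SlotIndex

std::map<const RC*, unsigned> capacityMap;                  // allocatable regs per class
std::map<Slot, std::map<const RC*, unsigned> > pressureMap; // demand per slot, per class

bool classOverCapacityAtSlot(const RC *trc, Slot i) {
  return pressureMap[i][trc] > capacityMap[trc]; // demand exceeds capacity
}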
+ typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState; + LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const; + + typedef enum { Zero, Low, High } PressureState; + PressureState getPressureStateAt(const TargetRegisterClass *trc, + SlotIndex i) const; + + typedef std::map<const LiveInterval*, std::set<const LiveInterval*> > + SpillIntervals; + SpillIntervals spillIntervals; + + typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap; + SpillForMap spillFor; + + typedef std::set<SlotIndex> SlotSet; + typedef std::map<const LiveInterval*, SlotSet> UseDefs; + UseDefs useDefs; + + // ---------- Rendering methods ---------- + + /// For inserting spaces when pretty printing. + class Spacer { + public: + explicit Spacer(unsigned numSpaces) : ns(numSpaces) {} + Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); } + void print(raw_ostream &os) const; + private: + unsigned ns; + }; + + Spacer s(unsigned ns) const; + + template <typename Iterator> + std::string escapeChars(Iterator sBegin, Iterator sEnd) const; + + /// \brief Render a machine instruction. + void renderMachineInstr(raw_ostream &os, + const MachineInstr *mi) const; + + /// \brief Render vertical text. + template <typename T> + void renderVertical(const Spacer &indent, + raw_ostream &os, + const T &t) const; + + /// \brief Insert CSS layout info. + void insertCSS(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render a brief summary of the function (including rendering + /// context). + void renderFunctionSummary(const Spacer &indent, + raw_ostream &os, + const char * const renderContextStr) const; + + /// \brief Render a legend for the pressure table. + void renderPressureTableLegend(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render a consecutive set of HTML cells of the same class using + /// the colspan attribute for run-length encoding. + template <typename CellType> + void renderCellsWithRLE( + const Spacer &indent, raw_ostream &os, + const std::pair<CellType, unsigned> &rleAccumulator, + const std::map<CellType, std::string> &cellTypeStrs) const; + + /// \brief Render code listing, potentially with register pressure + /// and live intervals shown alongside. + void renderCodeTablePlusPI(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render the HTML page representing the MachineFunction. + void renderFunctionPage(raw_ostream &os, + const char * const renderContextStr) const; + + std::string escapeChars(const std::string &s) const; + }; +} + +#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */ diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 09202f8..ea93dd5 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -32,7 +32,8 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) { + : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()), + Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { MFI = mf.getFrameInfo(); DbgValueVec.clear(); } @@ -159,8 +160,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; // Keep track of dangling debug references to registers. 
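renderCellsWithRLE above run-length encodes a table row: a run of consecutive cells with the same class is emitted as a single <td> whose colspan attribute carries the run length. A sketch of the emission step, with std::string standing in for the CellType parameter:

#include <ostream>
#include <string>
#include <utility>

// Emit one HTML cell for a whole run of identical cells. run.first is the
// CSS class, run.second the run length (the accumulator in the real pass).
void renderRun(std::ostream &os,
               const std::pair<std::string, unsigned> &run) {
  os << "<td class=\"" << run.first << "\"";
  if (run.second > 1)
    os << " colspan=" << run.second;  // run-length encoding via colspan
  os << "></td>";
}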
- std::pair<MachineInstr*, unsigned> - DanglingDebugValue[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<std::pair<MachineInstr*, unsigned> > + DanglingDebugValue(TRI->getNumRegs(), + std::make_pair(static_cast<MachineInstr*>(0), 0)); // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -172,7 +174,6 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValueVec.clear(); - std::memset(DanglingDebugValue, 0, sizeof(DanglingDebugValue)); // Walk the list of instructions, from bottom moving up. for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h index d90659b..c8f543f 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h @@ -106,8 +106,8 @@ namespace llvm { /// are as we iterate upward through the instructions. This is allocated /// here instead of inside BuildSchedGraph to avoid the need for it to be /// initialized and destructed for each block. - std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister]; - std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<std::vector<SUnit *> > Defs; + std::vector<std::vector<SUnit *> > Uses; /// DbgValueVec - Remember DBG_VALUEs that refer to a particular /// register. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e671752..c9c4d91 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4489,6 +4489,16 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // If this is a conversion of N elements of one type to N elements of another // type, convert each element. This handles FP<->INT cases. if (SrcBitSize == DstBitSize) { + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + BV->getValueType(0).getVectorNumElements()); + + // Due to the FP element handling below calling this routine recursively, + // we can end up with a scalar-to-vector node here. 
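The ScheduleDAGInstrs changes above replace arrays statically sized by TargetRegisterInfo::FirstVirtualRegister with vectors sized from the target's actual register count, value-initialized so the old memset becomes unnecessary. The pattern in isolation (MachineInstr is only forward-declared as a stand-in):

#include <utility>
#include <vector>

class MachineInstr;  // opaque stand-in; only pointers are stored

// One (instruction, operand index) slot per physical register, all
// initialized to (null, 0); no std::memset required.
std::vector<std::pair<MachineInstr*, unsigned> >
makeDanglingDebugValue(unsigned numRegs) {
  return std::vector<std::pair<MachineInstr*, unsigned> >(
      numRegs, std::make_pair(static_cast<MachineInstr*>(0), 0));
}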
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + DstEltVT, BV->getOperand(0))); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { SDValue Op = BV->getOperand(i); @@ -4500,8 +4510,6 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, - BV->getValueType(0).getVectorNumElements()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -5790,7 +5798,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); SDValue N0 = Value.getOperand(0); - if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + Chain == SDValue(N0.getNode(), 1)) { LoadSDNode *LD = cast<LoadSDNode>(N0); if (LD->getBasePtr() != Ptr) return SDValue(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index decaa76..a4eed71 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -94,7 +94,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { !(I->getOpcode() == Instruction::BitCast || I->getOpcode() == Instruction::PtrToInt || I->getOpcode() == Instruction::IntToPtr) && - cast<Instruction>(I->use_begin())->getParent() == I->getParent(); + cast<Instruction>(*I->use_begin())->getParent() == I->getParent(); } unsigned FastISel::getRegForValue(const Value *V) { @@ -146,7 +146,7 @@ unsigned FastISel::getRegForValue(const Value *V) { return Reg; } -/// materializeRegForValue - Helper for getRegForVale. This function is +/// materializeRegForValue - Helper for getRegForValue. This function is /// called when the value isn't already available in a register and must /// be materialized with new instructions. unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { @@ -276,6 +276,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { void FastISel::recomputeInsertPt() { if (getLastLocalValue()) { FuncInfo.InsertPt = getLastLocalValue(); + FuncInfo.MBB = FuncInfo.InsertPt->getParent(); ++FuncInfo.InsertPt; } else FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); @@ -472,17 +473,7 @@ bool FastISel::SelectCall(const User *I) { return true; const AllocaInst *AI = dyn_cast<AllocaInst>(Address); // Don't handle byval struct arguments or VLAs, for example. - // Note that if we have a byval struct argument, fast ISel is turned off; - // those are handled in SelectionDAGBuilder. - if (AI) { - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs. - int FI = SI->second; - if (!DI->getDebugLoc().isUnknown()) - FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(), - FI, DI->getDebugLoc()); - } else + if (!AI) // Building the map above is target independent. Generating DBG_VALUE // inline is target dependent; do this now. 
(void)TargetSelectInstruction(cast<Instruction>(I)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 928e1ec..5ef6404 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -111,17 +112,56 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + + // The object may need to be placed onto the stack near the stack + // protector if one exists. Determine here if this object is a suitable + // candidate. I.e., it would trigger the creation of a stack protector. + bool MayNeedSP = + (AI->isArrayAllocation() || + (TySize > 8 && isa<ArrayType>(Ty) && + cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); } for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Mark values used outside their block as exported, by allocating + // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) InitializeRegForValue(I); + // Collect llvm.dbg.declare information. This is done now instead of + // during the initial isel pass through the IR so that it is done + // in a predictable order. + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { + MachineModuleInfo &MMI = MF->getMMI(); + if (MMI.hasDebugInfo() && + DIVariable(DI->getVariable()).Verify() && + !DI->getDebugLoc().isUnknown()) { + // Don't handle byval struct arguments or VLAs, for example. + // Non-byval arguments are handled here (they refer to the stack + // temporary alloca at this point). + const Value *Address = DI->getAddress(); + if (Address) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + DenseMap<const AllocaInst *, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI != StaticAllocaMap.end()) { // Check for VLAs. + int FI = SI->second; + MMI.setVariableDbgInfo(DI->getVariable(), + FI, DI->getDebugLoc()); + } + } + } + } + } + } + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. @@ -181,6 +221,7 @@ void FunctionLoweringInfo::clear() { #endif LiveOutRegInfo.clear(); ArgDbgValues.clear(); + ByValArgFrameIndexMap.clear(); RegFixups.clear(); } @@ -214,6 +255,28 @@ unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { return FirstReg; } +/// setByValArgumentFrameIndex - Record frame index for the byval +/// argument. This overrides previous frame index entry for this argument, +/// if any. 
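The MayNeedSP test above flags allocas that would trigger stack-protector creation: any array allocation, or a static i8 array larger than 8 bytes. Restated as a standalone predicate; the parameter names are stand-ins for the IR queries, not LLVM API:

// isArrayAllocation <-> AI->isArrayAllocation()
// tySize            <-> total allocated size in bytes
// isByteArray       <-> isa<ArrayType>(Ty) with an i8 element type
bool mayNeedStackProtector(bool isArrayAllocation, unsigned long long tySize,
                           bool isByteArray) {
  return isArrayAllocation || (tySize > 8 && isByteArray);
}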
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, + int FI) { + assert (A->hasByValAttr() && "Argument does not have byval attribute!"); + ByValArgFrameIndexMap[A] = FI; +} + +/// getByValArgumentFrameIndex - Get frame index for the byval argument. +/// If the argument does not have any assigned frame index then 0 is +/// returned. +int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) { + assert (A->hasByValAttr() && "Argument does not have byval attribute!"); + DenseMap<const Argument *, int>::iterator I = + ByValArgFrameIndexMap.find(A); + if (I != ByValArgFrameIndexMap.end()) + return I->second; + DEBUG(dbgs() << "Argument does not have assigned frame index!"); + return 0; +} + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7a47da4..2981cd3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -100,8 +100,7 @@ public: /// it is already legal or we need to expand it into multiple registers of /// smaller integer type, or we need to promote it to a larger type. LegalizeAction getTypeAction(EVT VT) const { - return - (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT); + return (LegalizeAction)ValueTypeActions.getTypeAction(VT); } /// isTypeLegal - Return true if this type is legal on this target. @@ -1314,21 +1313,30 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case TargetLowering::Expand: - // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND - // f128 = EXTLOAD {f32,f64} too - if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 || - Node->getValueType(0) == MVT::f128)) || - (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Result = DAG.getNode(ISD::FP_EXTEND, dl, - Node->getValueType(0), Load); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. Tmp2 = LegalizeOp(Load.getValue(1)); break; } + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. 
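A short usage trace of the byval frame-index accessors above (the argument values are hypothetical):

// setByValArgumentFrameIndex(A, 3);  // record FI 3 for byval argument A
// setByValArgumentFrameIndex(A, 5);  // later entry overrides: map now holds 5
// getByValArgumentFrameIndex(A);     // returns 5
// getByValArgumentFrameIndex(B);     // no entry: returns 0 ("not assigned")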
assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b94ea9a..f8c5890 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -234,8 +234,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), - JoinIntegers(N->getOperand(0), N->getOperand(1))); + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), JoinIntegers(N->getOperand(0), + N->getOperand(1))); } SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { @@ -245,7 +246,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { // Zero extend things like i1, sign extend everything else. It shouldn't // matter in theory which one we pick, but this tends to give better code? unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT), + SDValue Result = DAG.getNode(Opc, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), VT), SDValue(N, 0)); assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); return Result; @@ -310,8 +312,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT - // and SINT conversions are Custom, there is no way to tell which is preferable. - // We choose SINT because that's the right thing on PPC.) + // and SINT conversions are Custom, there is no way to tell which is + // preferable. We choose SINT because that's the right thing on PPC.) if (N->getOpcode() == ISD::FP_TO_UINT && !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) @@ -1030,7 +1032,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Hi = InL; } else if (Amt == 1 && TLI.isOperationLegalOrCustom(ISD::ADDC, - TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { // Emit this X << 1 as X+X. 
SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); SDValue LoOps[2] = { InL, InL }; @@ -1926,7 +1928,8 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); } } @@ -2046,7 +2049,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getZeroExtendInReg(Hi, dl, - EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); + EVT::getIntegerVT(*DAG.getContext(), + ExcessBits)); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bd86694..d560292 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -75,7 +75,7 @@ private: /// getTypeAction - Return how we should legalize values of this type. LegalizeAction getTypeAction(EVT VT) const { - switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) { + switch (ValueTypeActions.getTypeAction(VT)) { default: assert(false && "Unknown legalize action!"); case TargetLowering::Legal: @@ -86,8 +86,7 @@ private: // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32). if (!VT.isVector()) return PromoteInteger; - else - return WidenVector; + return WidenVector; case TargetLowering::Expand: // Expand can mean // 1) split scalar in half, 2) convert a float to an integer, @@ -95,23 +94,21 @@ private: if (!VT.isVector()) { if (VT.isInteger()) return ExpandInteger; - else if (VT.getSizeInBits() == - TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) + if (VT.getSizeInBits() == + TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) return SoftenFloat; - else - return ExpandFloat; - } else if (VT.getVectorNumElements() == 1) { - return ScalarizeVector; - } else { - return SplitVector; + return ExpandFloat; } + + if (VT.getVectorNumElements() == 1) + return ScalarizeVector; + return SplitVector; } } /// isTypeLegal - Return true if this type is legal on this target. bool isTypeLegal(EVT VT) const { - return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) == - TargetLowering::Legal); + return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; } /// IgnoreNodeResults - Pretend all of this node's results are legal. 
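Concretely, on a hypothetical target with legal i32, f32, and v4i32 and 32-bit registers, the restructured getTypeAction decision tree above would yield:

// i1    -> PromoteInteger  (illegal scalar integer, widened to i32)
// i64   -> ExpandInteger   (split into two i32 halves)
// f64   -> SoftenFloat     (same bit width as the transformed integer type)
// v1f32 -> ScalarizeVector (single-element vector becomes a scalar)
// v8i32 -> SplitVector     (halved until the pieces are legal)
// v3i32 -> WidenVector     (padded up to v4i32)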
@@ -584,6 +581,7 @@ private: SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 93aeff5..93bc2d0 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -983,6 +983,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -1091,8 +1092,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); return SDValue(DAG.UpdateNodeOperands(N, Hi, DAG.getConstant(IdxVal - LoElts, Idx.getValueType())), 0); } // Store the vector to the stack. @@ -1113,7 +1113,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { assert(N->isUnindexed() && "Indexed store of vector?"); assert(OpNo == 1 && "Can only split the stored value"); - DebugLoc dl = N->getDebugLoc(); + DebugLoc DL = N->getDebugLoc(); bool isTruncating = N->isTruncatingStore(); SDValue Ch = N->getChain(); @@ -1132,25 +1132,49 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) - Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, LoMemVT, isVol, isNT, Alignment); else - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, isVol, isNT, Alignment); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; if (isTruncating) - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, HiMemVT, isVol, isNT, Alignment); else - Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, isVol, isNT, Alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { + DebugLoc DL = N->getDebugLoc(); + + // The input operands all must have the same type, and we know the + // result type is valid. Convert this to a BUILD_VECTOR which extracts all + // the input elements. + // TODO: If the input elements are power-of-two vectors, we could convert + // this to a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts; + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { + SDValue Op = N->getOperand(op); + for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); + i != e; ++i) { + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, + Op, DAG.getIntPtrConstant(i))); + + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), + &Elts[0], Elts.size()); } @@ -1274,8 +1298,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { - NumElts = NumElts / 2; - VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { @@ -1283,124 +1307,123 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); - } else if (NumElts == 1) { - // No legal vector version so unroll the vector operation and then widen. + } + + // No legal vector version so unroll the vector operation and then widen. + if (NumElts == 1) return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); - } else { - // Since the operation can trap, apply operation on the original vector. - EVT MaxVT = VT; - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); - - SmallVector<SDValue, 16> ConcatOps(CurNumElts); - unsigned ConcatEnd = 0; // Current ConcatOps index. - int Idx = 0; // Current Idx into input vectors. - - // NumElts := greatest synthesizable vector size (at most WidenVT) - // while (orig. vector has unhandled elements) { - // take munches of size NumElts from the beginning and add to ConcatOps - // NumElts := next smaller supported vector size or 1 - // } - while (CurNumElts != 0) { - while (CurNumElts >= NumElts) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getIntPtrConstant(Idx)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getIntPtrConstant(Idx)); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); - Idx += NumElts; - CurNumElts -= NumElts; - } - do { - NumElts = NumElts / 2; - VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); - - if (NumElts == 1) { - for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getIntPtrConstant(Idx)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getIntPtrConstant(Idx)); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, - EOp1, EOp2); - } - CurNumElts = 0; + + // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + int Idx = 0; // Current Idx into input vectors. 
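The effect of SplitVecOp_CONCAT_VECTORS above, reduced to plain arrays: every element of every operand is pulled out individually (the EXTRACT_VECTOR_ELT nodes) and the results are reassembled in order (the final BUILD_VECTOR). A sketch:

#include <cstddef>
#include <vector>

std::vector<int>
concatByElements(const std::vector<std::vector<int> > &ops) {
  std::vector<int> elts;
  for (std::size_t op = 0; op != ops.size(); ++op)
    for (std::size_t i = 0; i != ops[op].size(); ++i)
      elts.push_back(ops[op][i]);  // EXTRACT_VECTOR_ELT analogue
  return elts;                     // BUILD_VECTOR over the collected elements
}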
+ + // NumElts := greatest synthesizable vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + Idx += NumElts; + CurNumElts -= NumElts; + } + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); + + if (NumElts == 1) { + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp1, DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2); } + CurNumElts = 0; } + } - // Check to see if we have a single operation with the widen type. - if (ConcatEnd == 1) { - VT = ConcatOps[0].getValueType(); - if (VT == WidenVT) - return ConcatOps[0]; - } + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } - // while (Some element of ConcatOps is not of type MaxVT) { - // From the end of ConcatOps, collect elements of the same type and put - // them into an op of the next larger supported type - // } - while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { - Idx = ConcatEnd - 1; - VT = ConcatOps[Idx--].getValueType(); - while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) - Idx--; - - int NextSize = VT.isVector() ? 
VT.getVectorNumElements() : 1; - EVT NextVT; - do { - NextSize *= 2; - NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); - } while (!TLI.isTypeSynthesizable(NextVT)); - - if (!VT.isVector()) { - // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT - SDValue VecOp = DAG.getUNDEF(NextVT); - unsigned NumToInsert = ConcatEnd - Idx - 1; - for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); - } - ConcatOps[Idx+1] = VecOp; - ConcatEnd = Idx + 2; - } - else { - // Vector type, create a CONCAT_VECTORS of type NextVT - SDValue undefVec = DAG.getUNDEF(VT); - unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); - SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); - unsigned RealVals = ConcatEnd - Idx - 1; - unsigned SubConcatEnd = 0; - unsigned SubConcatIdx = Idx + 1; - while (SubConcatEnd < RealVals) - SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; - while (SubConcatEnd < OpsToConcat) - SubConcatOps[SubConcatEnd++] = undefVec; - ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NextVT, &SubConcatOps[0], - OpsToConcat); - ConcatEnd = SubConcatIdx + 1; + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; + VT = ConcatOps[Idx--].getValueType(); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeSynthesizable(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); } + ConcatOps[Idx+1] = VecOp; + ConcatEnd = Idx + 2; + } else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, &SubConcatOps[0], + OpsToConcat); + ConcatEnd = SubConcatIdx + 1; } + } - // Check to see if we have a single operation with the widen type. - if (ConcatEnd == 1) { - VT = ConcatOps[0].getValueType(); - if (VT == WidenVT) - return ConcatOps[0]; - } - - // add undefs of size MaxVT until ConcatOps grows to length of WidenVT - unsigned NumOps = - WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); - if (NumOps != ConcatEnd ) { - SDValue UndefVal = DAG.getUNDEF(MaxVT); - for (unsigned j = ConcatEnd; j < NumOps; ++j) - ConcatOps[j] = UndefVal; - } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + // Check to see if we have a single operation with the widen type. 
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; } + + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(MaxVT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -1561,8 +1584,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { unsigned NewNumElts = WidenSize / InSize; if (InVT.isVector()) { EVT InEltVT = InVT.getVectorElementType(); - NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, - WidenSize / InEltVT.getSizeInBits()); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, + WidenSize / InEltVT.getSizeInBits()); } else { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } @@ -1686,8 +1709,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); @@ -1720,9 +1742,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SmallVector<SDValue, 16> Ops(NumConcat); Ops[0] = InOp; SDValue UndefVal = DAG.getUNDEF(InVT); - for (unsigned i = 1; i != NumConcat; ++i) { + for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - } + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); @@ -2225,25 +2247,24 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Check if we can load the element with one instruction if (LdWidth <= NewVTWidth) { - if (NewVT.isVector()) { - if (NewVT != WidenVT) { - assert(WidenWidth % NewVTWidth == 0); - unsigned NumConcat = WidenWidth / NewVTWidth; - SmallVector<SDValue, 16> ConcatOps(NumConcat); - SDValue UndefVal = DAG.getUNDEF(NewVT); - ConcatOps[0] = LdOp; - for (unsigned i = 1; i != NumConcat; ++i) - ConcatOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], - NumConcat); - } else - return LdOp; - } else { + if (!NewVT.isVector()) { unsigned NumElts = WidenWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); } + if (NewVT == WidenVT) + return LdOp; + + assert(WidenWidth % NewVTWidth == 0); + unsigned NumConcat = WidenWidth / NewVTWidth; + SmallVector<SDValue, 16> ConcatOps(NumConcat); + SDValue UndefVal = DAG.getUNDEF(NewVT); + ConcatOps[0] = LdOp; + for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], + NumConcat); } // Load vector by using multiple loads from largest vector to scalar @@ -2276,52 +2297,55 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Build the vector from the loads operations unsigned End = LdOps.size(); - if (LdOps[0].getValueType().isVector()) { - // If the load contains vectors, build the vector using concat vector. 
- // All of the vectors used to loads are power of 2 and the scalars load - // can be combined to make a power of 2 vector. - SmallVector<SDValue, 16> ConcatOps(End); - int i = End - 1; - int Idx = End; - EVT LdTy = LdOps[i].getValueType(); - // First combine the scalar loads to a vector - if (!LdTy.isVector()) { - for (--i; i >= 0; --i) { - LdTy = LdOps[i].getValueType(); - if (LdTy.isVector()) - break; - } - ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); - } - ConcatOps[--Idx] = LdOps[i]; + if (!LdOps[0].getValueType().isVector()) + // All the loads are scalar loads. + return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); + + // If the load contains vectors, build the vector using concat vector. + // All of the vectors used to loads are power of 2 and the scalars load + // can be combined to make a power of 2 vector. + SmallVector<SDValue, 16> ConcatOps(End); + int i = End - 1; + int Idx = End; + EVT LdTy = LdOps[i].getValueType(); + // First combine the scalar loads to a vector + if (!LdTy.isVector()) { for (--i; i >= 0; --i) { - EVT NewLdTy = LdOps[i].getValueType(); - if (NewLdTy != LdTy) { - // Create a larger vector - ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, - &ConcatOps[Idx], End - Idx); - Idx = End - 1; - LdTy = NewLdTy; - } - ConcatOps[--Idx] = LdOps[i]; + LdTy = LdOps[i].getValueType(); + if (LdTy.isVector()) + break; } + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + } + ConcatOps[--Idx] = LdOps[i]; + for (--i; i >= 0; --i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + // Create a larger vector + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, + &ConcatOps[Idx], End - Idx); + Idx = End - 1; + LdTy = NewLdTy; + } + ConcatOps[--Idx] = LdOps[i]; + } - if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) { - // We need to fill the rest with undefs to build the vector - unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); - SmallVector<SDValue, 16> WidenOps(NumOps); - SDValue UndefVal = DAG.getUNDEF(LdTy); - unsigned i = 0; - for (; i != End-Idx; ++i) - WidenOps[i] = ConcatOps[Idx+i]; - for (; i != NumOps; ++i) - WidenOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); - } else - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - &ConcatOps[Idx], End - Idx); - } else // All the loads are scalar loads. 
- return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); + if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + &ConcatOps[Idx], End - Idx); + + // We need to fill the rest with undefs to build the vector + unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + SmallVector<SDValue, 16> WidenOps(NumOps); + SDValue UndefVal = DAG.getUNDEF(LdTy); + { + unsigned i = 0; + for (; i != End-Idx; ++i) + WidenOps[i] = ConcatOps[Idx+i]; + for (; i != NumOps; ++i) + WidenOps[i] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); } SDValue diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 3b86c32..fae2729 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -432,6 +433,30 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, return N->getValueType(NumRes); } +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do @@ -446,37 +471,44 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isAssignedRegDep()) { - unsigned Reg = I->getReg(); - if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { - if (RegAdded.insert(Reg)) - LRegs.push_back(Reg); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); - *Alias; ++Alias) - if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { - if (RegAdded.insert(*Alias)) - LRegs.push_back(*Alias); - } + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); } } for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. 
+ if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } if (!Node->isMachineOpcode()) continue; const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); if (!TID.ImplicitDefs) continue; for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { - if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { - if (RegAdded.insert(*Reg)) - LRegs.push_back(*Reg); - } - for (const unsigned *Alias = TRI->getAliasSet(*Reg); - *Alias; ++Alias) - if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { - if (RegAdded.insert(*Alias)) - LRegs.push_back(*Alias); - } + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } return !LRegs.empty(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 3ef521c..4c3e4e3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -24,6 +24,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -54,10 +55,16 @@ static RegisterScheduler static RegisterScheduler hybridListDAGScheduler("list-hybrid", - "Bottom-up rr list scheduling which avoid stalls for " - "long latency instructions", + "Bottom-up register pressure aware list scheduling " + "which tries to balance latency and register pressure", createHybridListDAGScheduler); +static RegisterScheduler + ILPListDAGScheduler("list-ilp", + "Bottom-up register pressure aware list scheduling " + "which tries to balance ILP and register pressure", + createILPListDAGScheduler); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -181,7 +188,9 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { - DEBUG(dbgs() << "********** List Scheduling **********\n"); + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); @@ -273,6 +282,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); + AvailableQueue->ScheduledNode(SU); + ReleasePredecessors(SU, CurCycle); // Release all the implicit physical register defs that are live. @@ -291,7 +302,6 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { } SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); } /// CapturePred - This does the opposite of ReleasePred. 
Since SU is being @@ -315,8 +325,6 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); - AvailableQueue->UnscheduledNode(SU); - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); @@ -346,6 +354,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SU->isScheduled = false; SU->isAvailable = true; AvailableQueue->push(SU); + AvailableQueue->UnscheduledNode(SU); } /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in @@ -956,7 +965,8 @@ namespace { template<class SF> class RegReductionPriorityQueue; - /// Sorting functions for the Available queue. + /// bu_ls_rr_sort - Priority function for bottom up register pressure + // reduction scheduler. struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} @@ -965,6 +975,8 @@ namespace { bool operator()(const SUnit* left, const SUnit* right) const; }; + // td_ls_rr_sort - Priority function for top down register pressure reduction + // scheduler. struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} @@ -973,6 +985,7 @@ namespace { bool operator()(const SUnit* left, const SUnit* right) const; }; + // src_ls_rr_sort - Priority function for source order scheduler. struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) @@ -983,13 +996,26 @@ namespace { bool operator()(const SUnit* left, const SUnit* right) const; }; + // hybrid_ls_rr_sort - Priority function for hybrid scheduler. struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) : SPQ(spq) {} hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) + // scheduler. + struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; + ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) + : SPQ(spq) {} + ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + bool operator()(const SUnit* left, const SUnit* right) const; }; } // end anonymous namespace @@ -1029,23 +1055,48 @@ namespace { std::vector<SUnit*> Queue; SF Picker; unsigned CurQueueId; + bool TracksRegPressure; protected: // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; - + + MachineFunction &MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const TargetLowering *TLI; ScheduleDAGRRList *scheduleDAG; // SethiUllmanNumbers - The SethiUllman number for each node. std::vector<unsigned> SethiUllmanNumbers; + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. 
+ std::vector<unsigned> RegLimit; + public: - RegReductionPriorityQueue(const TargetInstrInfo *tii, - const TargetRegisterInfo *tri) - : Picker(this), CurQueueId(0), - TII(tii), TRI(tri), scheduleDAG(NULL) {} + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + } + } void initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; @@ -1072,6 +1123,7 @@ namespace { void releaseState() { SUnits = 0; SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); } unsigned getNodePriority(const SUnit *SU) const { @@ -1139,10 +1191,244 @@ namespace { SU->NodeQueueId = 0; } + bool HighRegPressure(const SUnit *SU) const { + if (!TLI) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + return true; // Reg pressure already high. + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if (!PN->hasAnyUseOfValue(i)) + continue; + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. 
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + } + } + + return false; + } + + void ScheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + if (PredSU->NumSuccsLeft != PredSU->NumSuccs) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. 
+ RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + dumpRegPressure(); + } + + void UnscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + if (PredSU->NumSuccsLeft != PredSU->NumSuccs) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. 
+ if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Flag || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } + + dumpRegPressure(); + } + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { scheduleDAG = scheduleDag; } + void dumpRegPressure() const { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + unsigned Id = RC->getID(); + unsigned RP = RegPressure[Id]; + if (!RP) continue; + DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] + << '\n'); + } + } + protected: bool canClobber(const SUnit *SU, const SUnit *Op); void AddPseudoTwoAddrDeps(); @@ -1161,6 +1447,9 @@ namespace { typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> HybridBURRPriorityQueue; + + typedef RegReductionPriorityQueue<ilp_ls_rr_sort> + ILPBURRPriorityQueue; } /// closestSucc - Returns the scheduled cycle of the successor which is @@ -1260,30 +1549,63 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { } bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ - bool LStall = left->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < left->getHeight(); - bool RStall = right->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < right->getHeight(); - // If scheduling one of the node will cause a pipeline stall, delay it. - // If scheduling either one of the node will cause a pipeline stall, sort them - // according to their height. - // If neither will cause a pipeline stall, try to reduce register pressure. - if (LStall) { - if (!RStall) - return true; - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - } else if (RStall) + bool LHigh = SPQ->HighRegPressure(left); + bool RHigh = SPQ->HighRegPressure(right); + // Avoid causing spills. If register pressure is high, schedule for + // register pressure reduction. + if (LHigh && !RHigh) + return true; + else if (!LHigh && RHigh) + return false; + else if (!LHigh && !RHigh) { + // Low register pressure situation, schedule for latency if possible. + bool LStall = left->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < left->getHeight(); + bool RStall = right->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < right->getHeight(); + // If scheduling one of the node will cause a pipeline stall, delay it. + // If scheduling either one of the node will cause a pipeline stall, sort + // them according to their height. + // If neither will cause a pipeline stall, try to reduce register pressure. + if (LStall) { + if (!RStall) + return true; + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + } else if (RStall) return false; - // If either node is scheduling for latency, sort them by height and latency - // first. - if (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency) { - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - if (left->Latency != right->Latency) - return left->Latency > right->Latency; + // If either node is scheduling for latency, sort them by height and latency + // first. 
+    if (left->SchedulingPref == Sched::Latency ||
+        right->SchedulingPref == Sched::Latency) {
+      if (left->getHeight() != right->getHeight())
+        return left->getHeight() > right->getHeight();
+      if (left->Latency != right->Latency)
+        return left->Latency > right->Latency;
+    }
+  }
+
+  return BURRSort(left, right, SPQ);
+}
+
+bool ilp_ls_rr_sort::operator()(const SUnit *left,
+                                const SUnit *right) const {
+  bool LHigh = SPQ->HighRegPressure(left);
+  bool RHigh = SPQ->HighRegPressure(right);
+  // Avoid causing spills. If register pressure is high, schedule for
+  // register pressure reduction.
+  if (LHigh && !RHigh)
+    return true;
+  else if (!LHigh && RHigh)
+    return false;
+  else if (!LHigh && !RHigh) {
+    // Low register pressure situation, schedule to maximize instruction level
+    // parallelism.
+    if (left->NumPreds > right->NumPreds)
+      return false;
+    else if (left->NumPreds < right->NumPreds)
+      return true;
   }
 
   return BURRSort(left, right, SPQ);
@@ -1635,8 +1957,8 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
 
-  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
-
+  BURegReductionPriorityQueue *PQ =
+    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
@@ -1648,8 +1970,8 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
 
-  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
-
+  TDRegReductionPriorityQueue *PQ =
+    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
@@ -1661,8 +1983,8 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
 
-  SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
-
+  SrcRegReductionPriorityQueue *PQ =
+    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
@@ -1673,9 +1995,24 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  const TargetLowering *TLI = &IS->getTargetLowering();
 
-  HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
+  HybridBURRPriorityQueue *PQ =
+    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+  PQ->setScheduleDAG(SD);
+  return SD;
+}
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  const TargetMachine &TM = IS->TM;
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  const TargetLowering *TLI = &IS->getTargetLowering();
+
+  ILPBURRPriorityQueue *PQ =
+    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
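The hybrid and ILP comparators added above share one shape: classify both candidates by register pressure first, and fall back to a secondary heuristic only when the two sides agree. A minimal standalone model of that two-level comparison (plain C++, no LLVM headers; the single-class pressure state and all names are illustrative, not part of the patch):

    #include <cassert>
    #include <vector>

    // Model of a scheduling priority-queue comparator: a node is "high
    // pressure" when issuing it would push its register class to (or past)
    // the limit; only when both candidates are on the same side of that
    // test does the secondary heuristic decide. Returning true
    // deprioritizes 'left'.
    struct Node {
      unsigned ClassId;   // register class of the node's result
      unsigned Cost;      // registers the result occupies
      unsigned NumPreds;  // predecessor count, the ILP tie-breaker
    };

    struct PressureState {
      std::vector<unsigned> RegPressure; // current pressure per class
      std::vector<unsigned> RegLimit;    // limit per class
      bool highPressure(const Node &N) const {
        return RegPressure[N.ClassId] + N.Cost >= RegLimit[N.ClassId];
      }
    };

    bool ilpLess(const Node &left, const Node &right, const PressureState &S) {
      bool LHigh = S.highPressure(left);
      bool RHigh = S.highPressure(right);
      if (LHigh && !RHigh)
        return true;    // only left is near a limit: let right go first
      if (!LHigh && RHigh)
        return false;   // only right is near a limit: keep left first
      if (!LHigh && !RHigh && left.NumPreds != right.NumPreds)
        return left.NumPreds < right.NumPreds; // same tie-break as above
      return false;     // otherwise defer to the base heuristic (omitted)
    }

    int main() {
      PressureState S;
      S.RegPressure.push_back(2);
      S.RegLimit.push_back(5);
      Node A = {0, 1, 2};  // 2 + 1 < 5: low pressure
      Node B = {0, 3, 4};  // 2 + 3 >= 5: high pressure
      assert(!ilpLess(A, B, S) && ilpLess(B, A, S)); // A is preferred
    }

hybrid_ls_rr_sort uses the same skeleton, with the stall/height comparison standing in for the NumPreds tie-break.

diff --git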
a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 06cf053..f1bf82a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -59,8 +59,9 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - if (N->isMachineOpcode() && - N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) + if (!N || + (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)) SU->SchedulingPref = Sched::None; else SU->SchedulingPref = TLI.getSchedulingPreference(N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e83a034..ad06ebd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2236,7 +2236,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. - if (FiniteOnlyFPMath()) + if (NoNaNsFPMath) return true; // If the value is a constant, we can obviously see if it is a NaN or not. @@ -2281,35 +2281,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { } -/// getShuffleScalarElt - Returns the scalar element that will make up the ith -/// element of the result of the vector shuffle. -SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, - unsigned i) { - EVT VT = N->getValueType(0); - if (N->getMaskElt(i) < 0) - return getUNDEF(VT.getVectorElementType()); - unsigned Index = N->getMaskElt(i); - unsigned NumElems = VT.getVectorNumElements(); - SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); - Index %= NumElems; - - if (V.getOpcode() == ISD::BIT_CONVERT) { - V = V.getOperand(0); - EVT VVT = V.getValueType(); - if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) - return SDValue(); - } - if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) - return (Index == 0) ? V.getOperand(0) - : getUNDEF(VT.getVectorElementType()); - if (V.getOpcode() == ISD::BUILD_VECTOR) - return V.getOperand(Index); - if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V)) - return getShuffleScalarElt(SVN, Index); - return SDValue(); -} - - /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { @@ -2624,7 +2595,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // one big BUILD_VECTOR. 
if (N1.getOpcode() == ISD::BUILD_VECTOR && N2.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), + N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); } @@ -3021,7 +2993,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N1.getOpcode() == ISD::BUILD_VECTOR && N2.getOpcode() == ISD::BUILD_VECTOR && N3.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), + N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); @@ -5872,6 +5845,7 @@ std::string ISD::ArgFlagsTy::getArgFlagsString() { void SDNode::dump() const { dump(0); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); + dbgs() << '\n'; } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { @@ -5895,7 +5869,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { OS << **i; - if (next(i) != e) + if (llvm::next(i) != e) OS << " "; } OS << ">"; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 458e865..e657445 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -70,22 +70,29 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, unsigned NumParts, + EVT PartVT, EVT ValueVT); + /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). -static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, +static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { + if (ValueVT.isVector()) + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; if (NumParts > 1) { // Assemble the value from multiple parts. 
- if (!ValueVT.isVector() && ValueVT.isInteger()) { + if (ValueVT.isInteger()) { unsigned PartBits = PartVT.getSizeInBits(); unsigned ValueBits = ValueVT.getSizeInBits(); @@ -100,25 +107,25 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { - Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2, + Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT); - Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2, + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT); } else { - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]); + Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]); } if (TLI.isBigEndian()) std::swap(Lo, Hi); - Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); - Hi = getCopyFromParts(DAG, dl, + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, OddVT); // Combine the round and odd parts. @@ -126,68 +133,29 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (TLI.isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); - Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), TLI.getPointerTy())); - Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); - Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); - } - } else if (ValueVT.isVector()) { - // Handle a multi-element vector. - EVT IntermediateVT, RegisterVT; - unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); - assert(NumRegs == NumParts - && "Part count doesn't match vector breakdown!"); - NumParts = NumRegs; // Silence a compiler warning. - assert(RegisterVT == PartVT - && "Part type doesn't match vector breakdown!"); - assert(RegisterVT == Parts[0].getValueType() && - "Part type doesn't match part!"); - - // Assemble the parts into intermediate operands. - SmallVector<SDValue, 8> Ops(NumIntermediates); - if (NumIntermediates == NumParts) { - // If the register was not expanded, truncate or copy the value, - // as appropriate. - for (unsigned i = 0; i != NumParts; ++i) - Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, - PartVT, IntermediateVT); - } else if (NumParts > 0) { - // If the intermediate type was expanded, build the intermediate - // operands from the parts. - assert(NumParts % NumIntermediates == 0 && - "Must expand into a divisible number of parts!"); - unsigned Factor = NumParts / NumIntermediates; - for (unsigned i = 0; i != NumIntermediates; ++i) - Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, - PartVT, IntermediateVT); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } - - // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the - // intermediate operands. - Val = DAG.getNode(IntermediateVT.isVector() ? 
- ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, - ValueVT, &Ops[0], NumIntermediates); } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]); + Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT); } } @@ -197,219 +165,315 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (PartVT == ValueVT) return Val; - if (PartVT.isVector()) { - assert(ValueVT.isVector() && "Unknown vector conversion!"); - return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - } - - if (ValueVT.isVector()) { - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && - "Only trivial scalar-to-vector conversions should get here!"); - return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); - } - - if (PartVT.isInteger() && - ValueVT.isInteger()) { + if (PartVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. if (AssertOp != ISD::DELETED_NODE) - Val = DAG.getNode(AssertOp, dl, PartVT, Val, + Val = DAG.getNode(AssertOp, DL, PartVT, Val, DAG.getValueType(ValueVT)); - return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - } else { - return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); + return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } + return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); } if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { - if (ValueVT.bitsLT(Val.getValueType())) { - // FP_ROUND's are always exact here. - return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, + // FP_ROUND's are always exact here. + if (ValueVT.bitsLT(Val.getValueType())) + return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, DAG.getIntPtrConstant(1)); - } - return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); + return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) - return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); return SDValue(); } +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). 
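Before the vector variant that follows, note the shape of the scalar assembly above: round the part count down to a power of two, build that prefix pairwise, then shift and OR in the odd tail. A runnable standalone model of the recursion (little-endian 8-bit parts only; GCC/Clang's __builtin_clz stands in for Log2_32, and the big-endian swaps are omitted):

    #include <cassert>
    #include <cstdint>

    // Model of the scalar getCopyFromParts recursion. Assumes at most
    // eight 8-bit parts so the result fits in a uint64_t.
    static uint64_t assemble(const uint8_t *Parts, unsigned NumParts) {
      assert(NumParts > 0 && NumParts <= 8 && "No parts to assemble!");
      if (NumParts == 1)
        return Parts[0];
      // Round down to a power of two, like 1 << Log2_32(NumParts).
      unsigned RoundParts = 1u << (31 - __builtin_clz(NumParts));
      uint64_t Lo = assemble(Parts, RoundParts / 2);
      uint64_t Hi = assemble(Parts + RoundParts / 2, RoundParts / 2);
      uint64_t Val = Lo | (Hi << (RoundParts / 2 * 8)); // BUILD_PAIR analogue
      if (RoundParts < NumParts) {
        // The trailing non-power-of-2 tail: assemble it separately and OR
        // it in above the rounded prefix, as the ANY_EXTEND/SHL/OR does.
        uint64_t Odd = assemble(Parts + RoundParts, NumParts - RoundParts);
        Val |= Odd << (RoundParts * 8);
      }
      return Val;
    }

    int main() {
      uint8_t P[3] = {0x11, 0x22, 0x33}; // three 8-bit parts, 24-bit value
      assert(assemble(P, 3) == 0x332211);
    }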
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, unsigned NumParts, + EVT PartVT, EVT ValueVT) { + assert(ValueVT.isVector() && "Not a vector value"); + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + // Handle a multi-element vector. + if (NumParts > 1) { + EVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate + // operands from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the + // intermediate operands. + Val = DAG.getNode(IntermediateVT.isVector() ? + ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, + ValueVT, &Ops[0], NumIntermediates); + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. + if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getIntPtrConstant(0)); + } + + // Vector/Vector bitcast. + return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + } + + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); +} + + + + +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT); + /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. 
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, +static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PtrVT = TLI.getPointerTy(); EVT ValueVT = Val.getValueType(); + + // Handle the vector case separately. + if (ValueVT.isVector()) + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); - if (!NumParts) + if (NumParts == 0) return; - if (!ValueVT.isVector()) { - if (PartVT == ValueVT) { - assert(NumParts == 1 && "No-op copy with multiple parts!"); - Parts[0] = Val; - return; - } - - if (NumParts * PartBits > ValueVT.getSizeInBits()) { - // If the parts cover more bits than the value has, promote the value. - if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { - assert(NumParts == 1 && "Do not know what to promote to!"); - Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); - } else if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); - } else { - llvm_unreachable("Unknown mismatch!"); - } - } else if (PartBits == ValueVT.getSizeInBits()) { - // Different types of the same size. - assert(NumParts == 1 && PartVT != ValueVT); - Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); - } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { - // If the parts cover less bits than value has, truncate the value. - if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - } else { - llvm_unreachable("Unknown mismatch!"); - } - } - - // The value may have changed - recompute ValueVT. - ValueVT = Val.getValueType(); - assert(NumParts * PartBits == ValueVT.getSizeInBits() && - "Failed to tile the value with PartVT!"); - - if (NumParts == 1) { - assert(PartVT == ValueVT && "Type conversion failed!"); - Parts[0] = Val; - return; - } + assert(!ValueVT.isVector() && "Vector case handled elsewhere"); + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } - // Expand the value into multiple parts. - if (NumParts & (NumParts - 1)) { - // The number of parts is not a power of 2. Split off and copy the tail. + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else { assert(PartVT.isInteger() && ValueVT.isInteger() && - "Do not know what to expand to!"); - unsigned RoundParts = 1 << Log2_32(NumParts); - unsigned RoundBits = RoundParts * PartBits; - unsigned OddParts = NumParts - RoundParts; - SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, - DAG.getConstant(RoundBits, - TLI.getPointerTy())); - getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, - OddParts, PartVT); - - if (TLI.isBigEndian()) - // The odd parts were reversed by getCopyToParts - unreverse them. 
- std::reverse(Parts + RoundParts, Parts + NumParts); - - NumParts = RoundParts; + "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); - // The number of parts is a power of 2. Repeatedly bisect the value using - // EXTRACT_ELEMENT. - Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, - EVT::getIntegerVT(*DAG.getContext(), - ValueVT.getSizeInBits()), - Val); - - for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { - for (unsigned i = 0; i < NumParts; i += StepSize) { - unsigned ThisBits = StepSize * PartBits / 2; - EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); - SDValue &Part0 = Parts[i]; - SDValue &Part1 = Parts[i+StepSize/2]; - - Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, - ThisVT, Part0, - DAG.getConstant(1, PtrVT)); - Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, - ThisVT, Part0, - DAG.getConstant(0, PtrVT)); - - if (ThisBits == PartBits && ThisVT != PartVT) { - Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, - PartVT, Part0); - Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, - PartVT, Part1); - } + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, + DAG.getIntPtrConstant(RoundBits)); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + + NumParts = RoundParts; + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. 
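A standalone model of the bisection loop introduced by this comment (little-endian again, 8-bit parts; the hi/lo splits play the role of the EXTRACT_ELEMENT pairs that follow):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Model of the power-of-2 expansion: start with the whole value in
    // slot 0, then repeatedly bisect until every slot holds one part.
    static void expand(uint64_t Val, uint8_t *Parts, unsigned NumParts) {
      assert((NumParts & (NumParts - 1)) == 0 && "must be a power of 2");
      std::vector<uint64_t> Tmp(NumParts);
      Tmp[0] = Val;
      for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
        for (unsigned i = 0; i < NumParts; i += StepSize) {
          unsigned ThisBits = StepSize * 8 / 2; // half of this chunk, in bits
          uint64_t Part0 = Tmp[i];
          Tmp[i + StepSize / 2] = Part0 >> ThisBits;  // "element 1" (high)
          // Mask the low half; the ternary keeps a 64-bit chunk well-defined.
          Tmp[i] = Part0 & (((ThisBits < 64 ? (1ull << ThisBits) : 0)) - 1);
        }
      }
      for (unsigned i = 0; i != NumParts; ++i)
        Parts[i] = static_cast<uint8_t>(Tmp[i]);
    }

    int main() {
      uint8_t P[4];
      expand(0x44332211u, P, 4);
      assert(P[0] == 0x11 && P[1] == 0x22 && P[2] == 0x33 && P[3] == 0x44);
    }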
+ Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL, + EVT::getIntegerVT(*DAG.getContext(), + ValueVT.getSizeInBits()), + Val); + + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(1)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(0)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1); } } + } - if (TLI.isBigEndian()) - std::reverse(Parts, Parts + OrigNumParts); + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); +} - return; - } - // Vector ValueVT. +/// getCopyToPartsVector - Create a series of nodes that contain the specified +/// value split into legal parts. +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT) { + EVT ValueVT = Val.getValueType(); + assert(ValueVT.isVector() && "Not a vector"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (NumParts == 1) { - if (PartVT != ValueVT) { - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { - Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); - } else { - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - PartVT, Val, - DAG.getConstant(0, PtrVT)); - } - } + if (PartVT == ValueVT) { + // Nothing to do. + } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { + // Bitconvert vector->vector case. + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (PartVT.isVector() && + PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector<SDValue, 16> Ops; + for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ElementVT, Val, DAG.getIntPtrConstant(i))); + + for (unsigned i = ValueVT.getVectorNumElements(), + e = PartVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ElementVT)); + + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + // FIXME: Use CONCAT for 2x -> 4x. + + //SDValue UndefElts = DAG.getUNDEF(VectorTy); + //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else { + // Vector -> scalar conversion. + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + PartVT, Val, DAG.getIntPtrConstant(0)); + } + Parts[0] = Val; return; } - + // Handle a multi-element vector. 
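For the widening case just handled (defined lanes first, then undef filler), a standalone model in which a lane with Defined == false stands in for an UNDEF node:

    #include <cassert>
    #include <vector>

    // Model of the vector-widening path, e.g. <2 x float> -> <4 x float>:
    // keep the defined lanes of the narrow value and pad with "undef"
    // lanes until the wide register type is full.
    struct Lane { float Val; bool Defined; };

    static std::vector<Lane> widen(const std::vector<float> &Narrow,
                                   unsigned WideLanes) {
      assert(WideLanes >= Narrow.size() && "this path only widens");
      std::vector<Lane> Wide;
      for (unsigned i = 0; i != Narrow.size(); ++i) {
        Lane L = { Narrow[i], true };  // EXTRACT_VECTOR_ELT of a real lane
        Wide.push_back(L);
      }
      while (Wide.size() < WideLanes) {
        Lane U = { 0.0f, false };      // UNDEF filler lane
        Wide.push_back(U);
      }
      return Wide;                     // BUILD_VECTOR of the combined lanes
    }

    int main() {
      std::vector<float> Narrow;
      Narrow.push_back(1.0f);
      Narrow.push_back(2.0f);
      std::vector<Lane> W = widen(Narrow, 4);
      assert(W.size() == 4 && W[1].Defined && !W[3].Defined);
    }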
EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, NumIntermediates, RegisterVT); + IntermediateVT, + NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); - + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) - Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), - PtrVT)); + DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); else - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - IntermediateVT, Val, - DAG.getConstant(i, PtrVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + IntermediateVT, Val, DAG.getIntPtrConstant(i)); } - + // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -417,10 +481,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); } } + + + namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information @@ -460,11 +527,6 @@ namespace { EVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - RegsForValue(const SmallVector<unsigned, 4> ®s, - const SmallVector<EVT, 4> ®vts, - const SmallVector<EVT, 4> &valuevts) - : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, const Type *Ty) { ComputeValueVTs(tli, Ty, ValueVTs); @@ -530,6 +592,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, SDValue &Chain, SDValue *Flag) const { + // A Value with type {} or [0 x %t] needs no registers. + if (ValueVTs.empty()) + return SDValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Assemble the legal parts into the final values. 
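Earlier in this hunk the wide vector is sliced into intermediates at offsets i * (NumElements / NumIntermediates); a tiny standalone model of that indexing (it assumes an even split, as the assertions in the patch do):

    #include <cassert>
    #include <vector>

    // Model of the EXTRACT_SUBVECTOR slicing: intermediate i covers lanes
    // [i * Factor, (i + 1) * Factor) with Factor = NumElements / NumIntermediates.
    static std::vector<std::vector<int> >
    splitIntoIntermediates(const std::vector<int> &V, unsigned NumIntermediates) {
      unsigned NumElements = V.size();
      assert(NumElements % NumIntermediates == 0 && "model assumes even split");
      unsigned Factor = NumElements / NumIntermediates;
      std::vector<std::vector<int> > Ops(NumIntermediates);
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i].assign(V.begin() + i * Factor, V.begin() + (i + 1) * Factor);
      return Ops;
    }

    int main() {
      std::vector<int> V;
      for (int i = 0; i != 8; ++i)
        V.push_back(i);
      std::vector<std::vector<int> > Ops = splitIntoIntermediates(V, 2);
      assert(Ops[0].size() == 4 && Ops[1][0] == 4); // lanes 4..7 in Ops[1]
    }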
@@ -623,8 +689,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); EVT RegisterVT = RegVTs[Value]; - getCopyToParts(DAG, dl, - Val.getValue(Val.getResNo() + Value), + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); Part += NumParts; } @@ -701,6 +766,7 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + DanglingDebugInfoMap.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } @@ -805,6 +871,33 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, +// generate the debug data structures now that we've seen its definition. +void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, + SDValue Val) { + DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; + if (DDI.getDI()) { + const DbgValueInst *DI = DDI.getDI(); + DebugLoc dl = DDI.getdl(); + unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); + MDNode *Variable = DI->getVariable(); + uint64_t Offset = DI->getOffset(); + SDDbgValue *SDV; + if (Val.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + SDV = DAG.getDbgValue(Variable, Val.getNode(), + Val.getResNo(), Offset, dl, DbgSDNodeOrder); + DAG.AddDbgValue(SDV, Val.getNode(), false); + } + } else { + SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), + Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } + DanglingDebugInfoMap[V] = DanglingDebugInfo(); + } +} + // getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important @@ -826,6 +919,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); return Val; } @@ -839,10 +933,11 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); return Val; } -/// getValueImpl - Helper function for getValue and getMaterializedValue. +/// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. 
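resolveDanglingDebugInfo above is a park-and-resolve pattern: remember the dbg_value until its operand is lowered, then emit and clear. A minimal standalone model keyed by value name (illustrative names only, no LLVM types):

    #include <cassert>
    #include <map>
    #include <string>

    // A dbg_value whose operand has not been lowered yet is parked in a
    // map, then resolved the first time the operand is materialized.
    struct DanglingInfo { std::string Variable; unsigned Order; };

    static std::map<std::string, DanglingInfo> DanglingMap;

    static void rememberDbgValue(const std::string &V, const std::string &Var,
                                 unsigned Order) {
      DanglingInfo DDI = { Var, Order };
      DanglingMap[V] = DDI;            // referent not seen yet: defer
    }

    static void resolveDangling(const std::string &V) {
      std::map<std::string, DanglingInfo>::iterator I = DanglingMap.find(V);
      if (I == DanglingMap.end())
        return;
      // This is where the real code emits the SDDbgValue for I->second,
      // now that a value for V exists; the entry must fire only once.
      DanglingMap.erase(I);
    }

    int main() {
      rememberDbgValue("%x", "x", 1);  // dbg_value seen before %x's def
      resolveDangling("%x");           // definition finally lowered
      assert(DanglingMap.empty());
    }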
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Constant *C = dyn_cast<Constant>(V)) { @@ -986,10 +1081,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); - EVT PtrVT = PtrValueVTs[0]; for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr, - DAG.getConstant(Offsets[i], PtrVT)); + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + RetPtr.getValueType(), RetPtr, + DAG.getIntPtrConstant(Offsets[i])); Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -2709,11 +2804,6 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } Ty = StTy->getElementType(Field); - } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) { - unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); - - // Offset canonically 0 for unions, but type changes - Ty = UnTy->getElementType(Field); } else { Ty = cast<SequentialType>(Ty)->getElementType(); @@ -2818,7 +2908,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Inform the Frame Information that we have just allocated a variable-sized // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -3824,11 +3914,11 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool -SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, - const Value *V, MDNode *Variable, - uint64_t Offset, +SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, + int64_t Offset, const SDValue &N) { - if (!isa<Argument>(V)) + const Argument *Arg = dyn_cast<Argument>(V); + if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); @@ -3842,7 +3932,15 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, return false; unsigned Reg = 0; - if (N.getOpcode() == ISD::CopyFromReg) { + if (Arg->hasByValAttr()) { + // Byval arguments' frame index is recorded during argument lowering. + // Use this info directly. + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + Reg = TRI->getFrameRegister(MF); + Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + } + + if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -3966,42 +4064,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - if (!DIVariable(DI.getVariable()).Verify()) - return 0; - MDNode *Variable = DI.getVariable(); - // Parameters are handled specially. - bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; const Value *Address = DI.getAddress(); - if (!Address) + if (!Address || !DIVariable(DI.getVariable()).Verify()) return 0; - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) - Address = BCI->getOperand(0); - const AllocaInst *AI = dyn_cast<AllocaInst>(Address); - if (AI) { - // Don't handle byval arguments or VLAs, for example. 
-    // Non-byval arguments are handled here (they refer to the stack temporary
-    // alloca at this point).
-    DenseMap<const AllocaInst*, int>::iterator SI =
-      FuncInfo.StaticAllocaMap.find(AI);
-    if (SI == FuncInfo.StaticAllocaMap.end())
-      return 0; // VLAs.
-    int FI = SI->second;
-
-    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
-    if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
-      MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
-  }
     // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
     // but do not always have a corresponding SDNode built.  The SDNodeOrder
     // absolute, but not relative, values are different depending on whether
     // debug info exists.
     ++SDNodeOrder;
+
+    // Check if address has undef value.
+    if (isa<UndefValue>(Address) ||
+        (Address->use_empty() && !isa<Argument>(Address))) {
+      SDDbgValue*SDV =
+        DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+                        0, dl, SDNodeOrder);
+      DAG.AddDbgValue(SDV, 0, false);
+      return 0;
+    }
+    SDValue &N = NodeMap[Address];
+    if (!N.getNode() && isa<Argument>(Address))
+      // Check unused arguments map.
+      N = UnusedArgNodeMap[Address];
     SDDbgValue *SDV;
     if (N.getNode()) {
+      // Parameters are handled specially.
+      bool isParameter =
+        DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+      if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+        Address = BCI->getOperand(0);
+      const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
+      if (isParameter && !AI) {
         FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
         if (FINode)
@@ -4020,10 +4116,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
           return 0;
       DAG.AddDbgValue(SDV, N.getNode(), isParameter);
     } else {
-      // This isn't useful, but it shows what we're missing.
-      SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
-                            0, dl, SDNodeOrder);
-      DAG.AddDbgValue(SDV, 0, isParameter);
+      // If Address is an argument then try to emit its dbg value using
+      // virtual register info from the FuncInfo.ValueMap. Otherwise add undef
+      // to help track missing debug info.
+      if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+        SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+                              0, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, 0, false);
+      }
     }
     return 0;
   }
@@ -4048,31 +4148,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
       DAG.AddDbgValue(SDV, 0, false);
     } else {
-      bool createUndef = false;
-      // FIXME : Why not use getValue() directly ?
+      // Do not use getValue() in here; we don't want to generate code at
+      // this point if it hasn't been done yet.
       SDValue N = NodeMap[V];
       if (!N.getNode() && isa<Argument>(V))
         // Check unused arguments map.
N = UnusedArgNodeMap[V]; if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } - } else if (isa<PHINode>(V) && !V->use_empty()) { - SDValue N = getValue(V); - if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { - SDV = DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - } - } else - createUndef = true; - } else - createUndef = true; - if (createUndef) { + } else if (isa<PHINode>(V) && !V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; + } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter; we need this fallback. SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), @@ -4572,6 +4665,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) isTailCall = false; + // If there's a possibility that fast-isel has already selected some amount + // of the current basic block, don't emit a tail call. + if (isTailCall && EnableFastISel) + isTailCall = false; + std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(getRoot(), RetTy, CS.paramHasAttr(0, Attribute::SExt), @@ -6054,6 +6152,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { i += NumParts; } + // Note down frame index for byval arguments. + if (I->hasByValAttr() && !ArgValues.empty()) + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) + FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); + if (!I->use_empty()) { SDValue Res; if (!ArgValues.empty()) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 46733d6..5f400e9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,9 +18,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#ifndef NDEBUG -#include "llvm/ADT/SmallSet.h" -#endif #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/CallSite.h" @@ -64,6 +61,7 @@ class PHINode; class PtrToIntInst; class ReturnInst; class SDISelAsmOperandInfo; +class SDDbgValue; class SExtInst; class SelectInst; class ShuffleVectorInst; @@ -93,6 +91,24 @@ class SelectionDAGBuilder { /// to preserve debug information for incoming arguments. DenseMap<const Value*, SDValue> UnusedArgNodeMap; + /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap. + class DanglingDebugInfo { + const DbgValueInst* DI; + DebugLoc dl; + unsigned SDNodeOrder; + public: + DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { } + DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : + DI(di), dl(DL), SDNodeOrder(SDNO) { } + const DbgValueInst* getDI() { return DI; } + DebugLoc getdl() { return dl; } + unsigned getSDNodeOrder() { return SDNodeOrder; } + }; + + /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not + /// yet seen the referent. 
We defer handling these until we do see it. + DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap; + public: /// PendingLoads - Loads are not emitted to the program immediately. We bunch /// them up and then emit token factor nodes when possible. This allows us to @@ -345,6 +361,9 @@ public: void visit(unsigned Opcode, const User &I); + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, + // generate the debug data structures now that we've seen its definition. + void resolveDanglingDebugInfo(const Value *V, SDValue Val); SDValue getValue(const Value *V); SDValue getNonRegisterValue(const Value *V); SDValue getValueImpl(const Value *V); @@ -506,13 +525,11 @@ private: void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); - /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a - /// function argument, create the corresponding DBG_VALUE machine instruction - /// for it now. At the end of instruction selection, they will be inserted to - /// the entry BB. - bool EmitFuncArgumentDbgValue(const DbgValueInst &DI, - const Value *V, MDNode *Variable, - uint64_t Offset, const SDValue &N); + /// EmitFuncArgumentDbgValue - If V is an function argument then create + /// corresponding DBG_VALUE machine instruction for it now. At the end of + /// instruction selection, they will be inserted to the entry BB. + bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, + int64_t Offset, const SDValue &N); }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 08ba548..66cb5ce 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -132,14 +132,16 @@ namespace llvm { const TargetLowering &TLI = IS->getTargetLowering(); if (OptLevel == CodeGenOpt::None) - return createFastDAGScheduler(IS, OptLevel); + return createSourceListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::Latency) return createTDListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); - assert(TLI.getSchedulingPreference() == Sched::Hybrid && + if (TLI.getSchedulingPreference() == Sched::Hybrid) + return createHybridListDAGScheduler(IS, OptLevel); + assert(TLI.getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); - return createHybridListDAGScheduler(IS, OptLevel); + return createILPListDAGScheduler(IS, OptLevel); } } @@ -169,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, //===----------------------------------------------------------------------===// SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : - MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), + MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), CurDAG(new SelectionDAG(tm)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), @@ -216,7 +218,7 @@ static bool FunctionCallsSetJmp(const Function *F) { for (Value::const_use_iterator I = Callee->use_begin(), E = Callee->use_end(); I != E; ++I) - if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const CallInst *CI = dyn_cast<CallInst>(*I)) if (CI->getParent()->getParent() == F) return true; } @@ -362,38 +364,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, CodeGenAndEmitDAG(); } -namespace { -/// 
WorkListRemover - This class is a DAGUpdateListener that removes any deleted -/// nodes from the worklist. -class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener { - SmallVector<SDNode*, 128> &Worklist; - SmallPtrSet<SDNode*, 128> &InWorklist; -public: - SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl, - SmallPtrSet<SDNode*, 128> &inwl) - : Worklist(wl), InWorklist(inwl) {} - - void RemoveFromWorklist(SDNode *N) { - if (!InWorklist.erase(N)) return; - - SmallVector<SDNode*, 128>::iterator I = - std::find(Worklist.begin(), Worklist.end(), N); - assert(I != Worklist.end() && "Not in worklist"); - - *I = Worklist.back(); - Worklist.pop_back(); - } - - virtual void NodeDeleted(SDNode *N, SDNode *E) { - RemoveFromWorklist(N); - } - - virtual void NodeUpdated(SDNode *N) { - // Ignore updates. - } -}; -} - void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet<SDNode*, 128> VisitedNodes; SmallVector<SDNode*, 128> Worklist; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 6cae804..8313de5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -199,7 +199,7 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { #else errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" << " on systems with Graphviz or gv!\n"; - return std::string(""); + return std::string(); #endif } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4f38669..b74f600 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -651,6 +651,53 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, return NumVectorRegs; } +/// isLegalRC - Return true if the value types that can be represented by the +/// specified register class are all legal. +bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) + return true; + } + return false; +} + +/// hasLegalSuperRegRegClasses - Return true if the specified register class +/// has one or more super-reg register classes that are legal. +bool +TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{ + if (*RC->superregclasses_begin() == 0) + return false; + for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), + E = RC->superregclasses_end(); I != E; ++I) { + const TargetRegisterClass *RRC = *I; + if (isLegalRC(RRC)) + return true; + } + return false; +} + +/// findRepresentativeClass - Return the largest legal super-reg register class +/// of the register class for the specified type and its associated "cost". 
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLowering::findRepresentativeClass(EVT VT) const {
+  const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+  if (!RC)
+    return std::make_pair(RC, 0);
+  const TargetRegisterClass *BestRC = RC;
+  for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+         E = RC->superregclasses_end(); I != E; ++I) {
+    const TargetRegisterClass *RRC = *I;
+    if (RRC->isASubClass() || !isLegalRC(RRC))
+      continue;
+    if (!hasLegalSuperRegRegClasses(RRC))
+      return std::make_pair(RRC, 1);
+    BestRC = RRC;
+  }
+  return std::make_pair(BestRC, 1);
+}
+
+
 /// computeRegisterProperties - Once all of the register classes are added,
 /// this allows us to compute derived properties we expose.
 void TargetLowering::computeRegisterProperties() {
@@ -736,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
     MVT VT = (MVT::SimpleValueType)i;
     if (isTypeLegal(VT)) continue;
 
+    // Determine if there is a legal wider type.  If so, we should promote to
+    // that wider vector type.
+    EVT EltVT = VT.getVectorElementType();
+    unsigned NElts = VT.getVectorNumElements();
+    if (NElts != 1) {
+      bool IsLegalWiderType = false;
+      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        EVT SVT = (MVT::SimpleValueType)nVT;
+        if (SVT.getVectorElementType() == EltVT &&
+            SVT.getVectorNumElements() > NElts &&
+            isTypeSynthesizable(SVT)) {
+          TransformToType[i] = SVT;
+          RegisterTypeForVT[i] = SVT;
+          NumRegistersForVT[i] = 1;
+          ValueTypeActions.setTypeAction(VT, Promote);
+          IsLegalWiderType = true;
+          break;
+        }
+      }
+      if (IsLegalWiderType) continue;
+    }
+
     MVT IntermediateVT;
     EVT RegisterVT;
     unsigned NumIntermediates;
@@ -744,32 +813,29 @@
                                RegisterVT, this);
     RegisterTypeForVT[i] = RegisterVT;
 
-    // Determine if there is a legal wider type.
-    bool IsLegalWiderType = false;
-    EVT EltVT = VT.getVectorElementType();
-    unsigned NElts = VT.getVectorNumElements();
-    for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-      EVT SVT = (MVT::SimpleValueType)nVT;
-      if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
-          SVT.getVectorNumElements() > NElts && NElts != 1) {
-        TransformToType[i] = SVT;
-        ValueTypeActions.setTypeAction(VT, Promote);
-        IsLegalWiderType = true;
-        break;
-      }
-    }
-    if (!IsLegalWiderType) {
-      EVT NVT = VT.getPow2VectorType();
-      if (NVT == VT) {
-        // Type is already a power of 2.  The default action is to split.
-        TransformToType[i] = MVT::Other;
-        ValueTypeActions.setTypeAction(VT, Expand);
-      } else {
-        TransformToType[i] = NVT;
-        ValueTypeActions.setTypeAction(VT, Promote);
-      }
+    EVT NVT = VT.getPow2VectorType();
+    if (NVT == VT) {
+      // Type is already a power of 2.  The default action is to split.
+      TransformToType[i] = MVT::Other;
+      ValueTypeActions.setTypeAction(VT, Expand);
+    } else {
+      TransformToType[i] = NVT;
+      ValueTypeActions.setTypeAction(VT, Promote);
     }
   }
+
+  // Determine the 'representative' register class for each value type.
+  // A representative register class is the largest (meaning one which is
+  // not a sub-register class / subreg register class) legal register class for
+  // a group of value types. For example, on i386, i8, i16, and i32
+  // representative would be GR32; while on x86_64 it's GR64.
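A standalone model of the selection the comment describes, using a single super-class link per class for simplicity; the GR8/GR32/GR64 chain mirrors the comment's example, and the per-type loop follows below:

    #include <cassert>

    // Model of findRepresentativeClass: starting from a type's register
    // class, walk its super-register classes and keep the largest legal
    // one. The real code walks a set of super-classes; one link is enough
    // to show the idea.
    struct RC {
      const char *Name;
      bool Legal;        // all value types of this class are legal
      const RC *Super;   // single super-class link, for simplicity
    };

    static const RC *representative(const RC *C) {
      const RC *Best = C;
      for (const RC *S = C->Super; S; S = S->Super)
        if (S->Legal)
          Best = S;      // later (larger) legal super-classes win
      return Best;
    }

    int main() {
      RC GR64 = {"GR64", true, 0};
      RC GR32 = {"GR32", true, &GR64};
      RC GR8  = {"GR8",  true, &GR32};
      assert(representative(&GR8) == &GR64); // x86-64: GR8 -> GR64
      GR64.Legal = false;                    // model 32-bit mode
      assert(representative(&GR8) == &GR32); // i386: GR8 -> GR32
    }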
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + const TargetRegisterClass* RRC; + uint8_t Cost; + tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + RepRegClassForVT[i] = RRC; + RepRegClassCostForVT[i] = Cost; + } } const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -798,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, EVT &RegisterVT) const { - // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); + + // If there is a wider vector type with the same element type as this one, + // we should widen to that legal vector type. This handles things like + // <2 x float> -> <4 x float>. + if (NumElts != 1 && getTypeAction(VT) == Promote) { + RegisterVT = getTypeToTransformTo(Context, VT); + if (isTypeLegal(RegisterVT)) { + IntermediateVT = RegisterVT; + NumIntermediates = 1; + return 1; + } + } + + // Figure out the right, legal destination reg to copy into. EVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; @@ -828,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; - if (DestVT.bitsLT(NewVT)) { - // Value is expanded, e.g. i64 -> i16. + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - } else { - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; - } - return 1; + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; } /// Get the EVTs and ArgFlags collections that represent the legalized return @@ -1308,9 +1383,32 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt), + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; + + // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits + // are not demanded. This will likely allow the anyext to be folded away. + if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = InOp.getNode()->getOperand(0); + EVT InnerVT = InnerOp.getValueType(); + if ((APInt::getHighBitsSet(BitWidth, + BitWidth - InnerVT.getSizeInBits()) & + DemandedMask) == 0 && + isTypeDesirableForOp(ISD::SHL, InnerVT)) { + EVT ShTy = getShiftAmountTy(); + if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) + ShTy = InnerVT; + SDValue NarrowShl = + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, ShTy)); + return + TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), + NarrowShl)); + } + } + KnownZero <<= SA->getZExtValue(); KnownOne <<= SA->getZExtValue(); // low bits known zero. @@ -1415,11 +1513,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()) & - NewMask; + BitWidth - EVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. 
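The sign_extend_inreg hunk continues just below by relaxing the old NewBits == 0 test to (NewBits & NewMask) == 0: the extension only has to be invisible in the demanded bits, not in all bits. A small self-contained model of that condition, with plain uint32_t arithmetic standing in for APInt (illustrative, not from the patch itself):

#include <cassert>
#include <cstdint>

// SIGN_EXTEND_INREG from i8, modelled on a plain 32-bit value.
static uint32_t sextInReg8(uint32_t x) {
  return (uint32_t)(int32_t)(int8_t)(uint8_t)x;
}

int main() {
  // NewBits: the bits the extension is allowed to change (bit 8 and up).
  uint32_t NewBits = ~0u << 8;
  // NewMask: the bits the surrounding code actually demands.
  uint32_t NewMask = 0x7F;
  assert((NewBits & NewMask) == 0);  // the new, relaxed condition

  // Under that condition, dropping the extension is unobservable.
  for (uint32_t x = 0; x < 0x300; ++x)
    assert((sextInReg8(x) & NewMask) == (x & NewMask));
  return 0;
}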
- if (NewBits == 0) + if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); @@ -1886,12 +1983,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT ExtDstTy = N0.getValueType(); unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); - // If the extended part has any inconsistent bits, it cannot ever - // compare equal. In other words, they have to be all ones or all - // zeros. - APInt ExtBits = - APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); - if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + // If the constant doesn't fit into the number of bits for the source of + // the sign extension, it is impossible for both sides to be equal. + if (C1.getMinSignedBits() > ExtSrcTyBits) return DAG.getConstant(Cond == ISD::SETNE, VT); SDValue ZextOp; @@ -2476,7 +2570,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), - C->getDebugLoc(), + C ? C->getDebugLoc() : DebugLoc(), Op.getValueType(), Offs)); return; } diff --git a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp index e69d3e4..b29ea19 100644 --- a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -59,13 +59,16 @@ DisableCrossClassJoin("disable-cross-class-join", cl::desc("Avoid coalescing cross register class copies"), cl::init(false), cl::Hidden); -static RegisterPass<SimpleRegisterCoalescing> -X("simple-register-coalescing", "Simple Register Coalescing"); +static cl::opt<bool> +DisablePhysicalJoin("disable-physical-join", + cl::desc("Avoid coalescing physical register copies"), + cl::init(false), cl::Hidden); -// Declare that we implement the RegisterCoalescer interface -static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X); +INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer, + "simple-register-coalescing", "Simple Register Coalescing", + false, false, true); -const PassInfo *const llvm::SimpleRegisterCoalescingID = &X; +char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -386,16 +389,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) return false; - bool BHasSubRegs = false; - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - BHasSubRegs = *tri_->getSubRegisters(IntB.reg); - - // Abort if the subregisters of IntB.reg have values that are not simply the + // Abort if the aliases of IntB.reg have values that are not simply the // clobbers from the superreg. - if (BHasSubRegs) - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && - HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && + HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) return false; // If some of the uses of IntA.reg is already coalesced away, return false. 
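A few hunks up, SimplifySetCC replaces the all-ones-or-all-zeros bit test with C1.getMinSignedBits() > ExtSrcTyBits: if the constant needs more signed bits than the extension source provides, equality is impossible. A standalone illustration, where minSignedBits is a hand-rolled stand-in for APInt::getMinSignedBits() (not from the patch itself):

#include <cstdint>
#include <cstdio>

// Hand-rolled stand-in for APInt::getMinSignedBits(): how many bits a
// two's-complement value needs.
static unsigned minSignedBits(int64_t v) {
  unsigned Bits = 1;
  while (v != 0 && v != -1) {   // stop once only the sign remains
    v >>= 1;
    ++Bits;
  }
  return Bits;
}

int main() {
  // (sext i8 x) == 300 can never hold: every sign-extended i8 value fits in
  // 8 signed bits, but 300 needs 10, so the compare folds to a constant.
  unsigned ExtSrcTyBits = 8;
  int64_t C1 = 300;
  std::printf("minSignedBits(%lld) = %u -> %s\n", (long long)C1,
              minSignedBits(C1),
              minSignedBits(C1) > ExtSrcTyBits ? "never equal" : "possible");
  return 0;
}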
@@ -412,6 +411,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; } + DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI); + // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. MachineBasicBlock *MBB = DefMI->getParent(); @@ -470,16 +471,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (Extended) UseMO.setIsKill(false); } - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (UseMI->isCopy()) { - if (UseMI->getOperand(0).getReg() != IntB.reg || - UseMI->getOperand(0).getSubReg()) - continue; - } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ - if (DstReg != IntB.reg || DstSubIdx) - continue; - } else + if (!UseMI->isCopy()) continue; + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + // This copy will become a noop. If it's defining a new val#, // remove that val# as well. However this live range is being // extended to the end of the existing live range defined by the copy. @@ -504,13 +501,13 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, // Remove val#'s defined by copies that will be coalesced away. for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) { VNInfo *DeadVNI = BDeadValNos[i]; - if (BHasSubRegs) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) continue; - LiveInterval &SRLI = li_->getInterval(*SR); - if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def)) - SRLI.removeValNo(SRLR->valno); + LiveInterval &ASLI = li_->getInterval(*AS); + if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def)) + ASLI.removeValNo(ASLR->valno); } } IntB.removeValNo(BDeadValNos[i]); @@ -628,14 +625,6 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, if (DefMO.getReg() == li.reg && !DefMO.getSubReg()) DefMO.setIsDead(); } - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - DstReg == li.reg && DstSubIdx == 0) { - // Last use is itself an identity code. - int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, - false, false, tri_); - LastUseMI->getOperand(DeadIdx).setIsDead(); - } return true; } @@ -772,16 +761,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { // A PhysReg copy that won't be coalesced can perhaps be rematerialized // instead. if (DstIsPhys) { - unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; - if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, - CopySrcSubIdx, CopyDstSubIdx) && - CopySrcSubIdx == 0 && CopyDstSubIdx == 0 && - CopySrcReg != CopyDstReg && CopySrcReg == SrcReg && - CopyDstReg != DstReg && !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0, - UseMI)) - continue; - if (UseMI->isCopy() && !UseMI->getOperand(1).getSubReg() && !UseMI->getOperand(0).getSubReg() && @@ -834,28 +813,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { dbgs() << li_->getInstructionIndex(UseMI) << "\t"; dbgs() << *UseMI; }); - - - // After updating the operand, check if the machine instruction has - // become a copy. If so, update its val# information. 
- const TargetInstrDesc &TID = UseMI->getDesc(); - if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2) - continue; - - unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; - if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, - CopySrcSubIdx, CopyDstSubIdx) && - CopySrcReg != CopyDstReg && - (TargetRegisterInfo::isVirtualRegister(CopyDstReg) || - allocatableRegs_[CopyDstReg])) { - LiveInterval &LI = li_->getInterval(CopyDstReg); - SlotIndex DefIdx = - li_->getInstructionIndex(UseMI).getDefIndex(); - if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) { - if (DLR->valno->def == DefIdx) - DLR->valno->setCopy(UseMI); - } - } } } @@ -1082,13 +1039,18 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } + if (DisablePhysicalJoin && CP.isPhys()) { + DEBUG(dbgs() << "\tPhysical joins disabled.\n"); + return false; + } + DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg()); // Enforce policies. if (CP.isPhys()) { DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n"); // Only coalesce to allocatable physreg. - if (!allocatableRegs_[CP.getDstReg()]) { + if (!li_->isAllocatable(CP.getDstReg())) { DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); return false; // Not coalescable. } @@ -1137,7 +1099,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // happens. if (li_->hasInterval(CP.getDstReg()) && li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { - mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); ++numAborts; DEBUG(dbgs() << "\tPhysical register live interval too complicated, abort!\n"); @@ -1156,7 +1117,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI)) return true; - mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); ++numAborts; DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. @@ -1543,21 +1503,19 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, MachineInstr *Inst = MII++; // If this isn't a copy nor a extract_subreg, we can't join intervals. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - bool isInsUndef = false; + unsigned SrcReg, DstReg; if (Inst->isCopy()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); } else if (Inst->isSubregToReg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); - } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + } else continue; bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (isInsUndef || - (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())) + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) ImpDefCopies.push_back(CopyRec(Inst, 0)); else if (SrcIsPhys || DstIsPhys) PhysCopies.push_back(CopyRec(Inst, 0)); @@ -1679,11 +1637,6 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, MachineInstr *UseMI = Use.getParent(); if (UseMI->isIdentityCopy()) continue; - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == DstReg && SrcSubIdx == DstSubIdx) - // Ignore identity copies. - continue; SlotIndex Idx = li_->getInstructionIndex(UseMI); // FIXME: Should this be Idx != UseIdx? 
SlotIndex() will return something // that compares higher than any other interval. @@ -1708,10 +1661,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, return NULL; // Ignore identity copies. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!MI->isIdentityCopy() && - !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == DstReg && SrcSubIdx == DstSubIdx)) + if (!MI->isIdentityCopy()) for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { MachineOperand &Use = MI->getOperand(i); if (Use.isReg() && Use.isUse() && Use.getReg() && @@ -1747,7 +1697,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); - allocatableRegs_ = tri_->getAllocatableSet(fn); for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), E = tri_->regclass_end(); I != E; ++I) allocatableRCRegs_.insert(std::make_pair(*I, @@ -1775,30 +1724,35 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); mii != mie; ) { MachineInstr *MI = mii; - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (JoinedCopies.count(MI)) { // Delete all coalesced copies. bool DoDelete = true; - if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert(MI->isCopyLike() && "Unrecognized copy instruction"); - SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - // Do not delete extract_subreg, insert_subreg of physical - // registers unless the definition is dead. e.g. - // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 - // or else the scavenger may complain. LowerSubregs will - // delete them later. - DoDelete = false; - } + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + MI->getNumOperands() > 2) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 + // or else the scavenger may complain. LowerSubregs will + // delete them later. + DoDelete = false; + if (MI->allDefsAreDead()) { LiveInterval &li = li_->getInterval(SrcReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); DoDelete = true; } - if (!DoDelete) + if (!DoDelete) { + // We need the instruction to adjust liveness, so make it a KILL. 
+ if (MI->isSubregToReg()) { + MI->RemoveOperand(3); + MI->RemoveOperand(1); + } + MI->setDesc(tii_->get(TargetOpcode::KILL)); mii = llvm::next(mii); - else { + } else { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; @@ -1840,9 +1794,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { } // If the move will be an identity move delete it - bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); - if (MI->isIdentityCopy() || - (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) { + if (MI->isIdentityCopy()) { + unsigned SrcReg = MI->getOperand(1).getReg(); if (li_->hasInterval(SrcReg)) { LiveInterval &RegInt = li_->getInterval(SrcReg); // If def of this move instruction is dead, remove its live range diff --git a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h index e154da6..855bdb9 100644 --- a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h @@ -47,7 +47,6 @@ namespace llvm { const MachineLoopInfo* loopInfo; AliasAnalysis *AA; - BitVector allocatableRegs_; DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_; /// JoinedCopies - Keep track of copies eliminated due to coalescing. @@ -64,7 +63,7 @@ namespace llvm { public: static char ID; // Pass identifcation, replacement for typeid - SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {} + SimpleRegisterCoalescing() : MachineFunctionPass(ID) {} struct InstrSlots { enum { diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index e90869d..b637980 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -58,7 +58,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit SjLjEHPass(const TargetLowering *tli = NULL) - : FunctionPass(&ID), TLI(tli) { } + : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index 7a227cf..1bc148f 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -40,7 +40,8 @@ namespace { } char SlotIndexes::ID = 0; -static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering"); +INITIALIZE_PASS(SlotIndexes, "slotindexes", + "Slot index numbering", false, false); IndexListEntry* IndexListEntry::getEmptyKeyEntry() { return &*IndexListEntryEmptyKey; diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp index 56bcb28..59d5ab3 100644 --- a/contrib/llvm/lib/CodeGen/Spiller.cpp +++ b/contrib/llvm/lib/CodeGen/Spiller.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -49,29 +50,31 @@ namespace { /// Utility class for spillers. class SpillerBase : public Spiller { protected: + MachineFunctionPass *pass; MachineFunction *mf; + VirtRegMap *vrm; LiveIntervals *lis; MachineFrameInfo *mfi; MachineRegisterInfo *mri; const TargetInstrInfo *tii; const TargetRegisterInfo *tri; - VirtRegMap *vrm; /// Construct a spiller base. 
- SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) - : mf(mf), lis(lis), vrm(vrm) + SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) + : pass(&pass), mf(&mf), vrm(&vrm) { - mfi = mf->getFrameInfo(); - mri = &mf->getRegInfo(); - tii = mf->getTarget().getInstrInfo(); - tri = mf->getTarget().getRegisterInfo(); + lis = &pass.getAnalysis<LiveIntervals>(); + mfi = mf.getFrameInfo(); + mri = &mf.getRegInfo(); + tii = mf.getTarget().getInstrInfo(); + tri = mf.getTarget().getRegisterInfo(); } /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding or /// remat is attempted. void trivialSpillEverywhere(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals) { + SmallVectorImpl<LiveInterval*> &newIntervals) { DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && @@ -173,13 +176,13 @@ namespace { class TrivialSpiller : public SpillerBase { public: - TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) - : SpillerBase(mf, lis, vrm) {} + TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : SpillerBase(pass, mf, vrm) {} void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &, - SlotIndex*) { + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &) { // Ignore spillIs - we don't use it. trivialSpillEverywhere(li, newIntervals); } @@ -193,18 +196,19 @@ namespace { class StandardSpiller : public Spiller { protected: LiveIntervals *lis; - const MachineLoopInfo *loopInfo; + MachineLoopInfo *loopInfo; VirtRegMap *vrm; public: - StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo, - VirtRegMap *vrm) - : lis(lis), loopInfo(loopInfo), vrm(vrm) {} + StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : lis(&pass.getAnalysis<LiveIntervals>()), + loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()), + vrm(&vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex*) { + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) { std::vector<LiveInterval*> added = lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); newIntervals.insert(newIntervals.end(), added.begin(), added.end()); @@ -221,23 +225,21 @@ namespace { /// then the spiller falls back on the standard spilling mechanism. 
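The constructor changes running through Spiller.cpp all follow one pattern: rather than threading LiveIntervals and MachineLoopInfo through createSpiller by hand, each spiller now receives the owning pass and asks it for the analyses it needs. A toy sketch of that inversion (Pass, LiveIntervalsStub and SpillerStub are invented stand-ins for MachineFunctionPass, LiveIntervals and SpillerBase; the real getAnalysis comes from the pass framework):

#include <cassert>
#include <typeindex>
#include <unordered_map>

// Toy registry standing in for the pass framework: analyses are registered
// once, and anyone holding the pass can fetch them by type.
struct Pass {
  std::unordered_map<std::type_index, void *> Analyses;
  template <class T> T &getAnalysis() {
    return *static_cast<T *>(Analyses.at(std::type_index(typeid(T))));
  }
};

struct LiveIntervalsStub { int NumIntervals; };

// The spiller no longer takes LiveIntervals as a parameter; it pulls the
// analysis from the pass in its constructor, as SpillerBase now does.
struct SpillerStub {
  LiveIntervalsStub *lis;
  explicit SpillerStub(Pass &pass)
      : lis(&pass.getAnalysis<LiveIntervalsStub>()) {}
};

int main() {
  LiveIntervalsStub LIS = { 0 };
  Pass P;
  P.Analyses[std::type_index(typeid(LiveIntervalsStub))] = &LIS;
  SpillerStub S(P);   // the constructor fetches what it needs itself
  assert(S.lis == &LIS);
  return 0;
}

One payoff is visible in this same diff: StandardSpiller picks up MachineLoopInfo via getAnalysisIfAvailable without any caller changing.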
class SplittingSpiller : public StandardSpiller { public: - SplittingSpiller(MachineFunction *mf, LiveIntervals *lis, - const MachineLoopInfo *loopInfo, VirtRegMap *vrm) - : StandardSpiller(lis, loopInfo, vrm) { - - mri = &mf->getRegInfo(); - tii = mf->getTarget().getInstrInfo(); - tri = mf->getTarget().getRegisterInfo(); + SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : StandardSpiller(pass, mf, vrm) { + mri = &mf.getRegInfo(); + tii = mf.getTarget().getInstrInfo(); + tri = mf.getTarget().getRegisterInfo(); } void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestStart) { + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) { if (worthTryingToSplit(li)) - tryVNISplit(li, earliestStart); + tryVNISplit(li); else - StandardSpiller::spill(li, newIntervals, spillIs, earliestStart); + StandardSpiller::spill(li, newIntervals, spillIs); } private: @@ -252,8 +254,7 @@ private: } /// Try to break a LiveInterval into its component values. - std::vector<LiveInterval*> tryVNISplit(LiveInterval *li, - SlotIndex *earliestStart) { + std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) { DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n"); @@ -277,10 +278,6 @@ private: DEBUG(dbgs() << *splitInterval << "\n"); added.push_back(splitInterval); alreadySplit.insert(splitInterval); - if (earliestStart != 0) { - if (splitInterval->beginIndex() < *earliestStart) - *earliestStart = splitInterval->beginIndex(); - } } else { DEBUG(dbgs() << "0\n"); } @@ -293,10 +290,6 @@ private: if (!li->empty()) { added.push_back(li); alreadySplit.insert(li); - if (earliestStart != 0) { - if (li->beginIndex() < *earliestStart) - *earliestStart = li->beginIndex(); - } } return added; @@ -506,20 +499,19 @@ private: namespace llvm { -Spiller *createInlineSpiller(MachineFunction*, - LiveIntervals*, - const MachineLoopInfo*, - VirtRegMap*); +Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); } -llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, - const MachineLoopInfo *loopInfo, - VirtRegMap *vrm) { +llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { switch (spillerOpt) { default: assert(0 && "unknown spiller"); - case trivial: return new TrivialSpiller(mf, lis, vrm); - case standard: return new StandardSpiller(lis, loopInfo, vrm); - case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); - case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm); + case trivial: return new TrivialSpiller(pass, mf, vrm); + case standard: return new StandardSpiller(pass, mf, vrm); + case splitting: return new SplittingSpiller(pass, mf, vrm); + case inline_: return createInlineSpiller(pass, mf, vrm); } } diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h index 450447b..59bc0ec 100644 --- a/contrib/llvm/lib/CodeGen/Spiller.h +++ b/contrib/llvm/lib/CodeGen/Spiller.h @@ -11,19 +11,14 @@ #define LLVM_CODEGEN_SPILLER_H #include "llvm/ADT/SmallVector.h" -#include <vector> namespace llvm { class LiveInterval; - class LiveIntervals; - class LiveStacks; class MachineFunction; - class MachineInstr; - class MachineLoopInfo; + class MachineFunctionPass; class SlotIndex; class VirtRegMap; - class VNInfo; /// Spiller interface. 
/// @@ -40,18 +35,16 @@ namespace llvm { /// @param spillIs A list of intervals that are about to be spilled, /// and so cannot be used for remat etc. /// @param newIntervals The newly created intervals will be appended here. - /// @param earliestIndex The earliest point for splitting. (OK, it's another - /// pointer to the allocator guts). virtual void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestIndex = 0) = 0; + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) = 0; }; /// Create and return a spiller object, as specified on the command line. - Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li, - const MachineLoopInfo *loopInfo, VirtRegMap *vrm); + Spiller* createSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); } #endif diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp new file mode 100644 index 0000000..29474f0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -0,0 +1,1097 @@ +//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SplitAnalysis class as well as mutator functions for +// live range splitting. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "splitter" +#include "SplitKit.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +static cl::opt<bool> +AllowSplit("spiller-splits-edges", + cl::desc("Allow critical edge splitting during spilling")); + +//===----------------------------------------------------------------------===// +// Split Analysis +//===----------------------------------------------------------------------===// + +SplitAnalysis::SplitAnalysis(const MachineFunction &mf, + const LiveIntervals &lis, + const MachineLoopInfo &mli) + : mf_(mf), + lis_(lis), + loops_(mli), + tii_(*mf.getTarget().getInstrInfo()), + curli_(0) {} + +void SplitAnalysis::clear() { + usingInstrs_.clear(); + usingBlocks_.clear(); + usingLoops_.clear(); + curli_ = 0; +} + +bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) { + MachineBasicBlock *T, *F; + SmallVector<MachineOperand, 4> Cond; + return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond); +} + +/// analyzeUses - Count instructions, basic blocks, and loops using curli. 
+void SplitAnalysis::analyzeUses() { + const MachineRegisterInfo &MRI = mf_.getRegInfo(); + for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg); + MachineInstr *MI = I.skipInstruction();) { + if (MI->isDebugValue() || !usingInstrs_.insert(MI)) + continue; + MachineBasicBlock *MBB = MI->getParent(); + if (usingBlocks_[MBB]++) + continue; + if (MachineLoop *Loop = loops_.getLoopFor(MBB)) + usingLoops_[Loop]++; + } + DEBUG(dbgs() << " counted " + << usingInstrs_.size() << " instrs, " + << usingBlocks_.size() << " blocks, " + << usingLoops_.size() << " loops.\n"); +} + +/// removeUse - Update statistics by noting that MI no longer uses curli. +void SplitAnalysis::removeUse(const MachineInstr *MI) { + if (!usingInstrs_.erase(MI)) + return; + + // Decrement MBB count. + const MachineBasicBlock *MBB = MI->getParent(); + BlockCountMap::iterator bi = usingBlocks_.find(MBB); + assert(bi != usingBlocks_.end() && "MBB missing"); + assert(bi->second && "0 count in map"); + if (--bi->second) + return; + // No more uses in MBB. + usingBlocks_.erase(bi); + + // Decrement loop count. + MachineLoop *Loop = loops_.getLoopFor(MBB); + if (!Loop) + return; + LoopCountMap::iterator li = usingLoops_.find(Loop); + assert(li != usingLoops_.end() && "Loop missing"); + assert(li->second && "0 count in map"); + if (--li->second) + return; + // No more blocks in Loop. + usingLoops_.erase(li); +} + +// Get three sets of basic blocks surrounding a loop: Blocks inside the loop, +// predecessor blocks, and exit blocks. +void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) { + Blocks.clear(); + + // Blocks in the loop. + Blocks.Loop.insert(Loop->block_begin(), Loop->block_end()); + + // Predecessor blocks. + const MachineBasicBlock *Header = Loop->getHeader(); + for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(), + E = Header->pred_end(); I != E; ++I) + if (!Blocks.Loop.count(*I)) + Blocks.Preds.insert(*I); + + // Exit blocks. + for (MachineLoop::block_iterator I = Loop->block_begin(), + E = Loop->block_end(); I != E; ++I) { + const MachineBasicBlock *MBB = *I; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if (!Blocks.Loop.count(*SI)) + Blocks.Exits.insert(*SI); + } +} + +/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in +/// and around the Loop. +SplitAnalysis::LoopPeripheralUse SplitAnalysis:: +analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) { + LoopPeripheralUse use = ContainedInLoop; + for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); + I != E; ++I) { + const MachineBasicBlock *MBB = I->first; + // Is this a peripheral block? + if (use < MultiPeripheral && + (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) { + if (I->second > 1) use = MultiPeripheral; + else use = SinglePeripheral; + continue; + } + // Is it a loop block? + if (Blocks.Loop.count(MBB)) + continue; + // It must be an unrelated block. + return OutsideLoop; + } + return use; +} + +/// getCriticalExits - It may be necessary to partially break critical edges +/// leaving the loop if an exit block has phi uses of curli. Collect the exit +/// blocks that need special treatment into CriticalExits. 
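A note on the bookkeeping above before getCriticalExits is defined below: analyzeUses and removeUse keep the per-block and per-loop use counts in lockstep, post-incrementing on the way in and decrement-and-erasing on the way out, so a zero count and a missing key never coexist. The idiom in miniature (std::string standing in for MachineBasicBlock*; illustrative only, not from the patch itself):

#include <cassert>
#include <map>
#include <string>

int main() {
  std::map<std::string, unsigned> UsingBlocks;

  // analyzeUses: post-increment, so only the first use in a block falls
  // through to the coarser per-loop bookkeeping.
  auto addUse = [&](const std::string &BB) {
    return UsingBlocks[BB]++ == 0;   // true only for the first use
  };

  // removeUse: decrement, and erase the entry when the count hits zero,
  // mirroring the assert-guarded find/erase pairing above.
  auto removeUse = [&](const std::string &BB) {
    std::map<std::string, unsigned>::iterator I = UsingBlocks.find(BB);
    assert(I != UsingBlocks.end() && "MBB missing");
    assert(I->second && "0 count in map");
    if (--I->second == 0)
      UsingBlocks.erase(I);          // no more uses in this block
  };

  assert(addUse("bb1"));             // first use counts the block
  assert(!addUse("bb1"));            // later uses do not
  removeUse("bb1");
  removeUse("bb1");
  assert(UsingBlocks.empty());       // counts and keys stayed in sync
  return 0;
}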
+void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, + BlockPtrSet &CriticalExits) { + CriticalExits.clear(); + + // A critical exit block contains a phi def of curli, and has a predecessor + // that is not in the loop nor a loop predecessor. + // For such an exit block, the edges carrying the new variable must be moved + // to a new pre-exit block. + for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end(); + I != E; ++I) { + const MachineBasicBlock *Succ = *I; + SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ); + VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx); + // This exit may not have curli live in at all. No need to split. + if (!SuccVNI) + continue; + // If this is not a PHI def, it is either using a value from before the + // loop, or a value defined inside the loop. Both are safe. + if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx) + continue; + // This exit block does have a PHI. Does it also have a predecessor that is + // not a loop block or loop predecessor? + for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), + PE = Succ->pred_end(); PI != PE; ++PI) { + const MachineBasicBlock *Pred = *PI; + if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred)) + continue; + // This is a critical exit block, and we need to split the exit edge. + CriticalExits.insert(Succ); + break; + } + } +} + +/// canSplitCriticalExits - Return true if it is possible to insert new exit +/// blocks before the blocks in CriticalExits. +bool +SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, + BlockPtrSet &CriticalExits) { + // If we don't allow critical edge splitting, require no critical exits. + if (!AllowSplit) + return CriticalExits.empty(); + + for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end(); + I != E; ++I) { + const MachineBasicBlock *Succ = *I; + // We want to insert a new pre-exit MBB before Succ, and change all the + // in-loop blocks to branch to the pre-exit instead of Succ. + // Check that all the in-loop predecessors can be changed. + for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), + PE = Succ->pred_end(); PI != PE; ++PI) { + const MachineBasicBlock *Pred = *PI; + // The external predecessors won't be altered. + if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred)) + continue; + if (!canAnalyzeBranch(Pred)) + return false; + } + + // If Succ's layout predecessor falls through, that too must be analyzable. + // We need to insert the pre-exit block in the gap. + MachineFunction::const_iterator MFI = Succ; + if (MFI == mf_.begin()) + continue; + if (!canAnalyzeBranch(--MFI)) + return false; + } + // No problems found. + return true; +} + +void SplitAnalysis::analyze(const LiveInterval *li) { + clear(); + curli_ = li; + analyzeUses(); +} + +const MachineLoop *SplitAnalysis::getBestSplitLoop() { + assert(curli_ && "Call analyze() before getBestSplitLoop"); + if (usingLoops_.empty()) + return 0; + + LoopPtrSet Loops, SecondLoops; + LoopBlocks Blocks; + BlockPtrSet CriticalExits; + + // Find first-class and second class candidate loops. + // We prefer to split around loops where curli is used outside the periphery. + for (LoopCountMap::const_iterator I = usingLoops_.begin(), + E = usingLoops_.end(); I != E; ++I) { + const MachineLoop *Loop = I->first; + getLoopBlocks(Loop, Blocks); + + // FIXME: We need an SSA updater to properly handle multiple exit blocks. 
+ if (Blocks.Exits.size() > 1) { + DEBUG(dbgs() << " multiple exits from " << *Loop); + continue; + } + + LoopPtrSet *LPS = 0; + switch(analyzeLoopPeripheralUse(Blocks)) { + case OutsideLoop: + LPS = &Loops; + break; + case MultiPeripheral: + LPS = &SecondLoops; + break; + case ContainedInLoop: + DEBUG(dbgs() << " contained in " << *Loop); + continue; + case SinglePeripheral: + DEBUG(dbgs() << " single peripheral use in " << *Loop); + continue; + } + // Will it be possible to split around this loop? + getCriticalExits(Blocks, CriticalExits); + DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from " + << *Loop); + if (!canSplitCriticalExits(Blocks, CriticalExits)) + continue; + // This is a possible split. + assert(LPS); + LPS->insert(Loop); + } + + DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + " + << SecondLoops.size() << " candidate loops.\n"); + + // If there are no first class loops available, look at second class loops. + if (Loops.empty()) + Loops = SecondLoops; + + if (Loops.empty()) + return 0; + + // Pick the earliest loop. + // FIXME: Are there other heuristics to consider? + const MachineLoop *Best = 0; + SlotIndex BestIdx; + for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E; + ++I) { + SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader()); + if (!Best || Idx < BestIdx) + Best = *I, BestIdx = Idx; + } + DEBUG(dbgs() << " getBestSplitLoop found " << *Best); + return Best; +} + +/// getMultiUseBlocks - if curli has more than one use in a basic block, it +/// may be an advantage to split curli for the duration of the block. +bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { + // If curli is local to one block, there is no point to splitting it. + if (usingBlocks_.size() <= 1) + return false; + // Add blocks with multiple uses. + for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); + I != E; ++I) + switch (I->second) { + case 0: + case 1: + continue; + case 2: { + // It doesn't pay to split a 2-instr block if it redefines curli. + VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first)); + VNInfo *VN2 = + curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex()); + // live-in and live-out with a different value. + if (VN1 && VN2 && VN1 != VN2) + continue; + } // Fall through. + default: + Blocks.insert(I->first); + } + return !Blocks.empty(); +} + +//===----------------------------------------------------------------------===// +// LiveIntervalMap +//===----------------------------------------------------------------------===// + +// defValue - Introduce a li_ def for ParentVNI that could be later than +// ParentVNI->def. +VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + + // Is this a simple 1-1 mapping? Not likely. + if (Idx == ParentVNI->def) + return mapValue(ParentVNI, Idx); + + // This is a complex def. Mark with a NULL in valueMap. 
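The code below (and mapValue further down) leans on the insert-for-lookup idiom: a single std::map::insert both finds an existing entry and default-creates a missing one, and the returned bool says which happened. In isolation (illustrative only, not from the patch itself):

#include <cassert>
#include <map>
#include <string>

int main() {
  std::map<int, const char *> M;

  // One tree walk: creates the entry if missing, finds it if present,
  // and the bool reports which of the two happened.
  std::pair<std::map<int, const char *>::iterator, bool> InsP =
      M.insert(std::make_pair(1, "first"));
  assert(InsP.second);                       // newly inserted

  InsP = M.insert(std::make_pair(1, "ignored"));
  assert(!InsP.second);                      // already there, value untouched
  assert(std::string(InsP.first->second) == "first");
  return 0;
}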
+ VNInfo *OldVNI = + valueMap_.insert( + ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second; + // The static_cast<VNInfo *> is only needed to work around a bug in an + // old version of the C++0x standard which the following compilers + // implemented and have yet to fix: + // + // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 + // + // If/When we move to C++0x, this can be replaced by nullptr. + (void)OldVNI; + assert(OldVNI == 0 && "Simple/Complex values mixed"); + + // Should we insert a minimal snippet of VNI LiveRange, or can we count on + // callers to do that? We need it for lookups of complex values. + VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator()); + return VNI; +} + +// mapValue - Find the mapped value for ParentVNI at Idx. +// Potentially create phi-def values. +VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + + // Use insert for lookup, so we can add missing values with a second lookup. + std::pair<ValueMap::iterator,bool> InsP = + valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))); + // The static_cast<VNInfo *> is only needed to work around a bug in an + // old version of the C++0x standard which the following compilers + // implemented and have yet to fix: + // + // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 + // + // If/When we move to C++0x, this can be replaced by nullptr. + + // This was an unknown value. Create a simple mapping. + if (InsP.second) + return InsP.first->second = li_.createValueCopy(ParentVNI, + lis_.getVNInfoAllocator()); + // This was a simple mapped value. + if (InsP.first->second) + return InsP.first->second; + + // This is a complex mapped value. There may be multiple defs, and we may need + // to create phi-defs. + MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx); + assert(IdxMBB && "No MBB at Idx"); + + // Is there a def in the same MBB we can extend? + if (VNInfo *VNI = extendTo(IdxMBB, Idx)) + return VNI; + + // Now for the fun part. We know that ParentVNI potentially has multiple defs, + // and we may need to create even more phi-defs to preserve VNInfo SSA form. + // Perform a depth-first search for predecessor blocks where we know the + // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. + + // Track MBBs where we have created or learned the dominating value. + // This may change during the DFS as we create new phi-defs. + typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap; + MBBValueMap DomValue; + + for (idf_iterator<MachineBasicBlock*> + IDFI = idf_begin(IdxMBB), + IDFE = idf_end(IdxMBB); IDFI != IDFE;) { + MachineBasicBlock *MBB = *IDFI; + SlotIndex End = lis_.getMBBEndIdx(MBB); + + // We are operating on the restricted CFG where ParentVNI is live. + if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) { + IDFI.skipChildren(); + continue; + } + + // Do we have a dominating value in this block? + VNInfo *VNI = extendTo(MBB, End); + if (!VNI) { + ++IDFI; + continue; + } + + // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs + // as needed along the way. 
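The backtracking loop that follows is where the phi-defs are created, and the trigger is simple: while walking paths back toward IdxMBB, meeting a block already mapped to a different value means neither value dominates, so that block needs a phi-def. A scalar model of the DomValue bookkeeping (block names and plain ints standing in for MachineBasicBlock* and VNInfo*; illustrative only, not from the patch itself):

#include <cstdio>
#include <map>
#include <string>

int main() {
  std::map<std::string, int> DomValue;   // block -> reaching value number
  int NextVNI = 2;

  // Called while backtracking a path: first visit records the value, a
  // revisit with the same value is a no-op, and a revisit with a different
  // value is the phi-def trigger.
  auto backtrack = [&](const std::string &BB, int VNI) {
    std::pair<std::map<std::string, int>::iterator, bool> InsP =
        DomValue.insert(std::make_pair(BB, VNI));
    if (InsP.second || InsP.first->second == VNI)
      return;
    // Neither value dominates the join, so the join block gets a phi-def.
    InsP.first->second = NextVNI++;
    std::printf("phi-def vni%d needed in %s\n", InsP.first->second,
                BB.c_str());
  };

  backtrack("join", 0);   // the path through one side sees vni0
  backtrack("join", 1);   // the other side sees vni1: collision, make a phi
  return 0;
}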
+    for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) {
+      // Start from MBB's immediate successor. End at IdxMBB.
+      MachineBasicBlock *Succ = IDFI.getPath(PI-1);
+      std::pair<MBBValueMap::iterator, bool> InsP =
+        DomValue.insert(MBBValueMap::value_type(Succ, VNI));
+
+      // This is the first time we backtrack to Succ.
+      if (InsP.second)
+        continue;
+
+      // We reached Succ again with the same VNI. Nothing is going to change.
+      VNInfo *OVNI = InsP.first->second;
+      if (OVNI == VNI)
+        break;
+
+      // Succ already has a phi-def. No need to continue.
+      SlotIndex Start = lis_.getMBBStartIdx(Succ);
+      if (OVNI->def == Start)
+        break;
+
+      // We have a collision between the old and new VNI at Succ. That means
+      // neither dominates and we need a new phi-def.
+      VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator());
+      VNI->setIsPHIDef(true);
+      InsP.first->second = VNI;
+
+      // Replace OVNI with VNI in the remaining path.
+      for (; PI > 1 ; --PI) {
+        MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2));
+        if (I == DomValue.end() || I->second != OVNI)
+          break;
+        I->second = VNI;
+      }
+    }
+
+    // No need to search the children, we found a dominating value.
+    IDFI.skipChildren();
+  }
+
+  // The search should at least find a dominating value for IdxMBB.
+  assert(!DomValue.empty() && "Couldn't find a reaching definition");
+
+  // Since we went through the trouble of a full DFS visiting all reaching defs,
+  // the values in DomValue are now accurate. No more phi-defs are needed for
+  // these blocks, so we can color the live ranges.
+  // This makes the next mapValue call much faster.
+  VNInfo *IdxVNI = 0;
+  for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E;
+       ++I) {
+    MachineBasicBlock *MBB = I->first;
+    VNInfo *VNI = I->second;
+    SlotIndex Start = lis_.getMBBStartIdx(MBB);
+    if (MBB == IdxMBB) {
+      // Don't add full liveness to IdxMBB, stop at Idx.
+      if (Start != Idx)
+        li_.addRange(LiveRange(Start, Idx, VNI));
+      // The caller had better add some liveness to IdxVNI, or it leaks.
+      IdxVNI = VNI;
+    } else
+      li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+  }
+
+  assert(IdxVNI && "Didn't find value for Idx");
+  return IdxVNI;
+}
+
+// extendTo - Find the last li_ value defined in MBB at or before Idx. The
+// parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+// Return the found VNInfo, or NULL.
+VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) {
+  LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx);
+  if (I == li_.begin())
+    return 0;
+  --I;
+  if (I->start < lis_.getMBBStartIdx(MBB))
+    return 0;
+  if (I->end < Idx)
+    I->end = Idx;
+  return I->valno;
+}
+
+// addSimpleRange - Add a simple range from parentli_ to li_.
+// ParentVNI must be live in the [Start;End) interval.
+void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
+                                     const VNInfo *ParentVNI) {
+  VNInfo *VNI = mapValue(ParentVNI, Start);
+  // A simple mapping is easy.
+  if (VNI->def == ParentVNI->def) {
+    li_.addRange(LiveRange(Start, End, VNI));
+    return;
+  }
+
+  // ParentVNI is a complex value. We must map per MBB.
+  MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start);
+  MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End);
+
+  if (MBB == MBBE) {
+    li_.addRange(LiveRange(Start, End, VNI));
+    return;
+  }
+
+  // First block.
+  li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+
+  // Run sequence of full blocks.
+ for (++MBB; MBB != MBBE; ++MBB) { + Start = lis_.getMBBStartIdx(MBB); + li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), + mapValue(ParentVNI, Start))); + } + + // Final block. + Start = lis_.getMBBStartIdx(MBB); + if (Start != End) + li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); +} + +/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_. +/// All needed values whose def is not inside [Start;End) must be defined +/// beforehand so mapValue will work. +void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { + LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end(); + LiveInterval::const_iterator I = std::lower_bound(B, E, Start); + + // Check if --I begins before Start and overlaps. + if (I != B) { + --I; + if (I->end > Start) + addSimpleRange(Start, std::min(End, I->end), I->valno); + ++I; + } + + // The remaining ranges begin after Start. + for (;I != E && I->start < End; ++I) + addSimpleRange(I->start, std::min(End, I->end), I->valno); +} + +//===----------------------------------------------------------------------===// +// Split Editor +//===----------------------------------------------------------------------===// + +/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. +SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, + SmallVectorImpl<LiveInterval*> &intervals) + : sa_(sa), lis_(lis), vrm_(vrm), + mri_(vrm.getMachineFunction().getRegInfo()), + tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()), + curli_(sa_.getCurLI()), + dupli_(0), openli_(0), + intervals_(intervals), + firstInterval(intervals_.size()) +{ + assert(curli_ && "SplitEditor created from empty SplitAnalysis"); + + // Make sure curli_ is assigned a stack slot, so all our intervals get the + // same slot as curli_. + if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT) + vrm_.assignVirt2StackSlot(curli_->reg); + +} + +LiveInterval *SplitEditor::createInterval() { + unsigned curli = sa_.getCurLI()->reg; + unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli)); + LiveInterval &Intv = lis_.getOrCreateInterval(Reg); + vrm_.grow(); + vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli)); + return &Intv; +} + +LiveInterval *SplitEditor::getDupLI() { + if (!dupli_) { + // Create an interval for dupli that is a copy of curli. + dupli_ = createInterval(); + dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator()); + } + return dupli_; +} + +VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) { + VNInfo *&VNI = valueMap_[curliVNI]; + if (!VNI) + VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator()); + return VNI; +} + +/// Insert a COPY instruction curli -> li. Allocate a new value from li +/// defined by the COPY. Note that rewrite() will deal with the curli +/// register, so this function can be used to copy from any interval - openli, +/// curli, or dupli. +VNInfo *SplitEditor::insertCopy(LiveInterval &LI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY), + LI.reg).addReg(curli_->reg); + SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator()); +} + +/// Create a new virtual register and live interval. 
+void SplitEditor::openIntv() {
+  assert(!openli_ && "Previous LI not closed before openIntv");
+  openli_ = createInterval();
+  intervals_.push_back(openli_);
+  liveThrough_ = false;
+}
+
+/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
+/// not live before Idx, a COPY is not inserted.
+void SplitEditor::enterIntvBefore(SlotIndex Idx) {
+  assert(openli_ && "openIntv not called before enterIntvBefore");
+
+  // Copy from curli_ if it is live.
+  if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) {
+    MachineInstr *MI = lis_.getInstructionFromIndex(Idx);
+    assert(MI && "enterIntvBefore called with invalid index");
+    VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI);
+    openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI));
+
+    // Make sure CurVNI is properly mapped.
+    VNInfo *&mapVNI = valueMap_[CurVNI];
+    // We don't have SSA update yet, so only one entry per value is allowed.
+    assert(!mapVNI && "enterIntvBefore called more than once for the same value");
+    mapVNI = VNI;
+  }
+  DEBUG(dbgs() << "    enterIntvBefore " << Idx << ": " << *openli_ << '\n');
+}
+
+/// enterIntvAtEnd - Enter openli at the end of MBB.
+/// PhiMBB is a successor inside openli where a PHI value is created.
+/// Currently, all entries must share the same PhiMBB.
+void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
+  assert(openli_ && "openIntv not called before enterIntvAtEnd");
+
+  SlotIndex EndA = lis_.getMBBEndIdx(&A);
+  VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
+  if (!CurVNIA) {
+    DEBUG(dbgs() << "    enterIntvAtEnd, curli not live out of BB#"
+                 << A.getNumber() << ".\n");
+    return;
+  }
+
+  // Add a phi kill value and live range out of A.
+  VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
+  openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));
+
+  // FIXME: If this is the only entry edge, we don't need the extra PHI value.
+  // FIXME: If there are multiple entry blocks (so not a loop), we need proper
+  // SSA update.
+
+  // Now look at the start of B.
+  SlotIndex StartB = lis_.getMBBStartIdx(&B);
+  SlotIndex EndB = lis_.getMBBEndIdx(&B);
+  const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
+  if (!CurB) {
+    DEBUG(dbgs() << "    enterIntvAtEnd: curli not live in to BB#"
+                 << B.getNumber() << ".\n");
+    return;
+  }
+
+  VNInfo *VNIB = openli_->getVNInfoAt(StartB);
+  if (!VNIB) {
+    // Create a phi value.
+    VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
+                                 lis_.getVNInfoAllocator());
+    VNIB->setIsPHIDef(true);
+    VNInfo *&mapVNI = valueMap_[CurB->valno];
+    if (mapVNI) {
+      // Multiple copies - must create PHI value.
+      abort();
+    } else {
+      // This is the first copy of dupLR. Mark the mapping.
+      mapVNI = VNIB;
+    }
+
+  }
+
+  DEBUG(dbgs() << "    enterIntvAtEnd: " << *openli_ << '\n');
+}
+
+/// useIntv - indicate that all instructions in MBB should use openli.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+  useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+  assert(openli_ && "openIntv not called before useIntv");
+
+  // Map the curli values from the interval into openli_
+  LiveInterval::const_iterator B = curli_->begin(), E = curli_->end();
+  LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+
+  if (I != B) {
+    --I;
+    // I begins before Start, but overlaps.
+    if (I->end > Start)
+      openli_->addRange(LiveRange(Start, std::min(End, I->end),
+                                  mapValue(I->valno)));
+    ++I;
+  }
+
+  // The remaining ranges begin after Start.
+  for (;I != E && I->start < End; ++I)
+    openli_->addRange(LiveRange(I->start, std::min(End, I->end),
+                                mapValue(I->valno)));
+  DEBUG(dbgs() << "    use [" << Start << ';' << End << "): " << *openli_
+               << '\n');
+}
+
+/// leaveIntvAfter - Leave openli after the instruction at Idx.
+void SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+  assert(openli_ && "openIntv not called before leaveIntvAfter");
+
+  const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex());
+  if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) {
+    DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": not live\n");
+    return;
+  }
+
+  // Was this value of curli live through openli?
+  if (!openli_->liveAt(CurLR->valno->def)) {
+    DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": using external value\n");
+    liveThrough_ = true;
+    return;
+  }
+
+  // We are going to insert a back copy, so we must have a dupli_.
+  LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex());
+  assert(DupLR && "dupli not live into block, but curli is?");
+
+  // Insert the COPY instruction.
+  MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx);
+  MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(),
+                             tii_.get(TargetOpcode::COPY), dupli_->reg)
+                       .addReg(openli_->reg);
+  SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+  openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx,
+                              mapValue(CurLR->valno)));
+  DupLR->valno->def = CopyIdx;
+  DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": " << *openli_ << '\n');
+}
+
+/// leaveIntvAtTop - Leave the interval at the top of MBB.
+/// Currently, only one value can leave the interval.
+void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+  assert(openli_ && "openIntv not called before leaveIntvAtTop");
+
+  SlotIndex Start = lis_.getMBBStartIdx(&MBB);
+  const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
+
+  // Is curli even live-in to MBB?
+  if (!CurLR) {
+    DEBUG(dbgs() << "    leaveIntvAtTop at " << Start << ": not live\n");
+    return;
+  }
+
+  // Is curli defined by PHI at the beginning of MBB?
+  bool isPHIDef = CurLR->valno->isPHIDef() &&
+                  CurLR->valno->def.getBaseIndex() == Start;
+
+  // If MBB is using a value of curli that was defined outside the openli range,
+  // we don't want to copy it back here.
+  if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
+    DEBUG(dbgs() << "    leaveIntvAtTop at " << Start
+                 << ": using external value\n");
+    liveThrough_ = true;
+    return;
+  }
+
+  // We are going to insert a back copy, so we must have a dupli_.
+  LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
+  assert(DupLR && "dupli not live into block, but curli is?");
+
+  // Insert the COPY instruction.
+  MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
+                             tii_.get(TargetOpcode::COPY), dupli_->reg)
+                       .addReg(openli_->reg);
+  SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+
+  // Adjust dupli and openli values.
+  if (isPHIDef) {
+    // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
+    // and shift the dupli def down to the COPY.
+ VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false, + lis_.getVNInfoAllocator()); + VNI->setIsPHIDef(true); + openli_->addRange(LiveRange(VNI->def, Idx, VNI)); + + dupli_->removeRange(Start, Idx); + DupLR->valno->def = Idx; + DupLR->valno->setIsPHIDef(false); + } else { + // The dupli value was defined somewhere inside the openli range. + DEBUG(dbgs() << " leaveIntvAtTop source value defined at " + << DupLR->valno->def << "\n"); + // FIXME: We may not need a PHI here if all predecessors have the same + // value. + VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false, + lis_.getVNInfoAllocator()); + VNI->setIsPHIDef(true); + openli_->addRange(LiveRange(VNI->def, Idx, VNI)); + + // FIXME: What if DupLR->valno is used by multiple exits? SSA Update. + + // closeIntv is going to remove the superfluous live ranges. + DupLR->valno->def = Idx; + DupLR->valno->setIsPHIDef(false); + } + + DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n'); +} + +/// closeIntv - Indicate that we are done editing the currently open +/// LiveInterval, and ranges can be trimmed. +void SplitEditor::closeIntv() { + assert(openli_ && "openIntv not called before closeIntv"); + + DEBUG(dbgs() << " closeIntv cleaning up\n"); + DEBUG(dbgs() << " open " << *openli_ << '\n'); + + if (liveThrough_) { + DEBUG(dbgs() << " value live through region, leaving dupli as is.\n"); + } else { + // live out with copies inserted, or killed by region. Either way we need to + // remove the overlapping region from dupli. + getDupLI(); + for (LiveInterval::iterator I = openli_->begin(), E = openli_->end(); + I != E; ++I) { + dupli_->removeRange(I->start, I->end); + } + // FIXME: A block branching to the entry block may also branch elsewhere + // curli is live. We need both openli and curli to be live in that case. + DEBUG(dbgs() << " dup2 " << *dupli_ << '\n'); + } + openli_ = 0; + valueMap_.clear(); +} + +/// rewrite - after all the new live ranges have been created, rewrite +/// instructions using curli to use the new intervals. +void SplitEditor::rewrite() { + assert(!openli_ && "Previous LI not closed before rewrite"); + const LiveInterval *curli = sa_.getCurLI(); + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg), + RE = mri_.reg_end(); RI != RE;) { + MachineOperand &MO = RI.getOperand(); + MachineInstr *MI = MO.getParent(); + ++RI; + if (MI->isDebugValue()) { + DEBUG(dbgs() << "Zapping " << *MI); + // FIXME: We can do much better with debug values. + MO.setReg(0); + continue; + } + SlotIndex Idx = lis_.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + LiveInterval *LI = dupli_; + for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { + LiveInterval *testli = intervals_[i]; + if (testli->liveAt(Idx)) { + LI = testli; + break; + } + } + if (LI) { + MO.setReg(LI->reg); + sa_.removeUse(MI); + DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI); + } + } + + // dupli_ goes in last, after rewriting. + if (dupli_) { + if (dupli_->empty()) { + DEBUG(dbgs() << " dupli became empty?\n"); + lis_.removeInterval(dupli_->reg); + dupli_ = 0; + } else { + dupli_->RenumberValues(lis_); + intervals_.push_back(dupli_); + } + } + + // Calculate spill weight and allocation hints for new intervals. 
+ VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_); + for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { + LiveInterval &li = *intervals_[i]; + vrai.CalculateRegClass(li.reg); + vrai.CalculateWeightAndHint(li); + DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName() + << ":" << li << '\n'); + } +} + + +//===----------------------------------------------------------------------===// +// Loop Splitting +//===----------------------------------------------------------------------===// + +bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) { + SplitAnalysis::LoopBlocks Blocks; + sa_.getLoopBlocks(Loop, Blocks); + + // Break critical edges as needed. + SplitAnalysis::BlockPtrSet CriticalExits; + sa_.getCriticalExits(Blocks, CriticalExits); + assert(CriticalExits.empty() && "Cannot break critical exits yet"); + + // Create new live interval for the loop. + openIntv(); + + // Insert copies in the predecessors. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(), + E = Blocks.Preds.end(); I != E; ++I) { + MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); + enterIntvAtEnd(MBB, *Loop->getHeader()); + } + + // Switch all loop blocks. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(), + E = Blocks.Loop.end(); I != E; ++I) + useIntv(**I); + + // Insert back copies in the exit blocks. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(), + E = Blocks.Exits.end(); I != E; ++I) { + MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); + leaveIntvAtTop(MBB); + } + + // Done. + closeIntv(); + rewrite(); + return dupli_; +} + + +//===----------------------------------------------------------------------===// +// Single Block Splitting +//===----------------------------------------------------------------------===// + +/// splitSingleBlocks - Split curli into a separate live interval inside each +/// basic block in Blocks. Return true if curli has been completely replaced, +/// false if curli is still intact, and needs to be spilled or split further. +bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { + DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n"); + // Determine the first and last instruction using curli in each block. + typedef std::pair<SlotIndex,SlotIndex> IndexPair; + typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap; + IndexPairMap MBBRange; + for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(), + E = sa_.usingInstrs_.end(); I != E; ++I) { + const MachineBasicBlock *MBB = (*I)->getParent(); + if (!Blocks.count(MBB)) + continue; + SlotIndex Idx = lis_.getInstructionIndex(*I); + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I); + IndexPair &IP = MBBRange[MBB]; + if (!IP.first.isValid() || Idx < IP.first) + IP.first = Idx; + if (!IP.second.isValid() || Idx > IP.second) + IP.second = Idx; + } + + // Create a new interval for each block. 
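The loop below builds one interval per block. For orientation, a hypothetical driver showing how the three splitting strategies in this file fit together (the wrapper is invented; the SplitAnalysis/SplitEditor calls are the ones declared in SplitKit.h):

    // Prefer splitting around a loop, then per block, then within a block.
    static bool trySplit(SplitAnalysis &SA, SplitEditor &Editor) {
      if (const MachineLoop *Loop = SA.getBestSplitLoop())
        return Editor.splitAroundLoop(Loop);
      SplitAnalysis::BlockPtrSet Blocks;
      if (SA.getMultiUseBlocks(Blocks))
        return Editor.splitSingleBlocks(Blocks);
      if (const MachineBasicBlock *MBB = SA.getBlockForInsideSplit())
        return Editor.splitInsideBlock(MBB);
      return false;
    }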
+  for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
+       E = Blocks.end(); I != E; ++I) {
+    IndexPair &IP = MBBRange[*I];
+    DEBUG(dbgs() << "  splitting for BB#" << (*I)->getNumber() << ": ["
+                 << IP.first << ';' << IP.second << ")\n");
+    assert(IP.first.isValid() && IP.second.isValid());
+
+    openIntv();
+    enterIntvBefore(IP.first);
+    useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
+    leaveIntvAfter(IP.second);
+    closeIntv();
+  }
+  rewrite();
+  return dupli_;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                            Sub Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// getBlockForInsideSplit - If curli is contained inside a single basic
+/// block, and it would pay to subdivide the interval inside that block,
+/// return it. Otherwise return NULL. The returned block can be passed to
+/// SplitEditor::splitInsideBlock.
+const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
+  // The interval must be exclusive to one block.
+  if (usingBlocks_.size() != 1)
+    return 0;
+  // Don't do this for fewer than 4 instructions. We want to be sure that
+  // splitting actually reduces the instruction count per interval.
+  if (usingInstrs_.size() < 4)
+    return 0;
+  return usingBlocks_.begin()->first;
+}
+
+/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
+/// true if curli has been completely replaced, false if curli is still
+/// intact, and needs to be spilled or split further.
+bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+  SmallVector<SlotIndex, 32> Uses;
+  Uses.reserve(sa_.usingInstrs_.size());
+  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
+       E = sa_.usingInstrs_.end(); I != E; ++I)
+    if ((*I)->getParent() == MBB)
+      Uses.push_back(lis_.getInstructionIndex(*I));
+  DEBUG(dbgs() << "  splitInsideBlock BB#" << MBB->getNumber() << " for "
+               << Uses.size() << " instructions.\n");
+  assert(Uses.size() >= 3 && "Need at least 3 instructions");
+  array_pod_sort(Uses.begin(), Uses.end());
+
+  // Simple algorithm: Find the largest gap between uses as determined by slot
+  // indices. Create new intervals for instructions before the gap and after the
+  // gap.
+  unsigned bestPos = 0;
+  int bestGap = 0;
+  DEBUG(dbgs() << "    dist (" << Uses[0]);
+  for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+    int g = Uses[i-1].distance(Uses[i]);
+    DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]);
+    if (g > bestGap)
+      bestPos = i, bestGap = g;
+  }
+  DEBUG(dbgs() << "), best: -" << bestGap << "-\n");
+
+  // bestPos points to the first use after the best gap.
+  assert(bestPos > 0 && "Invalid gap");
+
+  // FIXME: Don't create intervals for low densities.
+
+  // First interval before the gap. Don't create single-instr intervals.
+  if (bestPos > 1) {
+    openIntv();
+    enterIntvBefore(Uses.front());
+    useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
+    leaveIntvAfter(Uses[bestPos-1]);
+    closeIntv();
+  }
+
+  // Second interval after the gap.
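The second interval is created below. The gap search above is the heart of splitInsideBlock; the same algorithm restated on plain integers, as a self-contained sketch:

    #include <cassert>
    #include <vector>

    // Index of the first use after the widest gap between consecutive,
    // sorted use positions (mirrors the bestPos/bestGap loop above).
    static unsigned findBestGap(const std::vector<int> &Uses) {
      assert(Uses.size() >= 2 && "need at least two uses");
      unsigned BestPos = 1;
      int BestGap = Uses[1] - Uses[0];
      for (unsigned i = 2, e = Uses.size(); i != e; ++i)
        if (Uses[i] - Uses[i-1] > BestGap) {
          BestPos = i;
          BestGap = Uses[i] - Uses[i-1];
        }
      return BestPos;
    }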
+  if (bestPos < Uses.size()-1) {
+    openIntv();
+    enterIntvBefore(Uses[bestPos]);
+    useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex());
+    leaveIntvAfter(Uses.back());
+    closeIntv();
+  }
+
+  rewrite();
+  return dupli_;
+}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
new file mode 100644
index 0000000..ddef746
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -0,0 +1,321 @@
+//===------------ SplitKit.h - Toolkit for splitting live ranges ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervals;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class VirtRegMap;
+class VNInfo;
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+  const MachineFunction &mf_;
+  const LiveIntervals &lis_;
+  const MachineLoopInfo &loops_;
+  const TargetInstrInfo &tii_;
+
+  // Instructions using the current register.
+  typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
+  InstrPtrSet usingInstrs_;
+
+  // The number of instructions using curli in each basic block.
+  typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
+  BlockCountMap usingBlocks_;
+
+  // The number of basic blocks using curli in each loop.
+  typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap;
+  LoopCountMap usingLoops_;
+
+private:
+  // Current live interval.
+  const LiveInterval *curli_;
+
+  // Summarize statistics by counting instructions using curli_.
+  void analyzeUses();
+
+  /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
+  /// analyzed.
+  bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+
+public:
+  SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+                const MachineLoopInfo &mli);
+
+  /// analyze - set curli to the specified interval, and analyze how it may be
+  /// split.
+  void analyze(const LiveInterval *li);
+
+  /// removeUse - Update statistics by noting that mi no longer uses curli.
+  void removeUse(const MachineInstr *mi);
+
+  const LiveInterval *getCurLI() { return curli_; }
+
+  /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+  /// new interval.
+  void clear();
+
+  typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+  typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
+
+  // Sets of basic blocks surrounding a machine loop.
+  struct LoopBlocks {
+    BlockPtrSet Loop;  // Blocks in the loop.
+    BlockPtrSet Preds; // Loop predecessor blocks.
+    BlockPtrSet Exits; // Loop exit blocks.
+
+    void clear() {
+      Loop.clear();
+      Preds.clear();
+      Exits.clear();
+    }
+  };
+
+  // Calculate the block sets surrounding the loop.
+  void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
+
+  /// LoopPeripheralUse - how is a variable used in and around a loop?
+  /// Peripheral blocks are the loop predecessors and exit blocks.
+  enum LoopPeripheralUse {
+    ContainedInLoop,  // All uses are inside the loop.
+    SinglePeripheral, // At most one instruction per peripheral block.
+    MultiPeripheral,  // Multiple instructions in some peripheral blocks.
+    OutsideLoop       // Uses outside loop periphery.
+  };
+
+  /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
+  /// and around the Loop.
+  LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
+
+  /// getCriticalExits - It may be necessary to partially break critical edges
+  /// leaving the loop if an exit block has phi uses of curli. Collect the exit
+  /// blocks that need special treatment into CriticalExits.
+  void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
+
+  /// canSplitCriticalExits - Return true if it is possible to insert new exit
+  /// blocks before the blocks in CriticalExits.
+  bool canSplitCriticalExits(const LoopBlocks &Blocks,
+                             BlockPtrSet &CriticalExits);
+
+  /// getBestSplitLoop - Return the loop where curli may best be split to a
+  /// separate register, or NULL.
+  const MachineLoop *getBestSplitLoop();
+
+  /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+  /// having curli split to a new live interval. Return true if Blocks can be
+  /// passed to SplitEditor::splitSingleBlocks.
+  bool getMultiUseBlocks(BlockPtrSet &Blocks);
+
+  /// getBlockForInsideSplit - If curli is contained inside a single basic
+  /// block, and it would pay to subdivide the interval inside that block,
+  /// return it. Otherwise return NULL. The returned block can be passed to
+  /// SplitEditor::splitInsideBlock.
+  const MachineBasicBlock *getBlockForInsideSplit();
+};
+
+
+/// LiveIntervalMap - Map values from a large LiveInterval into a small
+/// interval that is a subset. Insert phi-def values as needed. This class is
+/// used by SplitEditor to create new smaller LiveIntervals.
+///
+/// parentli_ is the larger interval, li_ is the subset interval. Every value
+/// in li_ corresponds to exactly one value in parentli_, and the live range
+/// of the value is contained within the live range of the parentli_ value.
+/// Values in parentli_ may map to any number of li_ values, including 0.
+class LiveIntervalMap {
+  LiveIntervals &lis_;
+
+  // The parent interval is never changed.
+  const LiveInterval &parentli_;
+
+  // The child interval's values are fully contained inside parentli_ values.
+  LiveInterval &li_;
+
+  typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
+
+  // Map parentli_ values to simple values in li_ that are defined at the same
+  // SlotIndex, or NULL for parentli_ values that have complex li_ defs.
+  // Note there is a difference between values mapping to NULL (complex), and
+  // values not present (unknown/unmapped).
+  ValueMap valueMap_;
+
+  // extendTo - Find the last li_ value defined in MBB at or before Idx. The
+  // parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+  // Return the found VNInfo, or NULL.
+  VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx);
+
+  // addSimpleRange - Add a simple range from parentli_ to li_.
+  // ParentVNI must be live in the [Start;End) interval.
+  void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+
+public:
+  LiveIntervalMap(LiveIntervals &lis,
+                  const LiveInterval &parentli,
+                  LiveInterval &li)
+    : lis_(lis), parentli_(parentli), li_(li) {}
+
+  /// defValue - define a value in li_ from the parentli_ value ParentVNI at
+  /// Idx.
+  /// Idx does not have to be ParentVNI->def, but it must be contained within
+  /// ParentVNI's live range in parentli_.
+  /// Return the new li_ value.
+  VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+  /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is
+  /// assumed that ParentVNI is live at Idx.
+  /// If ParentVNI has not been defined by defValue, it is assumed that
+  /// ParentVNI->def dominates Idx.
+  /// If ParentVNI has been defined by defValue one or more times, a value that
+  /// dominates Idx will be returned. This may require creating extra phi-def
+  /// values and adding live ranges to li_.
+  VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+  /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+  /// All needed values whose def is not inside [Start;End) must be defined
+  /// beforehand so mapValue will work.
+  void addRange(SlotIndex Start, SlotIndex End);
+};
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with leaveIntv*.
+/// - Finish the current interval with closeIntv and repeat from openIntv.
+/// - Rewrite instructions with rewrite().
+///
+class SplitEditor {
+  SplitAnalysis &sa_;
+  LiveIntervals &lis_;
+  VirtRegMap &vrm_;
+  MachineRegisterInfo &mri_;
+  const TargetInstrInfo &tii_;
+
+  /// curli_ - The immutable interval we are currently splitting.
+  const LiveInterval *const curli_;
+
+  /// dupli_ - Created as a copy of curli_, ranges are carved out as new
+  /// intervals get added through openIntv / closeIntv. This is used to avoid
+  /// editing curli_.
+  LiveInterval *dupli_;
+
+  /// Currently open LiveInterval.
+  LiveInterval *openli_;
+
+  /// createInterval - Create a new virtual register and LiveInterval with same
+  /// register class and spill slot as curli.
+  LiveInterval *createInterval();
+
+  /// getDupLI - Ensure dupli is created and return it.
+  LiveInterval *getDupLI();
+
+  /// valueMap_ - Map values in curli to values in openli. These are direct 1-1
+  /// mappings, and do not include values created by inserted copies.
+  DenseMap<const VNInfo*, VNInfo*> valueMap_;
+
+  /// mapValue - Return the openIntv value that corresponds to the given curli
+  /// value.
+  VNInfo *mapValue(const VNInfo *curliVNI);
+
+  /// A dupli value is live through openIntv.
+  bool liveThrough_;
+
+  /// All the new intervals created for this split are added to intervals_.
+  SmallVectorImpl<LiveInterval*> &intervals_;
+
+  /// The index into intervals_ of the first interval we added. There may be
+  /// others from before we got it.
+  unsigned firstInterval;
+
+  /// Insert a COPY instruction curli -> li. Allocate a new value from li
+  /// defined by the COPY.
+  VNInfo *insertCopy(LiveInterval &LI,
+                     MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator I);
+
+public:
+  /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+  /// Newly created intervals will be appended to newIntervals.
+  SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+              SmallVectorImpl<LiveInterval*> &newIntervals);
+
+  /// getAnalysis - Get the corresponding analysis.
+  SplitAnalysis &getAnalysis() { return sa_; }
+
+  /// Create a new virtual register and live interval.
+ void openIntv(); + + /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is + /// not live before Idx, a COPY is not inserted. + void enterIntvBefore(SlotIndex Idx); + + /// enterIntvAtEnd - Enter openli at the end of MBB. + /// PhiMBB is a successor inside openli where a PHI value is created. + /// Currently, all entries must share the same PhiMBB. + void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB); + + /// useIntv - indicate that all instructions in MBB should use openli. + void useIntv(const MachineBasicBlock &MBB); + + /// useIntv - indicate that all instructions in range should use openli. + void useIntv(SlotIndex Start, SlotIndex End); + + /// leaveIntvAfter - Leave openli after the instruction at Idx. + void leaveIntvAfter(SlotIndex Idx); + + /// leaveIntvAtTop - Leave the interval at the top of MBB. + /// Currently, only one value can leave the interval. + void leaveIntvAtTop(MachineBasicBlock &MBB); + + /// closeIntv - Indicate that we are done editing the currently open + /// LiveInterval, and ranges can be trimmed. + void closeIntv(); + + /// rewrite - after all the new live ranges have been created, rewrite + /// instructions using curli to use the new intervals. + void rewrite(); + + // ===--- High level methods ---=== + + /// splitAroundLoop - Split curli into a separate live interval inside + /// the loop. Return true if curli has been completely replaced, false if + /// curli is still intact, and needs to be spilled or split further. + bool splitAroundLoop(const MachineLoop*); + + /// splitSingleBlocks - Split curli into a separate live interval inside each + /// basic block in Blocks. Return true if curli has been completely replaced, + /// false if curli is still intact, and needs to be spilled or split further. + bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); + + /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return + /// true if curli has been completely replaced, false if curli is still + /// intact, and needs to be spilled or split further. + bool splitInsideBlock(const MachineBasicBlock *); +}; + +} diff --git a/contrib/llvm/lib/CodeGen/Splitter.cpp b/contrib/llvm/lib/CodeGen/Splitter.cpp new file mode 100644 index 0000000..38f3b1f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Splitter.cpp @@ -0,0 +1,817 @@ +//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsplitter"
+
+#include "Splitter.h"
+
+#include "SimpleRegisterCoalescing.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char LoopSplitter::ID = 0;
+INITIALIZE_PASS(LoopSplitter, "loop-splitting",
+                "Split virtual registers across loop boundaries.", false, false);
+
+namespace llvm {
+
+  class StartSlotComparator {
+  public:
+    StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
+    bool operator()(const MachineBasicBlock *mbb1,
+                    const MachineBasicBlock *mbb2) const {
+      return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
+    }
+  private:
+    LiveIntervals &lis;
+  };
+
+  class LoopSplit {
+  public:
+    LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
+      : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
+      assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+             "Cannot split physical registers.");
+    }
+
+    LiveInterval& getLI() const { return li; }
+
+    MachineLoop& getLoop() const { return loop; }
+
+    bool isValid() const { return valid; }
+
+    bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
+
+    void invalidate() { valid = false; }
+
+    void splitIncoming() { inSplit = true; }
+
+    void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
+
+    void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
+
+    void apply() {
+      assert(valid && "Attempt to apply invalid split.");
+      applyIncoming();
+      applyOutgoing();
+      copyRanges();
+      renameInside();
+    }
+
+  private:
+    LoopSplitter &ls;
+    LiveInterval &li;
+    MachineLoop &loop;
+    bool valid, inSplit;
+    std::set<MachineLoop::Edge> outSplits;
+    std::vector<MachineInstr*> loopInstrs;
+
+    LiveInterval *newLI;
+    std::map<VNInfo*, VNInfo*> vniMap;
+
+    LiveInterval* getNewLI() {
+      if (newLI == 0) {
+        const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
+        unsigned vreg = ls.mri->createVirtualRegister(trc);
+        newLI = &ls.lis->getOrCreateInterval(vreg);
+      }
+      return newLI;
+    }
+
+    VNInfo* getNewVNI(VNInfo *oldVNI) {
+      VNInfo *newVNI = vniMap[oldVNI];
+
+      if (newVNI == 0) {
+        newVNI = getNewLI()->createValueCopy(oldVNI,
+                                             ls.lis->getVNInfoAllocator());
+        vniMap[oldVNI] = newVNI;
+      }
+
+      return newVNI;
+    }
+
+    void applyIncoming() {
+      if (!inSplit) {
+        return;
+      }
+
+      MachineBasicBlock *preHeader = loop.getLoopPreheader();
+      if (preHeader == 0) {
+        assert(ls.canInsertPreHeader(loop) &&
+               "Can't insert required preheader.");
+        preHeader = &ls.insertPreHeader(loop);
+      }
+
+      LiveRange *preHeaderRange =
+        ls.lis->findExitingRange(li, preHeader);
+      assert(preHeaderRange != 0 && "Range not live into preheader.");
+
+      // Insert the new copy.
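The BuildMI call that follows performs the insertion. The same idiom in isolation (sketch; the helper name is invented, the calls match those used in this commit):

    // Insert DstReg = COPY SrcReg<kill> before MBB's first terminator and
    // register it with LiveIntervals, returning the copy's def slot.
    static SlotIndex insertCopyBeforeTerm(LiveIntervals &LIS,
                                          const TargetInstrInfo &TII,
                                          MachineBasicBlock &MBB,
                                          unsigned DstReg, unsigned SrcReg) {
      MachineInstr *Copy = BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(),
                                   TII.get(TargetOpcode::COPY))
        .addReg(DstReg, RegState::Define)
        .addReg(SrcReg, RegState::Kill);
      return LIS.InsertMachineInstrInMaps(Copy).getDefIndex();
    }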
+      MachineInstr *copy = BuildMI(*preHeader,
+                                   preHeader->getFirstTerminator(),
+                                   DebugLoc(),
+                                   ls.tii->get(TargetOpcode::COPY))
+        .addReg(getNewLI()->reg, RegState::Define)
+        .addReg(li.reg, RegState::Kill);
+
+      ls.lis->InsertMachineInstrInMaps(copy);
+
+      SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+      VNInfo *newVal = getNewVNI(preHeaderRange->valno);
+      newVal->def = copyDefIdx;
+      newVal->setCopy(copy);
+      newVal->setIsDefAccurate(true);
+      li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
+
+      getNewLI()->addRange(LiveRange(copyDefIdx,
+                                     ls.lis->getMBBEndIdx(preHeader),
+                                     newVal));
+    }
+
+    void applyOutgoing() {
+
+      for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
+                                                 osEnd = outSplits.end();
+           osItr != osEnd; ++osItr) {
+        MachineLoop::Edge edge = *osItr;
+        MachineBasicBlock *outBlock = edge.second;
+        if (ls.isCriticalEdge(edge)) {
+          assert(ls.canSplitEdge(edge) && "Unsplittable critical edge.");
+          outBlock = &ls.splitEdge(edge, loop);
+        }
+        LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
+        assert(outRange != 0 && "No exiting range?");
+
+        MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
+                                     DebugLoc(),
+                                     ls.tii->get(TargetOpcode::COPY))
+          .addReg(li.reg, RegState::Define)
+          .addReg(getNewLI()->reg, RegState::Kill);
+
+        ls.lis->InsertMachineInstrInMaps(copy);
+
+        SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+        // Blow away output range definition.
+        outRange->valno->def = ls.lis->getInvalidIndex();
+        outRange->valno->setIsDefAccurate(false);
+        li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
+
+        VNInfo *newVal =
+          getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock),
+                                             true),
+                                   0, false, ls.lis->getVNInfoAllocator());
+
+        getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
+                                       copyDefIdx, newVal));
+
+      }
+    }
+
+    void copyRange(LiveRange &lr) {
+      std::pair<bool, LoopSplitter::SlotPair> lsr =
+        ls.getLoopSubRange(lr, loop);
+
+      if (!lsr.first)
+        return;
+
+      LiveRange loopRange(lsr.second.first, lsr.second.second,
+                          getNewVNI(lr.valno));
+
+      li.removeRange(loopRange.start, loopRange.end, true);
+
+      getNewLI()->addRange(loopRange);
+    }
+
+    void copyRanges() {
+      for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+                                                iEnd = loopInstrs.end();
+           iItr != iEnd; ++iItr) {
+        MachineInstr &instr = **iItr;
+        SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
+        if (instr.modifiesRegister(li.reg, 0)) {
+          LiveRange *defRange =
+            li.getLiveRangeContaining(instrIdx.getDefIndex());
+          if (defRange != 0) // May have caught this already.
+            copyRange(*defRange);
+        }
+        if (instr.readsRegister(li.reg, 0)) {
+          LiveRange *useRange =
+            li.getLiveRangeContaining(instrIdx.getUseIndex());
+          if (useRange != 0) { // May have caught this already.
+ copyRange(*useRange); + } + } + } + + for (MachineLoop::block_iterator bbItr = loop.block_begin(), + bbEnd = loop.block_end(); + bbItr != bbEnd; ++bbItr) { + MachineBasicBlock &loopBlock = **bbItr; + LiveRange *enteringRange = + ls.lis->findEnteringRange(li, &loopBlock); + if (enteringRange != 0) { + copyRange(*enteringRange); + } + } + } + + void renameInside() { + for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), + iEnd = loopInstrs.end(); + iItr != iEnd; ++iItr) { + MachineInstr &instr = **iItr; + for (unsigned i = 0; i < instr.getNumOperands(); ++i) { + MachineOperand &mop = instr.getOperand(i); + if (mop.isReg() && mop.getReg() == li.reg) { + mop.setReg(getNewLI()->reg); + } + } + } + } + + }; + + void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); + au.addRequired<MachineLoopInfo>(); + au.addPreserved<MachineLoopInfo>(); + au.addPreserved<RegisterCoalescer>(); + au.addPreserved<CalculateSpillWeights>(); + au.addPreserved<LiveStacks>(); + au.addRequired<SlotIndexes>(); + au.addPreserved<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + au.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(au); + } + + bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) { + + mf = &fn; + mri = &mf->getRegInfo(); + tii = mf->getTarget().getInstrInfo(); + tri = mf->getTarget().getRegisterInfo(); + sis = &getAnalysis<SlotIndexes>(); + lis = &getAnalysis<LiveIntervals>(); + mli = &getAnalysis<MachineLoopInfo>(); + mdt = &getAnalysis<MachineDominatorTree>(); + + fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + + mf->getFunction()->getName().str(); + + dbgs() << "Splitting " << mf->getFunction()->getName() << "."; + + dumpOddTerminators(); + +// dbgs() << "----------------------------------------\n"; +// lis->dump(); +// dbgs() << "----------------------------------------\n"; + +// std::deque<MachineLoop*> loops; +// std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); +// dbgs() << "Loops:\n"; +// while (!loops.empty()) { +// MachineLoop &loop = *loops.front(); +// loops.pop_front(); +// std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); + +// dumpLoopInfo(loop); +// } + + //lis->dump(); + //exit(0); + + // Setup initial intervals. 
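The loop that follows builds the worklist. Its filter in isolation (sketch consistent with the calls used below):

    // An interval is a loop-splitting candidate when it belongs to a
    // virtual register and spans more than one basic block.
    static bool isSplitCandidate(LiveIntervals &LIS, const LiveInterval &LI) {
      return TargetRegisterInfo::isVirtualRegister(LI.reg) &&
             !LIS.intervalIsInOneMBB(LI);
    }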
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + if (TargetRegisterInfo::isVirtualRegister(li->reg) && + !lis->intervalIsInOneMBB(*li)) { + intervals.push_back(li); + } + } + + processIntervals(); + + intervals.clear(); + +// dbgs() << "----------------------------------------\n"; +// lis->dump(); +// dbgs() << "----------------------------------------\n"; + + dumpOddTerminators(); + + //exit(1); + + return false; + } + + void LoopSplitter::releaseMemory() { + fqn.clear(); + intervals.clear(); + loopRangeMap.clear(); + } + + void LoopSplitter::dumpOddTerminators() { + for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end(); + bbItr != bbEnd; ++bbItr) { + MachineBasicBlock *mbb = &*bbItr; + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + if (tii->AnalyzeBranch(*mbb, a, b, c)) { + dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n"; + dbgs() << " Terminators:\n"; + for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end(); + iItr != iEnd; ++iItr) { + MachineInstr *instr= &*iItr; + dbgs() << " " << *instr << ""; + } + dbgs() << "\n Listed successors: [ "; + for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end(); + sItr != sEnd; ++sItr) { + MachineBasicBlock *succMBB = *sItr; + dbgs() << succMBB->getNumber() << " "; + } + dbgs() << "]\n\n"; + } + } + } + + void LoopSplitter::dumpLoopInfo(MachineLoop &loop) { + MachineBasicBlock &headerBlock = *loop.getHeader(); + typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; + ExitEdgesList exitEdges; + loop.getExitEdges(exitEdges); + + dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ "; + for (std::vector<MachineBasicBlock*>::const_iterator + subBlockItr = loop.getBlocks().begin(), + subBlockEnd = loop.getBlocks().end(); + subBlockItr != subBlockEnd; ++subBlockItr) { + MachineBasicBlock &subBlock = **subBlockItr; + dbgs() << "BB#" << subBlock.getNumber() << " "; + } + dbgs() << "], Exit edges: [ "; + for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), + exitEdgeEnd = exitEdges.end(); + exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { + MachineLoop::Edge &exitEdge = *exitEdgeItr; + dbgs() << "(MBB#" << exitEdge.first->getNumber() + << ", MBB#" << exitEdge.second->getNumber() << ") "; + } + dbgs() << "], Sub-Loop Headers: [ "; + for (MachineLoop::iterator subLoopItr = loop.begin(), + subLoopEnd = loop.end(); + subLoopItr != subLoopEnd; ++subLoopItr) { + MachineLoop &subLoop = **subLoopItr; + MachineBasicBlock &subLoopBlock = *subLoop.getHeader(); + dbgs() << "BB#" << subLoopBlock.getNumber() << " "; + } + dbgs() << "]\n"; + } + + void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) { + mbb.updateTerminator(); + + for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end(); + miItr != miEnd; ++miItr) { + if (lis->isNotInMIMap(miItr)) { + lis->InsertMachineInstrInMaps(miItr); + } + } + } + + bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) { + MachineBasicBlock *header = loop.getHeader(); + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + + for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(), + pbEnd = header->pred_end(); + pbItr != pbEnd; ++pbItr) { + MachineBasicBlock *predBlock = *pbItr; + if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) { + return false; + } + } + + MachineFunction::iterator headerItr(header); + if (headerItr == mf->begin()) + return true; + 
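canInsertPreHeader continues below; it leans on the TargetInstrInfo convention that AnalyzeBranch returns false when it succeeds. The predicate being composed, restated (wrapper invented):

    // True when the target understands the block's terminators:
    // AnalyzeBranch returns false on successful analysis.
    static bool hasAnalyzableBranch(const TargetInstrInfo &TII,
                                    MachineBasicBlock &MBB) {
      MachineBasicBlock *TBB = 0, *FBB = 0;
      SmallVector<MachineOperand, 4> Cond;
      return !TII.AnalyzeBranch(MBB, TBB, FBB, Cond);
    }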
MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr); + assert(headerLayoutPred != 0 && "Header should have layout pred."); + + return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c)); + } + + MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) { + assert(loop.getLoopPreheader() == 0 && "Loop already has preheader."); + + MachineBasicBlock &header = *loop.getHeader(); + + // Save the preds - we'll need to update them once we insert the preheader. + typedef std::set<MachineBasicBlock*> HeaderPreds; + HeaderPreds headerPreds; + + for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), + predEnd = header.pred_end(); + predItr != predEnd; ++predItr) { + if (!loop.contains(*predItr)) + headerPreds.insert(*predItr); + } + + assert(!headerPreds.empty() && "No predecessors for header?"); + + //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader..."; + + MachineBasicBlock *preHeader = + mf->CreateMachineBasicBlock(header.getBasicBlock()); + + assert(preHeader != 0 && "Failed to create pre-header."); + + mf->insert(header, preHeader); + + for (HeaderPreds::iterator hpItr = headerPreds.begin(), + hpEnd = headerPreds.end(); + hpItr != hpEnd; ++hpItr) { + assert(*hpItr != 0 && "How'd a null predecessor get into this set?"); + MachineBasicBlock &hp = **hpItr; + hp.ReplaceUsesOfBlockWith(&header, preHeader); + } + preHeader->addSuccessor(&header); + + MachineBasicBlock *oldLayoutPred = + llvm::prior(MachineFunction::iterator(preHeader)); + if (oldLayoutPred != 0) { + updateTerminators(*oldLayoutPred); + } + + lis->InsertMBBInMaps(preHeader); + + if (MachineLoop *parentLoop = loop.getParentLoop()) { + assert(parentLoop->getHeader() != loop.getHeader() && + "Parent loop has same header?"); + parentLoop->addBasicBlockToLoop(preHeader, mli->getBase()); + + // Invalidate all parent loop ranges. + while (parentLoop != 0) { + loopRangeMap.erase(parentLoop); + parentLoop = parentLoop->getParentLoop(); + } + } + + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval &li = *liItr->second; + + // Is this safe for physregs? + // TargetRegisterInfo::isPhysicalRegister(li.reg) || + if (!lis->isLiveInToMBB(li, &header)) + continue; + + if (lis->isLiveInToMBB(li, preHeader)) { + assert(lis->isLiveOutOfMBB(li, preHeader) && + "Range terminates in newly added preheader?"); + continue; + } + + bool insertRange = false; + + for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(), + predEnd = preHeader->pred_end(); + predItr != predEnd; ++predItr) { + MachineBasicBlock *predMBB = *predItr; + if (lis->isLiveOutOfMBB(li, predMBB)) { + insertRange = true; + break; + } + } + + if (!insertRange) + continue; + + VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader), + 0, false, lis->getVNInfoAllocator()); + li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), + lis->getMBBEndIdx(preHeader), + newVal)); + } + + + //dbgs() << "Dumping SlotIndexes:\n"; + //sis->dump(); + + //dbgs() << "done. 
(Added MBB#" << preHeader->getNumber() << ")\n"; + + return *preHeader; + } + + bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) { + assert(edge.first->succ_size() > 1 && "Non-sensical edge."); + if (edge.second->pred_size() > 1) + return true; + return false; + } + + bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) { + MachineFunction::iterator outBlockItr(edge.second); + if (outBlockItr == mf->begin()) + return true; + MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr); + assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin."); + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) && + !tii->AnalyzeBranch(*edge.first, a, b, c)); + } + + MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge, + MachineLoop &loop) { + + MachineBasicBlock &inBlock = *edge.first; + MachineBasicBlock &outBlock = *edge.second; + + assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) && + "Splitting non-critical edge?"); + + //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber() + // << " -> MBB#" << outBlock.getNumber() << ")..."; + + MachineBasicBlock *splitBlock = + mf->CreateMachineBasicBlock(); + + assert(splitBlock != 0 && "Failed to create split block."); + + mf->insert(&outBlock, splitBlock); + + inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock); + splitBlock->addSuccessor(&outBlock); + + MachineBasicBlock *oldLayoutPred = + llvm::prior(MachineFunction::iterator(splitBlock)); + if (oldLayoutPred != 0) { + updateTerminators(*oldLayoutPred); + } + + lis->InsertMBBInMaps(splitBlock); + + loopRangeMap.erase(&loop); + + MachineLoop *splitParentLoop = loop.getParentLoop(); + while (splitParentLoop != 0 && + !splitParentLoop->contains(&outBlock)) { + splitParentLoop = splitParentLoop->getParentLoop(); + } + + if (splitParentLoop != 0) { + assert(splitParentLoop->contains(&loop) && + "Split-block parent doesn't contain original loop?"); + splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase()); + + // Invalidate all parent loop ranges. + while (splitParentLoop != 0) { + loopRangeMap.erase(splitParentLoop); + splitParentLoop = splitParentLoop->getParentLoop(); + } + } + + + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval &li = *liItr->second; + bool intersects = lis->isLiveOutOfMBB(li, &inBlock) && + lis->isLiveInToMBB(li, &outBlock); + if (lis->isLiveInToMBB(li, splitBlock)) { + if (!intersects) { + li.removeRange(lis->getMBBStartIdx(splitBlock), + lis->getMBBEndIdx(splitBlock), true); + } + } else if (intersects) { + VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock), + 0, false, lis->getVNInfoAllocator()); + li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), + lis->getMBBEndIdx(splitBlock), + newVal)); + } + } + + //dbgs() << "done. 
(Added MBB#" << splitBlock->getNumber() << ")\n"; + + return *splitBlock; + } + + LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) { + typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet; + LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop); + if (lrItr == loopRangeMap.end()) { + LoopMBBSet loopMBBs((StartSlotComparator(*lis))); + std::copy(loop.block_begin(), loop.block_end(), + std::inserter(loopMBBs, loopMBBs.begin())); + + assert(!loopMBBs.empty() && "No blocks in loop?"); + + LoopRanges &loopRanges = loopRangeMap[&loop]; + assert(loopRanges.empty() && "Loop encountered but not processed?"); + SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin()); + loopRanges.push_back( + std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()), + lis->getInvalidIndex())); + for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()), + curBlockEnd = loopMBBs.end(); + curBlockItr != curBlockEnd; ++curBlockItr) { + SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr); + if (newStart != oldEnd) { + loopRanges.back().second = oldEnd; + loopRanges.push_back(std::make_pair(newStart, + lis->getInvalidIndex())); + } + oldEnd = lis->getMBBEndIdx(*curBlockItr); + } + + loopRanges.back().second = + lis->getMBBEndIdx(*llvm::prior(loopMBBs.end())); + + return loopRanges; + } + return lrItr->second; + } + + std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange( + const LiveRange &lr, + MachineLoop &loop) { + LoopRanges &loopRanges = getLoopRanges(loop); + LoopRanges::iterator lrItr = loopRanges.begin(), + lrEnd = loopRanges.end(); + while (lrItr != lrEnd && lr.start >= lrItr->second) { + ++lrItr; + } + + if (lrItr == lrEnd) { + SlotIndex invalid = lis->getInvalidIndex(); + return std::make_pair(false, SlotPair(invalid, invalid)); + } + + SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start); + SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end); + + return std::make_pair(true, SlotPair(srStart, srEnd)); + } + + void LoopSplitter::dumpLoopRanges(MachineLoop &loop) { + LoopRanges &loopRanges = getLoopRanges(loop); + dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ "; + for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end(); + lrItr != lrEnd; ++lrItr) { + dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") "; + } + dbgs() << "]\n"; + } + + void LoopSplitter::processHeader(LoopSplit &split) { + MachineBasicBlock &header = *split.getLoop().getHeader(); + //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n"; + + if (!lis->isLiveInToMBB(split.getLI(), &header)) + return; // Not live in, but nothing wrong so far. + + MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader(); + if (!preHeader) { + + if (!canInsertPreHeader(split.getLoop())) { + split.invalidate(); + return; // Couldn't insert a pre-header. Bail on this interval. 
+ } + + for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), + predEnd = header.pred_end(); + predItr != predEnd; ++predItr) { + if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) { + split.splitIncoming(); + break; + } + } + } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) { + split.splitIncoming(); + } + } + + void LoopSplitter::processLoopExits(LoopSplit &split) { + typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; + ExitEdgesList exitEdges; + split.getLoop().getExitEdges(exitEdges); + + //dbgs() << " Processing loop exits:\n"; + + for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), + exitEdgeEnd = exitEdges.end(); + exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { + MachineLoop::Edge exitEdge = *exitEdgeItr; + + LiveRange *outRange = + split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second)); + + if (outRange != 0) { + if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) { + split.invalidate(); + return; + } + + split.splitOutgoing(exitEdge); + } + } + } + + void LoopSplitter::processLoopUses(LoopSplit &split) { + std::set<MachineInstr*> processed; + + for (MachineRegisterInfo::reg_iterator + rItr = mri->reg_begin(split.getLI().reg), + rEnd = mri->reg_end(); + rItr != rEnd; ++rItr) { + MachineInstr &instr = *rItr; + if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) { + split.addLoopInstr(&instr); + processed.insert(&instr); + } + } + + //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg + // << " in blocks [ "; + //dbgs() << "]\n"; + } + + bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) { + assert(TargetRegisterInfo::isVirtualRegister(li.reg) && + "Attempt to split physical register."); + + LoopSplit split(*this, li, loop); + processHeader(split); + if (split.isValid()) + processLoopExits(split); + if (split.isValid()) + processLoopUses(split); + if (split.isValid() /* && split.isWorthwhile() */) { + split.apply(); + DEBUG(dbgs() << "Success.\n"); + return true; + } + DEBUG(dbgs() << "Failed.\n"); + return false; + } + + void LoopSplitter::processInterval(LiveInterval &li) { + std::deque<MachineLoop*> loops; + std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); + + while (!loops.empty()) { + MachineLoop &loop = *loops.front(); + loops.pop_front(); + DEBUG( + dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#" + << loop.getHeader()->getNumber() << " "; + ); + if (!splitOverLoop(li, loop)) { + // Couldn't split over outer loop, schedule sub-loops to be checked. + std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); + } + } + } + + void LoopSplitter::processIntervals() { + while (!intervals.empty()) { + LiveInterval &li = *intervals.front(); + intervals.pop_front(); + + assert(!lis->intervalIsInOneMBB(li) && + "Single interval in process worklist."); + + processInterval(li); + } + } + +} diff --git a/contrib/llvm/lib/CodeGen/Splitter.h b/contrib/llvm/lib/CodeGen/Splitter.h new file mode 100644 index 0000000..a726a7b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Splitter.h @@ -0,0 +1,99 @@ +//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPLITTER_H +#define LLVM_CODEGEN_SPLITTER_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" + +#include <deque> +#include <map> +#include <string> +#include <vector> + +namespace llvm { + + class LiveInterval; + class LiveIntervals; + struct LiveRange; + class LoopSplit; + class MachineDominatorTree; + class MachineRegisterInfo; + class SlotIndexes; + class TargetInstrInfo; + class VNInfo; + + class LoopSplitter : public MachineFunctionPass { + friend class LoopSplit; + public: + static char ID; + + LoopSplitter() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); + + virtual void releaseMemory(); + + + private: + + MachineFunction *mf; + LiveIntervals *lis; + MachineLoopInfo *mli; + MachineRegisterInfo *mri; + MachineDominatorTree *mdt; + SlotIndexes *sis; + const TargetInstrInfo *tii; + const TargetRegisterInfo *tri; + + std::string fqn; + std::deque<LiveInterval*> intervals; + + typedef std::pair<SlotIndex, SlotIndex> SlotPair; + typedef std::vector<SlotPair> LoopRanges; + typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap; + LoopRangeMap loopRangeMap; + + void dumpLoopInfo(MachineLoop &loop); + + void dumpOddTerminators(); + + void updateTerminators(MachineBasicBlock &mbb); + + bool canInsertPreHeader(MachineLoop &loop); + MachineBasicBlock& insertPreHeader(MachineLoop &loop); + + bool isCriticalEdge(MachineLoop::Edge &edge); + bool canSplitEdge(MachineLoop::Edge &edge); + MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop); + + LoopRanges& getLoopRanges(MachineLoop &loop); + std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr, + MachineLoop &loop); + + void dumpLoopRanges(MachineLoop &loop); + + void processHeader(LoopSplit &split); + void processLoopExits(LoopSplit &split); + void processLoopUses(LoopSplit &split); + + bool splitOverLoop(LiveInterval &li, MachineLoop &loop); + + void processInterval(LiveInterval &li); + + void processIntervals(); + }; + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index ca5c28c..9f51778 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -62,17 +62,17 @@ namespace { bool RequiresStackProtector() const; public: static char ID; // Pass identification, replacement for typeid. 
- StackProtector() : FunctionPass(&ID), TLI(0) {} + StackProtector() : FunctionPass(ID), TLI(0) {} StackProtector(const TargetLowering *tli) - : FunctionPass(&ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) {} virtual bool runOnFunction(Function &Fn); }; } // end anonymous namespace char StackProtector::ID = 0; -static RegisterPass<StackProtector> -X("stack-protector", "Insert stack protectors"); +INITIALIZE_PASS(StackProtector, "stack-protector", + "Insert stack protectors", false, false); FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { return new StackProtector(tli); diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index eff3c33..8d57ae9 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -95,9 +95,9 @@ namespace { public: static char ID; // Pass identification StackSlotColoring() : - MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {} + MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {} StackSlotColoring(bool RegColor) : - MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {} + MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -119,7 +119,6 @@ namespace { private: void InitializeSlots(); - bool CheckForSetJmpCall(const MachineFunction &MF) const; void ScanForSpillSlotRefs(MachineFunction &MF); bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); @@ -146,8 +145,8 @@ namespace { char StackSlotColoring::ID = 0; -static RegisterPass<StackSlotColoring> -X("stack-slot-coloring", "Stack Slot Coloring"); +INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false); FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) { return new StackSlotColoring(RegColor); diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp index 59315cf..894dbfa 100644 --- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp @@ -39,7 +39,7 @@ using namespace llvm; namespace { struct StrongPHIElimination : public MachineFunctionPass { static char ID; // Pass identification, replacement for typeid - StrongPHIElimination() : MachineFunctionPass(&ID) {} + StrongPHIElimination() : MachineFunctionPass(ID) {} // Waiting stores, for each MBB, the set of copies that need to // be inserted into that MBB @@ -150,11 +150,10 @@ namespace { } char StrongPHIElimination::ID = 0; -static RegisterPass<StrongPHIElimination> -X("strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently"); +INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false); -const PassInfo *const llvm::StrongPHIEliminationID = &X; +char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; /// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree /// of the given MachineFunction. 
These numbers are then used in other parts diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 075db80..a815b36 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -69,7 +69,7 @@ namespace { public: static char ID; explicit TailDuplicatePass(bool PreRA) : - MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} + MachineFunctionPass(ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "Tail Duplication"; } @@ -254,14 +254,15 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { // SSA form. for (unsigned i = 0, e = Copies.size(); i != e; ++i) { MachineInstr *Copy = Copies[i]; - unsigned Src, Dst, SrcSR, DstSR; - if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) { - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); - if (++UI == MRI->use_end()) { - // Copy is the only use. Do trivial copy propagation here. - MRI->replaceRegWith(Dst, Src); - Copy->eraseFromParent(); - } + if (!Copy->isCopy()) + continue; + unsigned Dst = Copy->getOperand(0).getReg(); + unsigned Src = Copy->getOperand(1).getReg(); + MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); + if (++UI == MRI->use_end()) { + // Copy is the only use. Do trivial copy propagation here. + MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); } } diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp index cdacb98..6e4a0d8 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -178,19 +178,6 @@ MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, return MF.CloneMachineInstr(Orig); } -unsigned -TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { - unsigned FnSize = 0; - for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); - I != E; ++I) - FnSize += GetInstSizeInBytes(I); - } - return FnSize; -} - // If the COPY instruction in MI can be folded to a stack operation, return // the register class to use. 
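The declaration below belongs to the comment above it. Looking back at the TailDuplication hunk: with isMoveInstr retired, copies are recognized structurally, and the trivial propagation becomes the following (sketch; the wrapper is invented, the body mirrors the added lines):

    // Fold a COPY whose source register has exactly one use (the COPY
    // itself): readers of Dst can read Src directly, and the COPY dies.
    static bool tryPropagateCopy(MachineRegisterInfo &MRI, MachineInstr *Copy) {
      if (!Copy->isCopy())
        return false;
      unsigned Dst = Copy->getOperand(0).getReg();
      unsigned Src = Copy->getOperand(1).getReg();
      MachineRegisterInfo::use_iterator UI = MRI.use_begin(Src);
      if (++UI != MRI.use_end())
        return false; // Src has other uses; keep the COPY.
      MRI.replaceRegWith(Dst, Src);
      Copy->eraseFromParent();
      return true;
    }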
static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a80cfc4..f1e10ee 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -519,11 +519,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, ConstTextCoalSection = getContext().getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataCoalSection - = getContext().getMachOSection("__DATA","__const_coal", - MCSectionMachO::S_COALESCED, - SectionKind::getText()); + SectionKind::getReadOnly()); ConstDataSection // .const_data = getContext().getMachOSection("__DATA", "__const", 0, SectionKind::getReadOnlyWithRel()); diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 5649143..78989c5 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -138,7 +138,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(&ID) {} + TwoAddressInstructionPass() : MachineFunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -159,10 +159,10 @@ namespace { } char TwoAddressInstructionPass::ID = 0; -static RegisterPass<TwoAddressInstructionPass> -X("twoaddressinstruction", "Two-Address instruction pass"); +INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false); -const PassInfo *const llvm::TwoAddressInstructionPassID = &X; +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; /// Sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it @@ -380,26 +380,18 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, bool &IsSrcPhys, bool &IsDstPhys) { SrcReg = 0; DstReg = 0; - unsigned SrcSubIdx, DstSubIdx; - if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (MI.isCopy()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - } else if (MI.isInsertSubreg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - } else if (MI.isSubregToReg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - } - } + if (MI.isCopy()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + } else if (MI.isInsertSubreg() || MI.isSubregToReg()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + } else + return false; - if (DstReg) { - IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - return true; - } - return false; + IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + return true; } /// isKilled - Test if the given register value, which is used by the given @@ -1454,7 +1446,17 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { // // If the REG_SEQUENCE doesn't kill its source, keeping live variables // correctly up to date becomes very difficult. Insert a copy. 
-      //
+
+      // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+      // might insert a COPY that uses SrcReg after it was killed.
+      if (isKill)
+        for (unsigned j = i + 2; j < e; j += 2)
+          if (MI->getOperand(j).getReg() == SrcReg) {
+            MI->getOperand(j).setIsKill();
+            isKill = false;
+            break;
+          }
+
       MachineBasicBlock::iterator InsertLoc = MI;
       MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
                                      MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 7b33812..6dd3333 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -43,7 +43,7 @@ namespace {
     virtual bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
-    UnreachableBlockElim() : FunctionPass(&ID) {}
+    UnreachableBlockElim() : FunctionPass(ID) {}

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addPreserved<ProfileInfo>();
@@ -51,8 +51,8 @@ namespace {
   };
 }
 char UnreachableBlockElim::ID = 0;
-static RegisterPass<UnreachableBlockElim>
-X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
+                "Remove unreachable blocks from the CFG", false, false);

 FunctionPass *llvm::createUnreachableBlockEliminationPass() {
   return new UnreachableBlockElim();
@@ -100,16 +100,15 @@ namespace {
     MachineModuleInfo *MMI;
   public:
     static char ID; // Pass identification, replacement for typeid
-    UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {}
+    UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
   };
 }
 char UnreachableMachineBlockElim::ID = 0;
-static RegisterPass<UnreachableMachineBlockElim>
-Y("unreachable-mbb-elimination",
-  "Remove unreachable machine basic blocks");
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+                "Remove unreachable machine basic blocks", false, false);

-const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y;
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;

 void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<MachineLoopInfo>();
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index ed02696..20ffcff 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -48,8 +48,7 @@ STATISTIC(NumSpills  , "Number of register spills");

 char VirtRegMap::ID = 0;

-static RegisterPass<VirtRegMap>
-X("virtregmap", "Virtual Register Map");
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false);

 bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
   MRI = &mf.getRegInfo();
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h
index a5599f6..8b6082d 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.h
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h
@@ -139,7 +139,7 @@ namespace llvm {
   public:
     static char ID;

-    VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
+    VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
                    Virt2StackSlotMap(NO_STACK_SLOT),
                    Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
                    Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
@@ -152,6 +152,11 @@ namespace llvm {
       MachineFunctionPass::getAnalysisUsage(AU);
     }

+    MachineFunction &getMachineFunction() const {
+      assert(MF && "getMachineFunction called before runOnMachineFunction");
runOnMAchineFunction"); + return *MF; + } + void grow(); /// @brief returns true if the specified virtual register is diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp index 57a1500..240d28c 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp @@ -67,23 +67,16 @@ VirtRegRewriter::~VirtRegRewriter() {} /// Note that operands may be added, so the MO reference is no longer valid. static void substitutePhysReg(MachineOperand &MO, unsigned Reg, const TargetRegisterInfo &TRI) { - if (unsigned SubIdx = MO.getSubReg()) { - // Insert the physical subreg and reset the subreg field. - MO.setReg(TRI.getSubReg(Reg, SubIdx)); - MO.setSubReg(0); - - // Any def, dead, and kill flags apply to the full virtual register, so they - // also apply to the full physical register. Add imp-def/dead and imp-kill - // as needed. + if (MO.getSubReg()) { + MO.substPhysReg(Reg, TRI); + + // Any kill flags apply to the full virtual register, so they also apply to + // the full physical register. + // We assume that partial defs have already been decorated with a super-reg + // <imp-def> operand by LiveIntervals. MachineInstr &MI = *MO.getParent(); - if (MO.isDef()) - if (MO.isDead()) - MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true); - else - MI.addRegisterDefined(Reg, &TRI); - else if (!MO.isUndef() && - (MO.isKill() || - MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) + if (MO.isUse() && !MO.isUndef() && + (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); } else { MO.setReg(Reg); @@ -460,7 +453,7 @@ public: /// blocks each of which is a successor of the specified BB and has no other /// predecessor. static void findSinglePredSuccessor(MachineBasicBlock *MBB, - SmallVectorImpl<MachineBasicBlock *> &Succs) { + SmallVectorImpl<MachineBasicBlock *> &Succs){ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock *SuccMBB = *SI; @@ -852,8 +845,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, // Yup, use the reload register that we didn't use before. unsigned NewReg = Op.AssignedPhysReg; Rejected.insert(PhysReg); - return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected, - RegKills, KillOps, VRM); + return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); } else { // Otherwise, we might also have a problem if a previously reused // value aliases the new register. If so, codegen the previous reload @@ -1864,7 +1857,7 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) { /// rewriteMBB - Keep track of which spills are available even after the -/// register allocator is done with them. If possible, avid reloading vregs. +/// register allocator is done with them. If possible, avoid reloading vregs. 
@@ -1864,7 +1857,7 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) {
 
 /// rewriteMBB - Keep track of which spills are available even after the
-/// register allocator is done with them. If possible, avid reloading vregs.
+/// register allocator is done with them. If possible, avoid reloading vregs.
 void
 LocalRewriter::RewriteMBB(LiveIntervals *LIs,
                           AvailableSpills &Spills, BitVector &RegKills,
@@ -1914,7 +1907,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
     if (InsertSpills(MII))
       NextMII = llvm::next(MII);
 
-    VirtRegMap::MI2VirtMapTy::const_iterator I, End;
     bool Erased = false;
     bool BackTracked = false;
     MachineInstr &MI = *MII;
@@ -2028,14 +2020,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
           CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
             Spills.canClobberPhysReg(PhysReg);
         }
-        // If this is an asm, and PhysReg is used elsewhere as an earlyclobber
-        // operand, we can't also use it as an input.  (Outputs always come
-        // before inputs, so we can stop looking at i.)
+        // If this is an asm, and a PhysReg alias is used elsewhere as an
+        // earlyclobber operand, we can't also use it as an input.
         if (MI.isInlineAsm()) {
-          for (unsigned k=0; k<i; ++k) {
+          for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
             MachineOperand &MOk = MI.getOperand(k);
-            if (MOk.isReg() && MOk.getReg()==PhysReg && MOk.isEarlyClobber()) {
+            if (MOk.isReg() && MOk.isEarlyClobber() &&
+                TRI->regsOverlap(MOk.getReg(), PhysReg)) {
               CanReuse = false;
+              DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+                           << " for vreg" << VirtReg << ": " << MOk << '\n');
               break;
             }
           }
@@ -2248,15 +2242,22 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
       // If we have folded references to memory operands, make sure we clear all
       // physical registers that may contain the value of the spilled virtual
       // register
+
+      // Copy the folded virts to a small vector, we may change MI2VirtMap.
+      SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
+      // C++0x FTW!
+      for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
+                     VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
+             VRM->getFoldedVirts(&MI);
+           FVRange.first != FVRange.second; ++FVRange.first)
+        FoldedVirts.push_back(FVRange.first->second);
+
       SmallSet<int, 2> FoldedSS;
-      for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
-        unsigned VirtReg = I->second.first;
-        VirtRegMap::ModRef MR = I->second.second;
+      for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
+        unsigned VirtReg = FoldedVirts[FVI].first;
+        VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
         DEBUG(dbgs() << "Folded vreg: " << VirtReg << "  MR: " << MR);
 
-        // MI2VirtMap be can updated which invalidate the iterator.
-        // Increment the iterator first.
-        ++I;
         int SS = VRM->getStackSlot(VirtReg);
         if (SS == VirtRegMap::NO_STACK_SLOT)
           continue;
@@ -2302,7 +2303,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
           unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
           SmallVector<MachineInstr*, 4> NewMIs;
           if (PhysReg &&
-              TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+              TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
             MBB->insert(MII, NewMIs[0]);
             InvalidateKills(MI, TRI, RegKills, KillOps);
             VRM->RemoveMachineInstrFromMaps(&MI);
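The FoldedVirts hunk above replaces in-place iteration over MI2VirtMap with a copy-first loop, because the loop body can update the map and invalidate the range iterators. The same pattern in self-contained form, using std::multimap in place of the real MI2VirtMapTy; keys and values are invented.

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  // Stand-in for VirtRegMap's MI2VirtMapTy: several records per key.
  std::multimap<int, int> MI2VirtMap = {{7, 100}, {7, 200}, {9, 300}};

  // Snapshot the range for key 7 before any mutation; erasing entries
  // while the equal_range iterators are live would invalidate them.
  std::vector<int> FoldedVirts;
  for (std::pair<std::multimap<int, int>::const_iterator,
                 std::multimap<int, int>::const_iterator> Range =
           MI2VirtMap.equal_range(7);
       Range.first != Range.second; ++Range.first)
    FoldedVirts.push_back(Range.first->second);

  // The map can now be updated freely while the snapshot is processed.
  MI2VirtMap.erase(7);
  for (unsigned i = 0, e = FoldedVirts.size(); i != e; ++i)
    std::printf("folded virt: %d\n", FoldedVirts[i]);
  return 0;
}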
@@ -2442,28 +2443,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
             Spills.disallowClobberPhysReg(VirtReg);
             goto ProcessNextInst;
           }
-          unsigned Src, Dst, SrcSR, DstSR;
-          if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
-              Src == Dst && SrcSR == DstSR &&
-              !MI.findRegisterUseOperand(Src)->isUndef()) {
-            ++NumDCE;
-            DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-            SmallVector<unsigned, 2> KillRegs;
-            InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
-            if (MO.isDead() && !KillRegs.empty()) {
-              // Source register or an implicit super/sub-register use is killed.
-              assert(KillRegs[0] == Dst ||
-                     TRI->isSubRegister(KillRegs[0], Dst) ||
-                     TRI->isSuperRegister(KillRegs[0], Dst));
-              // Last def is now dead.
-              TransferDeadness(Src, RegKills, KillOps);
-            }
-            VRM->RemoveMachineInstrFromMaps(&MI);
-            MBB->erase(&MI);
-            Erased = true;
-            Spills.disallowClobberPhysReg(VirtReg);
-            goto ProcessNextInst;
-          }
 
           // If it's not a no-op copy, it clobbers the value in the destreg.
           Spills.ClobberPhysReg(VirtReg);
@@ -2541,20 +2520,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
             UpdateKills(*LastStore, TRI, RegKills, KillOps);
             goto ProcessNextInst;
           }
-          {
-            unsigned Src, Dst, SrcSR, DstSR;
-            if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
-                Src == Dst && SrcSR == DstSR) {
-              ++NumDCE;
-              DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-              InvalidateKills(MI, TRI, RegKills, KillOps);
-              VRM->RemoveMachineInstrFromMaps(&MI);
-              MBB->erase(&MI);
-              Erased = true;
-              UpdateKills(*LastStore, TRI, RegKills, KillOps);
-              goto ProcessNextInst;
-            }
-          }
         }
       }
     ProcessNextInst:
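Both blocks deleted above relied on TargetInstrInfo::isMoveInstr to spot now-noop copies; with moves expressed as COPY instructions, an identity copy is recognizable directly from its operands. A rough sketch of that check; the CopyInstr stand-in is invented, real code would use MachineInstr::isCopy() and compare the register operands.

#include <cstdio>

struct CopyInstr {
  unsigned DstReg, SrcReg;
  unsigned DstSub, SrcSub;
  bool isIdentityCopy() const {
    // A copy is a no-op when source and destination (including any
    // subregister indices) match; such instructions can simply be erased.
    return DstReg == SrcReg && DstSub == SrcSub;
  }
};

int main() {
  CopyInstr MI = {5, 5, 0, 0};
  if (MI.isIdentityCopy())
    std::printf("removing now-noop copy\n");
  return 0;
}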