diff options
Diffstat (limited to 'lib/CodeGen')
55 files changed, 2604 insertions, 1019 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 8e3f8e7..bb61682 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -38,16 +38,19 @@ DebugMod("agg-antidep-debugmod", cl::desc("Debug control for aggressive anti-dep breaker"), cl::init(0), cl::Hidden); -AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) : - GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) { - // Initialize all registers to be in their own group. Initially we - // assign the register to the same-indexed GroupNode. - for (unsigned i = 0; i < TargetRegisterInfo::FirstVirtualRegister; ++i) +AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, + MachineBasicBlock *BB) : + NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) { + + const unsigned BBSize = BB->size(); + for (unsigned i = 0; i < NumTargetRegs; ++i) { + // Initialize all registers to be in their own group. Initially we + // assign the register to the same-indexed GroupNode. GroupNodeIndices[i] = i; - - // Initialize the indices to indicate that no registers are live. - std::fill(KillIndices, array_endof(KillIndices), ~0u); - std::fill(DefIndices, array_endof(DefIndices), BB->size()); + // Initialize the indices to indicate that no registers are live. + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } } unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) @@ -64,7 +67,7 @@ void AggressiveAntiDepState::GetGroupRegs( std::vector<unsigned> &Regs, std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs) { - for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) { + for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) { if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0)) Regs.push_back(Reg); } @@ -137,7 +140,7 @@ AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(State == NULL); - State = new AggressiveAntiDepState(BB); + State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); unsigned *KillIndices = State->GetKillIndices(); @@ -220,7 +223,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, DEBUG(errs() << "\tRegs:"); unsigned *DefIndices = State->GetDefIndices(); - for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) { + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { // If Reg is current live, then mark that it can't be renamed as // we don't know the extent of its live-range anymore (now that it // has been scheduled). If it is not live but was defined in the diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 8154d2d..d385a21 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -44,6 +44,10 @@ namespace llvm { } RegisterReference; private: + /// NumTargetRegs - Number of non-virtual target registers + /// (i.e. TRI->getNumRegs()). + const unsigned NumTargetRegs; + /// GroupNodes - Implements a disjoint-union data structure to /// form register groups. A node is represented by an index into /// the vector. A node can "point to" itself to indicate that it @@ -69,7 +73,7 @@ namespace llvm { unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; public: - AggressiveAntiDepState(MachineBasicBlock *BB); + AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB); /// GetKillIndices - Return the kill indices. unsigned *GetKillIndices() { return KillIndices; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 993cdbf..44fd176 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1374,6 +1374,7 @@ void AsmPrinter::processDebugLoc(const MachineInstr *MI, unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, CurDLT.Scope); printLabel(L); + O << '\n'; DW->BeginScope(MI, L); PrevDLT = CurDLT; } @@ -1837,15 +1838,16 @@ void AsmPrinter::EmitComments(const MachineInstr &MI) const { // Print source line info. O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " SrcLine "; - if (DLT.Scope) { - DICompileUnit CU(DLT.Scope); - if (!CU.isNull()) - O << CU.getFilename() << " "; - } - O << DLT.Line; + O << MAI->getCommentString() << ' '; + DIScope Scope(DLT.Scope); + // Omit the directory, because it's likely to be long and uninteresting. + if (!Scope.isNull()) + O << Scope.getFilename(); + else + O << "<unknown>"; + O << ':' << DLT.Line; if (DLT.Col != 0) - O << ":" << DLT.Col; + O << ':' << DLT.Col; Newline = true; } @@ -1857,35 +1859,40 @@ void AsmPrinter::EmitComments(const MachineInstr &MI) const { // We assume a single instruction only has a spill or reload, not // both. + const MachineMemOperand *MMO; if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); if (Newline) O << '\n'; O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Reload"; + O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Reload"; Newline = true; } } - else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) { + else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { if (Newline) O << '\n'; O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Folded Reload"; + O << MAI->getCommentString() << ' ' + << MMO->getSize() << "-byte Folded Reload"; Newline = true; } } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); if (Newline) O << '\n'; O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Spill"; + O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Spill"; Newline = true; } } - else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) { + else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { if (Newline) O << '\n'; O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Folded Spill"; + O << MAI->getCommentString() << ' ' + << MMO->getSize() << "-byte Folded Spill"; Newline = true; } } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index dc6a70a..cad8b89 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -274,7 +274,7 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIEString - A string value DIE. + /// DIEString - A string value DIE. This DIE keeps string reference only. /// class DIEString : public DIEValue { const StringRef Str; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c2e1e05..c200a46 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -330,8 +330,8 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, Die->addValue(Attribute, Form, Value); } -/// addString - Add a string attribute data and value. -/// +/// addString - Add a string attribute data and value. DIEString only +/// keeps string reference. void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, const StringRef String) { DIEValue *Value = new DIEString(String); @@ -393,7 +393,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) { return; unsigned Line = V->getLineNumber(); - unsigned FileID = findCompileUnit(V->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(V->getCompileUnit())->getID(); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -407,7 +407,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) { return; unsigned Line = G->getLineNumber(); - unsigned FileID = findCompileUnit(G->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(G->getCompileUnit())->getID(); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -425,7 +425,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) { unsigned Line = SP->getLineNumber(); - unsigned FileID = findCompileUnit(SP->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(SP->getCompileUnit())->getID(); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -440,7 +440,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) { return; unsigned Line = Ty->getLineNumber(); - unsigned FileID = findCompileUnit(CU).getID(); + unsigned FileID = findCompileUnit(CU)->getID(); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -738,13 +738,49 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, addBlock(Die, Attribute, 0, Block); } +/// addToContextOwner - Add Die into the list of its context owner's children. +void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (Context.isNull()) + ModuleCU->addDie(Die); + else if (Context.isType()) { + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode())); + ContextDIE->addChild(Die); + } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode())) + ContextDIE->addChild(Die); + else + ModuleCU->addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { + DIE *TyDIE = ModuleCU->getDIE(Ty.getNode()); + if (TyDIE) + return TyDIE; + + // Create new type. + TyDIE = new DIE(dwarf::DW_TAG_base_type); + ModuleCU->insertDIE(Ty.getNode(), TyDIE); + if (Ty.isBasicType()) + constructTypeDIE(*TyDIE, DIBasicType(Ty.getNode())); + else if (Ty.isCompositeType()) + constructTypeDIE(*TyDIE, DICompositeType(Ty.getNode())); + else { + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + constructTypeDIE(*TyDIE, DIDerivedType(Ty.getNode())); + } + + addToContextOwner(TyDIE, Ty.getContext()); + return TyDIE; +} + /// addType - Add a new type attribute to the specified entity. -void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { +void DwarfDebug::addType(DIE *Entity, DIType Ty) { if (Ty.isNull()) return; // Check for pre-existence. - DIEEntry *Entry = DW_Unit->getDIEEntry(Ty.getNode()); + DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode()); // If it exists then use the existing value. if (Entry) { @@ -754,36 +790,17 @@ void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { // Set up proxy. Entry = createDIEEntry(); - DW_Unit->insertDIEEntry(Ty.getNode(), Entry); + ModuleCU->insertDIEEntry(Ty.getNode(), Entry); // Construct type. - DIE *Buffer = new DIE(dwarf::DW_TAG_base_type); - if (Ty.isBasicType()) - constructTypeDIE(DW_Unit, *Buffer, DIBasicType(Ty.getNode())); - else if (Ty.isCompositeType()) - constructTypeDIE(DW_Unit, *Buffer, DICompositeType(Ty.getNode())); - else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(DW_Unit, *Buffer, DIDerivedType(Ty.getNode())); - } + DIE *Buffer = getOrCreateTypeDIE(Ty); - // Add debug information entry to entity and appropriate context. - DIE *Die = NULL; - DIDescriptor Context = Ty.getContext(); - if (!Context.isNull()) - Die = DW_Unit->getDIE(Context.getNode()); - - if (Die) - Die->addChild(Buffer); - else - DW_Unit->addDie(Buffer); Entry->setEntry(Buffer); Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); } /// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, - DIBasicType BTy) { +void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { // Get core information. StringRef Name = BTy.getName(); Buffer.setTag(dwarf::DW_TAG_base_type); @@ -798,8 +815,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } /// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, - DIDerivedType DTy) { +void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; @@ -812,7 +828,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Map to main type, void will not have a type. DIType FromTy = DTy.getTypeDerivedFrom(); - addType(DW_Unit, &Buffer, FromTy); + addType(&Buffer, FromTy); // Add name if not anonymous or intermediate type. if (!Name.empty()) @@ -828,8 +844,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } /// constructTypeDIE - Construct type DIE from DICompositeType. -void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, - DICompositeType CTy) { +void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Get core information. StringRef Name = CTy.getName(); @@ -840,7 +855,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, switch (Tag) { case dwarf::DW_TAG_vector_type: case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(DW_Unit, Buffer, &CTy); + constructArrayTypeDIE(Buffer, &CTy); break; case dwarf::DW_TAG_enumeration_type: { DIArray Elements = CTy.getTypeArray(); @@ -850,7 +865,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIE *ElemDie = NULL; DIEnumerator Enum(Elements.getElement(i).getNode()); if (!Enum.isNull()) { - ElemDie = constructEnumTypeDIE(DW_Unit, &Enum); + ElemDie = constructEnumTypeDIE(&Enum); Buffer.addChild(ElemDie); } } @@ -860,7 +875,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add return type. DIArray Elements = CTy.getTypeArray(); DIDescriptor RTy = Elements.getElement(0); - addType(DW_Unit, &Buffer, DIType(RTy.getNode())); + addType(&Buffer, DIType(RTy.getNode())); // Add prototype flag. addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); @@ -869,7 +884,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - addType(DW_Unit, Arg, DIType(Ty.getNode())); + addType(Arg, DIType(Ty.getNode())); Buffer.addChild(Arg); } } @@ -891,11 +906,9 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, continue; DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) - ElemDie = createSubprogramDIE(DW_Unit, - DISubprogram(Element.getNode())); + ElemDie = createSubprogramDIE(DISubprogram(Element.getNode())); else - ElemDie = createMemberDIE(DW_Unit, - DIDerivedType(Element.getNode())); + ElemDie = createMemberDIE(DIDerivedType(Element.getNode())); Buffer.addChild(ElemDie); } @@ -944,33 +957,32 @@ void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); if (L) addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - if (H) - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); Buffer.addChild(DW_Subrange); } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, +void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy) { Buffer.setTag(dwarf::DW_TAG_array_type); if (CTy->getTag() == dwarf::DW_TAG_vector_type) addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); // Emit derived type. - addType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom()); + addType(&Buffer, CTy->getTypeDerivedFrom()); DIArray Elements = CTy->getTypeArray(); // Get an anonymous type for index type. - DIE *IdxTy = DW_Unit->getIndexTyDie(); + DIE *IdxTy = ModuleCU->getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. IdxTy = new DIE(dwarf::DW_TAG_base_type); addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_signed); - DW_Unit->addDie(IdxTy); - DW_Unit->setIndexTyDie(IdxTy); + ModuleCU->addDie(IdxTy); + ModuleCU->setIndexTyDie(IdxTy); } // Add subranges to array type. @@ -982,7 +994,7 @@ void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { +DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); StringRef Name = ETy->getName(); addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); @@ -992,8 +1004,7 @@ DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { } /// createGlobalVariableDIE - Create new DIE using GV. -DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit, - const DIGlobalVariable &GV) { +DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) { // If the global variable was optmized out then no need to create debug info // entry. if (!GV.getGlobal()) return NULL; @@ -1014,7 +1025,7 @@ DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit, addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); } - addType(DW_Unit, GVDie, GV.getType()); + addType(GVDie, GV.getType()); if (!GV.isLocalToUnit()) addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(GVDie, &GV); @@ -1030,13 +1041,13 @@ DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit, } /// createMemberDIE - Create new member DIE. -DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ +DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { DIE *MemberDie = new DIE(DT.getTag()); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - addType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); + addType(MemberDie, DT.getTypeDerivedFrom()); addSourceLine(MemberDie, &DT); @@ -1073,21 +1084,27 @@ DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_protected); else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_private); - + else if (DT.getTag() == dwarf::DW_TAG_inheritance) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (DT.isVirtual()) + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, + dwarf::DW_VIRTUALITY_virtual); return MemberDie; } /// createSubprogramDIE - Create new DIE using SP. -DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit, - const DISubprogram &SP, - bool IsConstructor, - bool IsInlined) { - DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); +DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { + DIE *SPDie = ModuleCU->getDIE(SP.getNode()); + if (SPDie) + return SPDie; + + SPDie = new DIE(dwarf::DW_TAG_subprogram); addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); StringRef LinkageName = SP.getLinkageName(); @@ -1103,9 +1120,6 @@ DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit, } addSourceLine(SPDie, &SP); - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - // Add prototyped tag, if C or ObjC. unsigned Lang = SP.getCompileUnit().getLanguage(); if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 || @@ -1113,98 +1127,56 @@ DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit, addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); unsigned SPTag = SPTy.getTag(); - if (!IsConstructor) { - if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) - addType(DW_Unit, SPDie, SPTy); - else - addType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode())); + + if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) + addType(SPDie, SPTy); + else + addType(SPDie, DIType(Args.getElement(0).getNode())); + + unsigned VK = SP.getVirtuality(); + if (VK) { + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = new DIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + ContainingTypeMap.insert(std::make_pair(SPDie, WeakVH(SP.getContainingType().getNode()))); } - if (!SP.isDefinition()) { + if (MakeDecl || !SP.isDefinition()) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. Do not add arguments for subprogram definition. They will - // be handled through RecordVariable. + // be handled while processing variables. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - addType(DW_Unit, Arg, DIType(Args.getElement(i).getNode())); + addType(Arg, DIType(Args.getElement(i).getNode())); addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? SPDie->addChild(Arg); } } // DW_TAG_inlined_subroutine may refer to this DIE. - DW_Unit->insertDIE(SP.getNode(), SPDie); + ModuleCU->insertDIE(SP.getNode(), SPDie); return SPDie; } /// findCompileUnit - Get the compile unit for the given descriptor. /// -CompileUnit &DwarfDebug::findCompileUnit(DICompileUnit Unit) const { +CompileUnit *DwarfDebug::findCompileUnit(DICompileUnit Unit) { DenseMap<Value *, CompileUnit *>::const_iterator I = CompileUnitMap.find(Unit.getNode()); - assert(I != CompileUnitMap.end() && "Missing compile unit."); - return *I->second; -} - -/// createDbgScopeVariable - Create a new scope variable. -/// -DIE *DwarfDebug::createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { - // Get the descriptor. - const DIVariable &VD = DV->getVariable(); - StringRef Name = VD.getName(); - if (Name.empty()) - return NULL; - - // Translate tag to proper Dwarf tag. The result variable is dropped for - // now. - unsigned Tag; - switch (VD.getTag()) { - case dwarf::DW_TAG_return_variable: - return NULL; - case dwarf::DW_TAG_arg_variable: - Tag = dwarf::DW_TAG_formal_parameter; - break; - case dwarf::DW_TAG_auto_variable: // fall thru - default: - Tag = dwarf::DW_TAG_variable; - break; - } - - // Define variable debug information entry. - DIE *VariableDie = new DIE(Tag); - addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - // Add source line info if available. - addSourceLine(VariableDie, &VD); - - // Add variable type. - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (VD.isBlockByrefVariable()) - addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name)); - else - addType(Unit, VariableDie, VD.getType()); - - // Add variable address. - // Variables for abstract instances of inlined functions don't get a - // location. - MachineLocation Location; - unsigned FrameReg; - int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg); - Location.set(FrameReg, Offset); - - - if (VD.hasComplexAddress()) - addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else if (VD.isBlockByrefVariable()) - addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else - addAddress(VariableDie, dwarf::DW_AT_location, Location); - - return VariableDie; + if (I == CompileUnitMap.end()) + return constructCompileUnit(Unit.getNode()); + return I->second; } /// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction. @@ -1295,6 +1267,28 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { DIE *SPDie = ModuleCU->getDIE(SPNode); assert (SPDie && "Unable to find subprogram DIE!"); + DISubprogram SP(SPNode); + if (SP.isDefinition() && !SP.getContext().isCompileUnit()) { + addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Args.getElement(i).getNode())); + addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + + ModuleCU->addDie(SPDie); + } + addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, DWLabel("func_begin", SubprogramCount)); addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, @@ -1305,19 +1299,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { if (!DISubprogram(SPNode).isLocalToUnit()) addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // If there are global variables at this scope then add their dies. - for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), - SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { - MDNode *N = dyn_cast_or_null<MDNode>(*SGI); - if (!N) continue; - DIGlobalVariable GV(N); - if (GV.getContext().getNode() == SPNode) { - DIE *ScopedGVDie = createGlobalVariableDIE(ModuleCU, GV); - if (ScopedGVDie) - SPDie->addChild(ScopedGVDie); - } - } - return SPDie; } @@ -1401,8 +1382,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, - DbgScope *Scope, CompileUnit *Unit) { +DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Get the descriptor. const DIVariable &VD = DV->getVariable(); StringRef Name = VD.getName(); @@ -1451,9 +1431,9 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. if (VD.isBlockByrefVariable()) - addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name)); + addType(VariableDie, getBlockByrefType(VD.getType(), Name)); else - addType(Unit, VariableDie, VD.getType()); + addType(VariableDie, VD.getType()); } // Add variable address. @@ -1522,7 +1502,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { // Add variables to scope. SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables(); for (unsigned i = 0, N = Variables.size(); i < N; ++i) { - DIE *VariableDIE = constructVariableDIE(Variables[i], Scope, ModuleCU); + DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); if (VariableDIE) ScopeDIE->addChild(VariableDIE); } @@ -1579,7 +1559,7 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName) return SrcId; } -void DwarfDebug::constructCompileUnit(MDNode *N) { +CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); @@ -1618,6 +1598,7 @@ void DwarfDebug::constructCompileUnit(MDNode *N) { CompileUnitMap[DIUnit.getNode()] = Unit; CompileUnits.push_back(Unit); + return Unit; } void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { @@ -1631,14 +1612,16 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { if (ModuleCU->getDIE(DI_GV.getNode())) return; - DIE *VariableDie = createGlobalVariableDIE(ModuleCU, DI_GV); + DIE *VariableDie = createGlobalVariableDIE(DI_GV); + if (!VariableDie) + return; // Add to map. ModuleCU->insertDIE(N, VariableDie); // Add to context owner. - ModuleCU->getCUDie()->addChild(VariableDie); - + addToContextOwner(VariableDie, DI_GV.getContext()); + // Expose as global. FIXME - need to check external flag. ModuleCU->addGlobal(DI_GV.getName(), VariableDie); @@ -1663,13 +1646,15 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) { // class type. return; - DIE *SubprogramDie = createSubprogramDIE(ModuleCU, SP); + DIE *SubprogramDie = createSubprogramDIE(SP); // Add to map. ModuleCU->insertDIE(N, SubprogramDie); // Add to context owner. - ModuleCU->getCUDie()->addChild(SubprogramDie); + if (SP.getContext().getNode() == SP.getCompileUnit().getNode()) + if (TopLevelDIEs.insert(SubprogramDie)) + TopLevelDIEsVector.push_back(SubprogramDie); // Expose as global. ModuleCU->addGlobal(SP.getName(), SubprogramDie); @@ -1709,21 +1694,16 @@ void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) { if (!ModuleCU) ModuleCU = CompileUnits[0]; - // Create DIEs for each of the externally visible global variables. - for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) { - DIGlobalVariable GV(*I); - if (GV.getContext().getNode() != GV.getCompileUnit().getNode()) - ScopedGVs.push_back(*I); - else - constructGlobalVariableDIE(*I); - } - // Create DIEs for each subprogram. for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), E = DbgFinder.subprogram_end(); I != E; ++I) constructSubprogramDIE(*I); + // Create DIEs for each global variable. + for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) + constructGlobalVariableDIE(*I); + MMI = mmi; shouldEmit = true; MMI->setDebugInfoAvailability(true); @@ -1770,6 +1750,22 @@ void DwarfDebug::endModule() { addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } + // Insert top level DIEs. + for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(), + TE = TopLevelDIEsVector.end(); TI != TE; ++TI) + ModuleCU->getCUDie()->addChild(*TI); + + for (DenseMap<DIE *, WeakVH>::iterator CI = ContainingTypeMap.begin(), + CE = ContainingTypeMap.end(); CI != CE; ++CI) { + DIE *SPDie = CI->first; + MDNode *N = dyn_cast_or_null<MDNode>(CI->second); + if (!N) continue; + DIE *NDie = ModuleCU->getDIE(N); + if (!NDie) continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + } + // Standard sections final addresses. Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); EmitLabel("text_end", 0); @@ -1898,6 +1894,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) { unsigned Label = MMI->NextLabelID(); Asm->printLabel(Label); + O << '\n'; SmallVector<DbgScope *, 2> &SD = I->second; for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); @@ -2092,17 +2089,15 @@ void DwarfDebug::endFunction(MachineFunction *MF) { MMI->getFrameMoves())); // Clear debug info - if (CurrentFnDbgScope) { - CurrentFnDbgScope = NULL; - DbgScopeMap.clear(); - DbgScopeBeginMap.clear(); - DbgScopeEndMap.clear(); - ConcreteScopes.clear(); - AbstractScopesList.clear(); - } + CurrentFnDbgScope = NULL; + DbgScopeMap.clear(); + DbgScopeBeginMap.clear(); + DbgScopeEndMap.clear(); + ConcreteScopes.clear(); + AbstractScopesList.clear(); Lines.clear(); - + if (TimePassesIsEnabled) DebugTimer->stopTimer(); } @@ -2337,13 +2332,16 @@ void DwarfDebug::emitDIE(DIE *Die) { } } -/// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section. +/// emitDebugInfo - Emit the debug info section. /// -void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) { - DIE *Die = Unit->getCUDie(); +void DwarfDebug::emitDebugInfo() { + // Start debug info section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfInfoSection()); + DIE *Die = ModuleCU->getCUDie(); // Emit the compile units header. - EmitLabel("info_begin", Unit->getID()); + EmitLabel("info_begin", ModuleCU->getID()); // Emit size of content not including length itself unsigned ContentSize = Die->getSize() + @@ -2364,17 +2362,10 @@ void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) { Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB"); Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB"); Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB"); - EmitLabel("info_end", Unit->getID()); + EmitLabel("info_end", ModuleCU->getID()); Asm->EOL(); -} - -void DwarfDebug::emitDebugInfo() { - // Start debug info section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfInfoSection()); - emitDebugInfoPerCU(ModuleCU); } /// emitAbbreviations - Emit the abbreviation section. @@ -2534,9 +2525,9 @@ void DwarfDebug::emitDebugLines() { std::pair<unsigned, unsigned> SourceID = getSourceDirectoryAndFileIds(LineInfo.getSourceID()); O << '\t' << MAI->getCommentString() << ' ' - << getSourceDirectoryName(SourceID.first) << ' ' + << getSourceDirectoryName(SourceID.first) << '/' << getSourceFileName(SourceID.second) - <<" :" << utostr_32(LineInfo.getLine()) << '\n'; + << ':' << utostr_32(LineInfo.getLine()) << '\n'; } // Define the line address. @@ -2672,24 +2663,30 @@ DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ Asm->EOL(); } -void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) { - EmitDifference("pubnames_end", Unit->getID(), - "pubnames_begin", Unit->getID(), true); +/// emitDebugPubNames - Emit visible names into a debug pubnames section. +/// +void DwarfDebug::emitDebugPubNames() { + // Start the dwarf pubnames section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); + + EmitDifference("pubnames_end", ModuleCU->getID(), + "pubnames_begin", ModuleCU->getID(), true); Asm->EOL("Length of Public Names Info"); - EmitLabel("pubnames_begin", Unit->getID()); + EmitLabel("pubnames_begin", ModuleCU->getID()); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version"); EmitSectionOffset("info_begin", "section_info", - Unit->getID(), 0, true, false); + ModuleCU->getID(), 0, true, false); Asm->EOL("Offset of Compilation Unit Info"); - EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(), + EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(), true); Asm->EOL("Compilation Unit Length"); - const StringMap<DIE*> &Globals = Unit->getGlobals(); + const StringMap<DIE*> &Globals = ModuleCU->getGlobals(); for (StringMap<DIE*>::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); @@ -2700,21 +2697,11 @@ void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) { } Asm->EmitInt32(0); Asm->EOL("End Mark"); - EmitLabel("pubnames_end", Unit->getID()); + EmitLabel("pubnames_end", ModuleCU->getID()); Asm->EOL(); } -/// emitDebugPubNames - Emit visible names into a debug pubnames section. -/// -void DwarfDebug::emitDebugPubNames() { - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); - - emitDebugPubNamesPerCU(ModuleCU); -} - void DwarfDebug::emitDebugPubTypes() { // Start the dwarf pubnames section. Asm->OutStreamer.SwitchSection( diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 679d9b9..12ad322 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -154,12 +154,14 @@ class DwarfDebug : public Dwarf { /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; + DenseMap<DIE *, WeakVH> ContainingTypeMap; + /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs. SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs; - /// ScopedGVs - Tracks global variables that are not at file scope. - /// For example void f() { static int b = 42; } - SmallVector<WeakVH, 4> ScopedGVs; + /// TopLevelDIEs - Collection of top level DIEs. + SmallPtrSet<DIE *, 4> TopLevelDIEs; + SmallVector<DIE *, 4> TopLevelDIEsVector; typedef SmallVector<DbgScope *, 2> ScopeVector; typedef DenseMap<const MachineInstr *, ScopeVector> @@ -307,53 +309,52 @@ class DwarfDebug : public Dwarf { void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); + /// addToContextOwner - Add Die into the list of its context owner's children. + void addToContextOwner(DIE *Die, DIDescriptor Context); + /// addType - Add a new type attribute to the specified entity. - void addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty); + void addType(DIE *Entity, DIType Ty); + + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the + /// given DIType. + DIE *getOrCreateTypeDIE(DIType Ty); void addPubTypes(DISubprogram SP); /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void constructTypeDIE(DIE &Buffer, DIBasicType BTy); /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void constructTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy); /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - DIE *constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy); + DIE *constructEnumTypeDIE(DIEnumerator *ETy); /// createGlobalVariableDIE - Create new DIE using GV. - DIE *createGlobalVariableDIE(CompileUnit *DW_Unit, - const DIGlobalVariable &GV); + DIE *createGlobalVariableDIE(const DIGlobalVariable &GV); /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT); + DIE *createMemberDIE(const DIDerivedType &DT); /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(CompileUnit *DW_Unit, - const DISubprogram &SP, - bool IsConstructor = false, - bool IsInlined = false); + DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false); /// findCompileUnit - Get the compile unit for the given descriptor. /// - CompileUnit &findCompileUnit(DICompileUnit Unit) const; - - /// createDbgScopeVariable - Create a new scope variable. - /// - DIE *createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); + CompileUnit *findCompileUnit(DICompileUnit Unit); /// getUpdatedDbgScope - Find or create DbgScope assicated with /// the instruction. Initialize scope and update scope hierarchy. @@ -384,7 +385,7 @@ class DwarfDebug : public Dwarf { DIE *constructInlinedScopeDIE(DbgScope *Scope); /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit); + DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S); /// constructScopeDIE - Construct a DIE for this scope. DIE *constructScopeDIE(DbgScope *Scope); @@ -405,10 +406,8 @@ class DwarfDebug : public Dwarf { /// void computeSizeAndOffsets(); - /// EmitDebugInfo / emitDebugInfoPerCU - Emit the debug info section. + /// EmitDebugInfo - Emit the debug info section. /// - void emitDebugInfoPerCU(CompileUnit *Unit); - void emitDebugInfo(); /// emitAbbreviations - Emit the abbreviation section. @@ -432,8 +431,6 @@ class DwarfDebug : public Dwarf { /// section. void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo); - void emitDebugPubNamesPerCU(CompileUnit *Unit); - /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void emitDebugPubNames(); @@ -488,7 +485,7 @@ class DwarfDebug : public Dwarf { /// as well. unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName); - void constructCompileUnit(MDNode *N); + CompileUnit *constructCompileUnit(MDNode *N); void constructGlobalVariableDIE(MDNode *N); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 1c8b8f4..3fd077f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -292,14 +292,13 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { Asm->EmitULEB128Bytes(is4Byte ? 4 : 8); Asm->EOL("Augmentation size"); + // We force 32-bits here because we've encoded our LSDA in the CIE with + // `dwarf::DW_EH_PE_sdata4'. And the CIE and FDE should agree. if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, false); - else { - if (is4Byte) - Asm->EmitInt32((int)0); - else - Asm->EmitInt64((int)0); - } + EmitReference("exception", EHFrameInfo.Number, true, true); + else + Asm->EmitInt32((int)0); + Asm->EOL("Language Specific Data Area"); } else { Asm->EmitULEB128Bytes(0); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index aff1665..aa01c5b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -119,7 +119,6 @@ class DwarfException : public Dwarf { static inline unsigned getTombstoneKey() { return -2U; } static unsigned getHashValue(const unsigned &Key) { return Key; } static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; /// PadRange - Structure holding a try-range and the associated landing pad. diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 8a62eb2..3887e6d 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -427,7 +427,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); - MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB)); + MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (I != MF->end() && @@ -805,7 +805,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a compile-time infinite loop repeatedly doing and undoing the same // transformations.) - for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) { if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; @@ -833,7 +833,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // This is the QBB case described above if (!FBB) - FBB = next(MachineFunction::iterator(PBB)); + FBB = llvm::next(MachineFunction::iterator(PBB)); } // Failing case: the only way IBB can be reached from PBB is via // exception handling. Happens for landing pads. Would be nice @@ -1140,7 +1140,7 @@ ReoptimizeBlock: // falls through into MBB and we can't understand the prior block's branch // condition. if (MBB->empty()) { - bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB); + bool PredHasNoFallThrough = !PrevBB.canFallThrough(); if (PredHasNoFallThrough || !PriorUnAnalyzable || !PrevBB.isSuccessor(MBB)) { // If the prior block falls through into us, turn it into an @@ -1239,7 +1239,7 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); TII->InsertBranch(*MBB, NextBB, 0, CurCond); } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 6f86614..7a969f0 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp BranchFolding.cpp + CalcSpillWeights.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp @@ -35,7 +36,9 @@ add_llvm_library(LLVMCodeGen MachinePassRegistry.cpp MachineRegisterInfo.cpp MachineSink.cpp + MachineSSAUpdater.cpp MachineVerifier.cpp + MaxStackAlignment.cpp ObjectCodeEmitter.cpp OcamlGC.cpp PHIElimination.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp new file mode 100644 index 0000000..dcffb8a2 --- /dev/null +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -0,0 +1,154 @@ +//===------------------------ CalcSpillWeights.cpp ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "calcspillweights" + +#include "llvm/Function.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +char CalculateSpillWeights::ID = 0; +static RegisterPass<CalculateSpillWeights> X("calcspillweights", + "Calculate spill weights"); + +void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<LiveIntervals>(); + au.addRequired<MachineLoopInfo>(); + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); +} + +bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { + + DEBUG(errs() << "********** Compute Spill Weights **********\n" + << "********** Function: " + << fn.getFunction()->getName() << '\n'); + + LiveIntervals *lis = &getAnalysis<LiveIntervals>(); + MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>(); + const TargetInstrInfo *tii = fn.getTarget().getInstrInfo(); + MachineRegisterInfo *mri = &fn.getRegInfo(); + + SmallSet<unsigned, 4> processed; + for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + SlotIndex mbbEnd = lis->getMBBEndIdx(mbb); + MachineLoop* loop = loopInfo->getLoopFor(mbb); + unsigned loopDepth = loop ? loop->getLoopDepth() : 0; + bool isExiting = loop ? loop->isLoopExiting(mbb) : false; + + for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ++mii) { + const MachineInstr *mi = mii; + if (tii->isIdentityCopy(*mi)) + continue; + + if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + continue; + + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + const MachineOperand &mopi = mi->getOperand(i); + if (!mopi.isReg() || mopi.getReg() == 0) + continue; + unsigned reg = mopi.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) + continue; + // Multiple uses of reg by the same instruction. It should not + // contribute to spill weight again. + if (!processed.insert(reg)) + continue; + + bool hasDef = mopi.isDef(); + bool hasUse = !hasDef; + for (unsigned j = i+1; j != e; ++j) { + const MachineOperand &mopj = mi->getOperand(j); + if (!mopj.isReg() || mopj.getReg() != reg) + continue; + hasDef |= mopj.isDef(); + hasUse |= mopj.isUse(); + if (hasDef && hasUse) + break; + } + + LiveInterval ®Int = lis->getInterval(reg); + float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth); + if (hasDef && isExiting) { + // Looks like this is a loop count variable update. + SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex(); + const LiveRange *dlr = + lis->getInterval(reg).getLiveRangeContaining(defIdx); + if (dlr->end > mbbEnd) + weight *= 3.0F; + } + regInt.weight += weight; + } + processed.clear(); + } + } + + for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) { + LiveInterval &li = *I->second; + if (TargetRegisterInfo::isVirtualRegister(li.reg)) { + // If the live interval length is essentially zero, i.e. in every live + // range the use follows def immediately, it doesn't make sense to spill + // it and hope it will be easier to allocate for this li. + if (isZeroLengthInterval(&li)) { + li.weight = HUGE_VALF; + continue; + } + + bool isLoad = false; + SmallVector<LiveInterval*, 4> spillIs; + if (lis->isReMaterializable(li, spillIs, isLoad)) { + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. If non of the defs are + // loads, then it's potentially very cheap to re-materialize. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. + if (isLoad) + li.weight *= 0.9F; + else + li.weight *= 0.5F; + } + + // Slightly prefer live interval that has been assigned a preferred reg. + std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg); + if (Hint.first || Hint.second) + li.weight *= 1.01F; + + // Divide the weight of the interval by its size. This encourages + // spilling of intervals that are large and have few uses, and + // discourages spilling of small intervals with many uses. + li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM; + } + } + + return false; +} + +/// Returns true if the given live interval is zero length. +bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const { + for (LiveInterval::Ranges::const_iterator + i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) + if (i->end.getPrevIndex() > i->start) + return false; + return true; +} diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index e9844d8..ff71f6b 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -182,7 +182,7 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, // Move it and all the blocks that can reach it via fallthrough edges // exclusively, to keep existing fallthrough edges intact. MachineFunction::iterator Begin = Pred; - MachineFunction::iterator End = next(Begin); + MachineFunction::iterator End = llvm::next(Begin); while (Begin != MF.begin()) { MachineFunction::iterator Prior = prior(Begin); if (Prior == MF.begin()) @@ -255,7 +255,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // to the top of the loop to avoid loosing that fallthrough. Otherwise append // them to the bottom, even if it previously had a fallthrough, on the theory // that it's worth an extra branch to keep the loop contiguous. - MachineFunction::iterator InsertPt = next(MachineFunction::iterator(BotMBB)); + MachineFunction::iterator InsertPt = + llvm::next(MachineFunction::iterator(BotMBB)); bool InsertAtTop = false; if (TopMBB != MF.begin() && !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) && @@ -268,7 +269,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // with the loop header. SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks; for (MachineFunction::iterator I = TopMBB, - E = next(MachineFunction::iterator(BotMBB)); I != E; ++I) + E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I) ContiguousBlocks.insert(I); // Find non-contigous blocks and fix them. @@ -301,7 +302,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // Process this block and all loop blocks contiguous with it, to keep // them in their relative order. MachineFunction::iterator Begin = BB; - MachineFunction::iterator End = next(MachineFunction::iterator(BB)); + MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB)); for (; End != MF.end(); ++End) { if (!L->contains(End)) break; if (!HasAnalyzableTerminator(End)) break; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 1b39fec..3c7961c2 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -43,8 +43,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { static_cast<const TargetRegisterClass *>(0)); // Initialize the indices to indicate that no registers are live. - std::fill(KillIndices, array_endof(KillIndices), ~0u); - std::fill(DefIndices, array_endof(DefIndices), BB->size()); + const unsigned BBSize = BB->size(); + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } // Clear "do not change" set. KeepRegs.clear(); @@ -122,7 +125,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, // may have been rescheduled and its lifetime may overlap with registers // in ways not reflected in our current liveness state. For each such // register, adjust the liveness state to be conservatively correct. - for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); // Mark this register to be non-renamable. diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 242cba5..297dd31 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -74,6 +74,9 @@ EnableFastISelOption("fast-isel", cl::Hidden, static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden, cl::desc("Split GEPs and run no-load GVN")); +static cl::opt<bool> PreAllocTailDup("pre-regalloc-taildup", cl::Hidden, + cl::desc("Pre-register allocation tail duplication")); + LLVMTargetMachine::LLVMTargetMachine(const Target &T, const std::string &TargetTriple) : TargetMachine(T) { @@ -302,6 +305,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, /* allowDoubleDefs= */ true); } + // Pre-ra tail duplication. + if (OptLevel != CodeGenOpt::None && + !DisableTailDuplicate && PreAllocTailDup) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) printAndVerify(PM, "After PreRegAlloc passes", @@ -348,7 +358,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Tail duplication. if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { - PM.add(createTailDuplicatePass()); + PM.add(createTailDuplicatePass(false)); printAndVerify(PM, "After TailDuplicate"); } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 8d632cb..cc286aa 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -847,7 +847,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { if (vni->isUnused()) { OS << "x"; } else { - if (!vni->isDefAccurate()) + if (!vni->isDefAccurate() && !vni->isPHIDef()) OS << "?"; else OS << vni->def; diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 24adf36..8806439f 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -149,44 +149,69 @@ void LiveIntervals::dumpInstrs() const { printInstrs(errs()); } -/// conflictsWithPhysRegDef - Returns true if the specified register -/// is defined during the duration of the specified interval. -bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li, - VirtRegMap &vrm, unsigned reg) { - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (SlotIndex index = I->start.getBaseIndex(), - end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - index != end; - index = index.getNextIndex()) { - // skip deleted instructions - while (index != end && !getInstructionFromIndex(index)) - index = index.getNextIndex(); - if (index == end) break; +bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, + VirtRegMap &vrm, unsigned reg) { + // We don't handle fancy stuff crossing basic block boundaries + if (li.ranges.size() != 1) + return true; + const LiveRange &range = li.ranges.front(); + SlotIndex idx = range.start.getBaseIndex(); + SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex(); + + // Skip deleted instructions + MachineInstr *firstMI = getInstructionFromIndex(idx); + while (!firstMI && idx != end) { + idx = idx.getNextIndex(); + firstMI = getInstructionFromIndex(idx); + } + if (!firstMI) + return false; - MachineInstr *MI = getInstructionFromIndex(index); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) - if (SrcReg == li.reg || DstReg == li.reg) - continue; - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg()) - continue; - unsigned PhysReg = mop.getReg(); - if (PhysReg == 0 || PhysReg == li.reg) + // Find last instruction in range + SlotIndex lastIdx = end.getPrevIndex(); + MachineInstr *lastMI = getInstructionFromIndex(lastIdx); + while (!lastMI && lastIdx != idx) { + lastIdx = lastIdx.getPrevIndex(); + lastMI = getInstructionFromIndex(lastIdx); + } + if (!lastMI) + return false; + + // Range cannot cross basic block boundaries or terminators + MachineBasicBlock *MBB = firstMI->getParent(); + if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator()) + return true; + + MachineBasicBlock::const_iterator E = lastMI; + ++E; + for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) { + const MachineInstr &MI = *I; + + // Allow copies to and from li.reg + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (SrcReg == li.reg || DstReg == li.reg) + continue; + + // Check for operands using reg + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand& mop = MI.getOperand(i); + if (!mop.isReg()) + continue; + unsigned PhysReg = mop.getReg(); + if (PhysReg == 0 || PhysReg == li.reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { + if (!vrm.hasPhys(PhysReg)) continue; - if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { - if (!vrm.hasPhys(PhysReg)) - continue; - PhysReg = vrm.getPhys(PhysReg); - } - if (PhysReg && tri_->regsOverlap(PhysReg, reg)) - return true; + PhysReg = vrm.getPhys(PhysReg); } + if (PhysReg && tri_->regsOverlap(PhysReg, reg)) + return true; } } + // No conflicts found. return false; } @@ -201,15 +226,9 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); index != end; index = index.getNextIndex()) { - // Skip deleted instructions. - MachineInstr *MI = 0; - while (index != end) { - MI = getInstructionFromIndex(index); - if (MI) - break; - index = index.getNextIndex(); - } - if (index == end) break; + MachineInstr *MI = getInstructionFromIndex(index); + if (!MI) + continue; // skip deleted instructions if (JoinedCopies.count(MI)) continue; @@ -374,8 +393,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; OldValNo->setCopy(0); - if (MO.isEarlyClobber()) - OldValNo->setHasRedefByEC(true); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -411,7 +428,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.removeRange(Start, End); assert(interval.ranges.size() == 1 && "Newly discovered PHI interval has >1 ranges."); - MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex()); + MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def); VNI->addKill(indexes_->getTerminatorGap(killMBB)); VNI->setHasPHIKill(true); DEBUG({ @@ -422,7 +439,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Replace the interval with one of a NEW value number. Note that this // value number isn't actually defined by an instruction, weird huh? :) LiveRange LR(Start, End, - interval.getNextValue(SlotIndex(getMBBStartIdx(mbb), true), + interval.getNextValue(SlotIndex(getMBBStartIdx(Killer->getParent()), true), 0, false, VNInfoAllocator)); LR.valno->setIsPHIDef(true); DEBUG(errs() << " replace range with " << LR); @@ -513,8 +530,6 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, if (mi->isRegTiedToUseOperand(DefIdx)) { // Two-address instruction. end = baseIndex.getDefIndex(); - assert(!mi->getOperand(DefIdx).isEarlyClobber() && - "Two address instruction is an early clobber?"); } else { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that defines @@ -730,8 +745,16 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { // If it's extracting out of a physical register, return the sub-register. unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm(); + unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg(); + if (SrcSubReg == DstSubReg) + // %reg1034:3<def> = EXTRACT_SUBREG %EDX, 3 + // reg1034 can still be coalesced to EDX. + return Reg; + assert(DstSubReg == 0); Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); + } return Reg; } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG || VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 68f80ac..3c88e37 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -720,6 +720,51 @@ bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, return findKill(&MBB); } +bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) { + LiveVariables::VarInfo &VI = getVarInfo(Reg); + + // Loop over all of the successors of the basic block, checking to see if + // the value is either live in the block, or if it is killed in the block. + std::vector<MachineBasicBlock*> OpSuccBlocks; + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + E = MBB.succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + + // Is it alive in this successor? + unsigned SuccIdx = SuccMBB->getNumber(); + if (VI.AliveBlocks.test(SuccIdx)) + return true; + OpSuccBlocks.push_back(SuccMBB); + } + + // Check to see if this value is live because there is a use in a successor + // that kills it. + switch (OpSuccBlocks.size()) { + case 1: { + MachineBasicBlock *SuccMBB = OpSuccBlocks[0]; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (VI.Kills[i]->getParent() == SuccMBB) + return true; + break; + } + case 2: { + MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1]; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (VI.Kills[i]->getParent() == SuccMBB1 || + VI.Kills[i]->getParent() == SuccMBB2) + return true; + break; + } + default: + std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), + VI.Kills[i]->getParent())) + return true; + } + return false; +} + /// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All /// variables that are live out of DomBB will be marked as passing live through /// BB. diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 30636a8..80eb6cd 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -312,7 +312,7 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { mbbi != mbbe; ++mbbi) { for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me;) { - MachineBasicBlock::iterator nmi = next(mi); + MachineBasicBlock::iterator nmi = llvm::next(mi); MachineInstr *MI = mi; if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { MadeChange |= LowerExtract(MI); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index e55e369..a58286d 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -290,7 +290,7 @@ void MachineBasicBlock::updateTerminator() { } else { // The block has a fallthrough conditional branch. MachineBasicBlock *MBBA = *succ_begin(); - MachineBasicBlock *MBBB = *next(succ_begin()); + MachineBasicBlock *MBBB = *llvm::next(succ_begin()); if (MBBA == TBB) std::swap(MBBB, MBBA); if (isLayoutSuccessor(TBB)) { if (TII->ReverseBranchCondition(Cond)) { @@ -359,15 +359,10 @@ bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { MachineFunction::const_iterator I(this); - return next(I) == MachineFunction::const_iterator(MBB); + return llvm::next(I) == MachineFunction::const_iterator(MBB); } bool MachineBasicBlock::canFallThrough() { - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); - bool BranchUnAnalyzable = TII->AnalyzeBranch(*this, TBB, FBB, Cond, true); - MachineFunction::iterator Fallthrough = this; ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. @@ -378,16 +373,21 @@ bool MachineBasicBlock::canFallThrough() { if (!isSuccessor(Fallthrough)) return false; - // If we couldn't analyze the branch, examine the last instruction. - // If the block doesn't end in a known control barrier, assume fallthrough - // is possible. The isPredicable check is needed because this code can be - // called during IfConversion, where an instruction which is normally a - // Barrier is predicated and thus no longer an actual control barrier. This - // is over-conservative though, because if an instruction isn't actually - // predicated we could still treat it like a barrier. - if (BranchUnAnalyzable) + // Analyze the branches, if any, at the end of the block. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + if (TII->AnalyzeBranch(*this, TBB, FBB, Cond, true)) { + // If we couldn't analyze the branch, examine the last instruction. + // If the block doesn't end in a known control barrier, assume fallthrough + // is possible. The isPredicable check is needed because this code can be + // called during IfConversion, where an instruction which is normally a + // Barrier is predicated and thus no longer an actual control barrier. This + // is over-conservative though, because if an instruction isn't actually + // predicated we could still treat it like a barrier. return empty() || !back().getDesc().isBarrier() || back().getDesc().isPredicable(); + } // If there is no branch, control always falls through. if (TBB == 0) return true; @@ -461,18 +461,19 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, bool MadeChange = false; bool AddedFallThrough = false; - MachineFunction::iterator FallThru = next(MachineFunction::iterator(this)); + MachineFunction::iterator FallThru = + llvm::next(MachineFunction::iterator(this)); - // If this block ends with a conditional branch that falls through to its - // successor, set DestB as the successor. if (isCond) { + // If this block ends with a conditional branch that falls through to its + // successor, set DestB as the successor. if (DestB == 0 && FallThru != getParent()->end()) { DestB = FallThru; AddedFallThrough = true; } } else { // If this is an unconditional branch with no explicit dest, it must just be - // a fallthrough into DestB. + // a fallthrough into DestA. if (DestA == 0 && FallThru != getParent()->end()) { DestA = FallThru; AddedFallThrough = true; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d20f446..dd6fd7e 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -328,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS) const { if (I->second) OS << " in reg%" << I->second; - if (next(I) != E) + if (llvm::next(I) != E) OS << ", "; } OS << '\n'; @@ -342,7 +342,7 @@ void MachineFunction::print(raw_ostream &OS) const { else OS << "%physreg" << *I; - if (next(I) != E) + if (llvm::next(I) != E) OS << " "; } OS << '\n'; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index f73a5a3..12b974d 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1058,6 +1058,22 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { return true; } +/// isConstantValuePHI - If the specified instruction is a PHI that always +/// merges together the same virtual register, return the register, otherwise +/// return 0. +unsigned MachineInstr::isConstantValuePHI() const { + if (getOpcode() != TargetInstrInfo::PHI) + return 0; + assert(getNumOperands() >= 3 && + "It's illegal to have a PHI without source operands"); + + unsigned Reg = getOperand(1).getReg(); + for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) + if (getOperand(i).getReg() != Reg) + return 0; + return Reg; +} + void MachineInstr::dump() const { errs() << " " << *this; } @@ -1150,9 +1166,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); DIScope Scope(DLT.Scope); OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. if (!Scope.isNull()) - OS << Scope.getDirectory() << ':' << Scope.getFilename() << ':'; - OS << DLT.Line << ":" << DLT.Col; + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << DLT.Line; + if (DLT.Col != 0) + OS << ':' << DLT.Col; } OS << "\n"; diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index db77d19..63f4f18 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -62,11 +62,11 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { MachineBasicBlock *BotMBB = getHeader(); MachineFunction::iterator End = BotMBB->getParent()->end(); if (BotMBB != prior(End)) { - MachineBasicBlock *NextMBB = next(MachineFunction::iterator(BotMBB)); + MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); while (contains(NextMBB)) { BotMBB = NextMBB; - if (BotMBB == next(MachineFunction::iterator(BotMBB))) break; - NextMBB = next(MachineFunction::iterator(BotMBB)); + if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break; + NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); } } return BotMBB; diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp new file mode 100644 index 0000000..292096f --- /dev/null +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -0,0 +1,393 @@ +//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MachineSSAUpdater class. It's based on SSAUpdater +// class in lib/Transforms/Utils. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; +typedef std::vector<std::pair<MachineBasicBlock*, unsigned> > + IncomingPredInfoTy; + +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { + return *static_cast<IncomingPredInfoTy*>(IPI); +} + + +MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, + SmallVectorImpl<MachineInstr*> *NewPHI) + : AV(0), IPI(0), InsertedPHIs(NewPHI) { + TII = MF.getTarget().getInstrInfo(); + MRI = &MF.getRegInfo(); +} + +MachineSSAUpdater::~MachineSSAUpdater() { + delete &getAvailableVals(AV); + delete &getIncomingPredInfo(IPI); +} + +/// Initialize - Reset this object to get ready for a new set of SSA +/// updates. ProtoValue is the value used to name PHI nodes. +void MachineSSAUpdater::Initialize(unsigned V) { + if (AV == 0) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + + if (IPI == 0) + IPI = new IncomingPredInfoTy(); + else + getIncomingPredInfo(IPI).clear(); + + VR = V; + VRC = MRI->getRegClass(VR); +} + +/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for +/// the specified block. +bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +/// AddAvailableValue - Indicate that a rewritten value is available in the +/// specified block with the specified value. +void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) { + getAvailableVals(AV)[BB] = V; +} + +/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is +/// live at the end of the specified block. +unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { + return GetValueAtEndOfBlockInternal(BB); +} + +static +unsigned LookForIdenticalPHI(MachineBasicBlock *BB, + SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) { + if (BB->empty()) + return 0; + + MachineBasicBlock::iterator I = BB->front(); + if (I->getOpcode() != TargetInstrInfo::PHI) + return 0; + + AvailableValsTy AVals; + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + AVals[PredValues[i].first] = PredValues[i].second; + while (I != BB->end() && I->getOpcode() == TargetInstrInfo::PHI) { + bool Same = true; + for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { + unsigned SrcReg = I->getOperand(i).getReg(); + MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB(); + if (AVals[SrcBB] != SrcReg) { + Same = false; + break; + } + } + if (Same) + return I->getOperand(0).getReg(); + ++I; + } + return 0; +} + +/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define +/// a value of the given register class at the start of the specified basic +/// block. It returns the virtual register defined by the instruction. +static +MachineInstr *InsertNewDef(unsigned Opcode, + MachineBasicBlock *BB, MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { + unsigned NewVR = MRI->createVirtualRegister(RC); + return BuildMI(*BB, I, DebugLoc::getUnknownLoc(), TII->get(Opcode), NewVR); +} + +/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that +/// is live in the middle of the specified block. +/// +/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one +/// important case: if there is a definition of the rewritten value after the +/// 'use' in BB. Consider code like this: +/// +/// X1 = ... +/// SomeBB: +/// use(X) +/// X2 = ... +/// br Cond, SomeBB, OutBB +/// +/// In this case, there are two values (X1 and X2) added to the AvailableVals +/// set by the client of the rewriter, and those values are both live out of +/// their respective blocks. However, the use of X happens in the *middle* of +/// a block. Because of this, we need to insert a new PHI node in SomeBB to +/// merge the appropriate values, and this value isn't live out of the block. +/// +unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { + // If there is no definition of the renamed variable in this block, just use + // GetValueAtEndOfBlock to do our work. + if (!getAvailableVals(AV).count(BB)) + return GetValueAtEndOfBlockInternal(BB); + + // If there are no predecessors, just return undef. + if (BB->pred_empty()) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF, + BB, BB->getFirstTerminator(), + VRC, MRI, TII); + return NewDef->getOperand(0).getReg(); + } + + // Otherwise, we have the hard case. Get the live-in values for each + // predecessor. + SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues; + unsigned SingularValue = 0; + + bool isFirstPred = true; + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // If an identical PHI is already in BB, just reuse it. + unsigned DupPHI = LookForIdenticalPHI(BB, PredValues); + if (DupPHI) + return DupPHI; + + // Otherwise, we do need a PHI: insert one now. + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineInstr *InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, + Loc, VRC, MRI, TII); + + // Fill in all the predecessors of the PHI. + MachineInstrBuilder MIB(InsertedPHI); + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + InsertedPHI->eraseFromParent(); + return ConstVal; + } + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI->getOperand(0).getReg(); +} + +static +MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, + MachineOperand *U) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + if (&MI->getOperand(i) == U) + return MI->getOperand(i+1).getMBB(); + } + + llvm_unreachable("MachineOperand::getParent() failure?"); + return 0; +} + +/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, +/// which use their value in the corresponding predecessor. +void MachineSSAUpdater::RewriteUse(MachineOperand &U) { + MachineInstr *UseMI = U.getParent(); + unsigned NewVR = 0; + if (UseMI->getOpcode() == TargetInstrInfo::PHI) { + MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U); + NewVR = GetValueAtEndOfBlockInternal(SourceBB); + } else { + NewVR = GetValueInMiddleOfBlock(UseMI->getParent()); + } + + U.setReg(NewVR); +} + +void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { + MRI->replaceRegWith(OldReg, NewReg); + + AvailableValsTy &AvailableVals = getAvailableVals(AV); + for (DenseMap<MachineBasicBlock*, unsigned>::iterator + I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I) + if (I->second == OldReg) + I->second = NewReg; +} + +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// walking predecessors inserting PHI nodes as needed until we get to a block +/// where the value is available. +/// +unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ + AvailableValsTy &AvailableVals = getAvailableVals(AV); + + // Query AvailableVals by doing an insertion of null. + std::pair<AvailableValsTy::iterator, bool> InsertRes = + AvailableVals.insert(std::make_pair(BB, 0)); + + // Handle the case when the insertion fails because we have already seen BB. + if (!InsertRes.second) { + // If the insertion failed, there are two cases. The first case is that the + // value is already available for the specified block. If we get this, just + // return the value. + if (InsertRes.first->second != 0) + return InsertRes.first->second; + + // Otherwise, if the value we find is null, then this is the value is not + // known but it is being computed elsewhere in our recursion. This means + // that we have a cycle. Handle this by inserting a PHI node and returning + // it. When we get back to the first instance of the recursion we will fill + // in the PHI node. + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineInstr *NewPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc, + VRC, MRI,TII); + unsigned NewVR = NewPHI->getOperand(0).getReg(); + InsertRes.first->second = NewVR; + return NewVR; + } + + // If there are no predecessors, then we must have found an unreachable block + // just return 'undef'. Since there are no predecessors, InsertRes must not + // be invalidated. + if (BB->pred_empty()) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF, + BB, BB->getFirstTerminator(), + VRC, MRI, TII); + return InsertRes.first->second = NewDef->getOperand(0).getReg(); + } + + // Okay, the value isn't in the map and we just inserted a null in the entry + // to indicate that we're processing the block. Since we have no idea what + // value is in this block, we have to recurse through our predecessors. + // + // While we're walking our predecessors, we keep track of them in a vector, + // then insert a PHI node in the end if we actually need one. We could use a + // smallvector here, but that would take a lot of stack space for every level + // of the recursion, just use IncomingPredInfo as an explicit stack. + IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); + unsigned FirstPredInfoEntry = IncomingPredInfo.size(); + + // As we're walking the predecessors, keep track of whether they are all + // producing the same value. If so, this value will capture it, if not, it + // will get reset to null. We distinguish the no-predecessor case explicitly + // below. + unsigned SingularValue = 0; + bool isFirstPred = true; + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); + IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + + /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If + /// this block is involved in a loop, a no-entry PHI node will have been + /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted + /// above. + unsigned &InsertedVal = AvailableVals[BB]; + + // If all the predecessor values are the same then we don't need to insert a + // PHI. This is the simple and common case. + if (SingularValue) { + // If a PHI node got inserted, replace it with the singlar value and delete + // it. + if (InsertedVal) { + MachineInstr *OldVal = MRI->getVRegDef(InsertedVal); + // Be careful about dead loops. These RAUW's also update InsertedVal. + assert(InsertedVal != SingularValue && "Dead loop?"); + ReplaceRegWith(InsertedVal, SingularValue); + OldVal->eraseFromParent(); + } + + InsertedVal = SingularValue; + + // Drop the entries we added in IncomingPredInfo to restore the stack. + IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + return InsertedVal; + } + + + // Otherwise, we do need a PHI: insert one now if we don't already have one. + MachineInstr *InsertedPHI; + if (InsertedVal == 0) { + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc, + VRC, MRI, TII); + InsertedVal = InsertedPHI->getOperand(0).getReg(); + } else { + InsertedPHI = MRI->getVRegDef(InsertedVal); + } + + // Fill in all the predecessors of the PHI. + MachineInstrBuilder MIB(InsertedPHI); + for (IncomingPredInfoTy::iterator I = + IncomingPredInfo.begin()+FirstPredInfoEntry, + E = IncomingPredInfo.end(); I != E; ++I) + MIB.addReg(I->second).addMBB(I->first); + + // Drop the entries we added in IncomingPredInfo to restore the stack. + IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + MRI->replaceRegWith(InsertedVal, ConstVal); + InsertedPHI->eraseFromParent(); + InsertedVal = ConstVal; + } else { + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + } + + return InsertedVal; +} diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index d9f4c99..917d053 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -376,15 +376,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB doesn't fall through but is empty!", MBB); } } - if (TII->BlockHasNoFallThrough(*MBB)) { - if (MBB->empty()) { - report("TargetInstrInfo says the block has no fall through, but the " - "block is empty!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { - report("TargetInstrInfo says the block has no fall through, but the " - "block does not end in a barrier!", MBB); - } - } } else { // Block is last in function. if (MBB->empty()) { diff --git a/lib/CodeGen/MaxStackAlignment.cpp b/lib/CodeGen/MaxStackAlignment.cpp new file mode 100644 index 0000000..d327cfa --- /dev/null +++ b/lib/CodeGen/MaxStackAlignment.cpp @@ -0,0 +1,70 @@ +//===-- MaxStackAlignment.cpp - Compute the required stack alignment -- ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for vector register usage and aligned local objects to +// calculate the maximum required alignment for a function. This is used by +// targets which support it to determine if dynamic stack realignment is +// necessary. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +namespace { + struct MaximalStackAlignmentCalculator : public MachineFunctionPass { + static char ID; + MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + + // Calculate max stack alignment of all already allocated stack objects. + FFI->calculateMaxStackAlignment(); + unsigned MaxAlign = FFI->getMaxAlignment(); + + // Be over-conservative: scan over all vreg defs and find whether vector + // registers are used. If yes, there is probability that vector registers + // will be spilled and thus the stack needs to be aligned properly. + // FIXME: It would be better to only do this if a spill actually + // happens rather than conseratively aligning the stack regardless. + for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); + + if (FFI->getMaxAlignment() == MaxAlign) + return false; + + FFI->setMaxAlignment(MaxAlign); + return true; + } + + virtual const char *getPassName() const { + return "Stack Alignment Requirements Auto-Detector"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MaximalStackAlignmentCalculator::ID = 0; +} + +FunctionPass* +llvm::createMaxStackAlignmentCalculatorPass() { + return new MaximalStackAlignmentCalculator(); +} + diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 2e30cc6..c62d179 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -287,7 +287,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Okay, if we now know that the value is not live out of the block, we can // add a kill marker in this block saying that it kills the incoming value! - if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) { + if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) { // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, the first // terminator instruction at the end of the block may also use the value. @@ -301,8 +301,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Check that no other terminators use values. #ifndef NDEBUG - for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end(); - ++TI) { + for (MachineBasicBlock::iterator TI = llvm::next(Term); + TI != opBlock.end(); ++TI) { assert(!TI->readsRegister(SrcReg) && "Terminator instructions cannot use virtual registers unless" "they are the first terminator in a block!"); @@ -353,59 +353,13 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, // We break edges when registers are live out from the predecessor block // (not considering PHI nodes). If the register is live in to this block // anyway, we would gain nothing from splitting. - if (!LV.isLiveIn(Reg, MBB) && isLiveOut(Reg, *PreMBB, LV)) + if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) SplitCriticalEdge(PreMBB, &MBB); } } return true; } -bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, - LiveVariables &LV) { - LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); - - // Loop over all of the successors of the basic block, checking to see if - // the value is either live in the block, or if it is killed in the block. - std::vector<MachineBasicBlock*> OpSuccBlocks; - for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), - E = MBB.succ_end(); SI != E; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - - // Is it alive in this successor? - unsigned SuccIdx = SuccMBB->getNumber(); - if (VI.AliveBlocks.test(SuccIdx)) - return true; - OpSuccBlocks.push_back(SuccMBB); - } - - // Check to see if this value is live because there is a use in a successor - // that kills it. - switch (OpSuccBlocks.size()) { - case 1: { - MachineBasicBlock *SuccMBB = OpSuccBlocks[0]; - for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) - if (VI.Kills[i]->getParent() == SuccMBB) - return true; - break; - } - case 2: { - MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1]; - for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) - if (VI.Kills[i]->getParent() == SuccMBB1 || - VI.Kills[i]->getParent() == SuccMBB2) - return true; - break; - } - default: - std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); - for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) - if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), - VI.Kills[i]->getParent())) - return true; - } - return false; -} - MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, MachineBasicBlock *B) { assert(A && B && "Missing MBB end point"); @@ -423,7 +377,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, ++NumSplits; MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->insert(next(MachineFunction::iterator(A)), NMBB); + MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB); DEBUG(errs() << "PHIElimination splitting critical edge:" " BB#" << A->getNumber() << " -- BB#" << NMBB->getNumber() diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index f5872cb..b0b71ce 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -93,12 +93,6 @@ namespace llvm { bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, LiveVariables &LV); - /// isLiveOut - Determine if Reg is live out from MBB, when not - /// considering PHI nodes. This means that Reg is either killed by - /// a successor block or passed through one. - bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, - LiveVariables &LV); - /// SplitCriticalEdge - Split a critical edge from A to B by /// inserting a new MBB. Update branches in A and PHI instructions /// in B. Return the new block. diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 9101fce2..79be295 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -373,7 +373,8 @@ void SchedulePostRATDList::FinishBlock() { /// void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { // Initialize the indices to indicate that no registers are live. - std::fill(KillIndices, array_endof(KillIndices), ~0u); + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) + KillIndices[i] = ~0u; // Determine the live-out physregs for this block. if (!BB->empty() && BB->back().getDesc().isReturn()) { @@ -510,12 +511,9 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { } if (MO.isKill() != kill) { - bool removed = ToggleKillFlag(MI, MO); - if (removed) { - DEBUG(errs() << "Fixed <removed> in "); - } else { - DEBUG(errs() << "Fixed " << MO << " in "); - } + DEBUG(errs() << "Fixing " << MO << " in "); + // Warning: ToggleKillFlag may invalidate MO. + ToggleKillFlag(MI, MO); DEBUG(MI->dump()); } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 8f62345..b0d7a47 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "pre-alloc-split" #include "VirtRegMap.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" @@ -104,6 +105,7 @@ namespace { AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); AU.addPreserved<RegisterCoalescer>(); + AU.addPreserved<CalculateSpillWeights>(); if (StrongPHIElim) AU.addPreservedID(StrongPHIEliminationID); else @@ -876,7 +878,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); else - KillPt = next(MachineBasicBlock::iterator(DefMI)); + KillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); if (KillPt == DefMI->getParent()->end()) return false; @@ -1118,7 +1120,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { return false; // No gap to insert spill. } } else { - SpillPt = next(MachineBasicBlock::iterator(DefMI)); + SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); if (SpillPt == DefMBB->end()) { DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 8905f75..e94247f 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -136,9 +136,10 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + MachineFrameInfo *FFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool HasCalls = false; + bool HasCalls = FFI->hasCalls(); // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); @@ -166,7 +167,6 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { HasCalls = true; } - MachineFrameInfo *FFI = Fn.getFrameInfo(); FFI->setHasCalls(HasCalls); FFI->setMaxCallFrameSize(MaxCallFrameSize); @@ -674,7 +674,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else - I = next(PrevI); + I = llvm::next(PrevI); continue; } diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 4ff5129..c02d47b 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -16,6 +16,7 @@ #include "VirtRegRewriter.h" #include "Spiller.h" #include "llvm/Function.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -59,6 +60,11 @@ PreSplitIntervals("pre-alloc-split", cl::desc("Pre-register allocation live interval splitting"), cl::init(false), cl::Hidden); +static cl::opt<bool> +TrivCoalesceEnds("trivial-coalesce-ends", + cl::desc("Attempt trivial coalescing of interval ends"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -182,6 +188,7 @@ namespace { // Make sure PassManager knows which analyses to make available // to coalescing and which analyses coalescing invalidates. AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequired<CalculateSpillWeights>(); if (PreSplitIntervals) AU.addRequiredID(PreAllocSplittingID); AU.addRequired<LiveStacks>(); @@ -390,66 +397,71 @@ void RALinScan::ComputeRelatedRegClasses() { RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); } -/// attemptTrivialCoalescing - If a simple interval is defined by a copy, -/// try allocate the definition the same register as the source register -/// if the register is not defined during live time of the interval. This -/// eliminate a copy. This is used to coalesce copies which were not -/// coalesced away before allocation either due to dest and src being in -/// different register classes or because the coalescer was overly -/// conservative. +/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try +/// allocate the definition the same register as the source register if the +/// register is not defined during live time of the interval. If the interval is +/// killed by a copy, try to use the destination register. This eliminates a +/// copy. This is used to coalesce copies which were not coalesced away before +/// allocation either due to dest and src being in different register classes or +/// because the coalescer was overly conservative. unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned Preference = vrm_->getRegAllocPref(cur.reg); if ((Preference && Preference == Reg) || !cur.containsOneValue()) return Reg; - VNInfo *vni = cur.begin()->valno; - if ((vni->def == SlotIndex()) || - vni->isUnused() || !vni->isDefAccurate()) + // We cannot handle complicated live ranges. Simple linear stuff only. + if (cur.ranges.size() != 1) return Reg; - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg; - if (!CopyMI || - !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + + const LiveRange &range = cur.ranges.front(); + + VNInfo *vni = range.valno; + if (vni->isUnused()) return Reg; - PhysReg = SrcReg; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { - if (!vrm_->isAssignedReg(SrcReg)) + + unsigned CandReg; + { + MachineInstr *CopyMI; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (vni->def != SlotIndex() && vni->isDefAccurate() && + (CopyMI = li_->getInstructionFromIndex(vni->def)) && + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + // Defined by a copy, try to extend SrcReg forward + CandReg = SrcReg; + else if (TrivCoalesceEnds && + (CopyMI = + li_->getInstructionFromIndex(range.end.getBaseIndex())) && + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + cur.reg == SrcReg) + // Only used by a copy, try to extend DstReg backwards + CandReg = DstReg; + else + return Reg; + } + + if (TargetRegisterInfo::isVirtualRegister(CandReg)) { + if (!vrm_->isAssignedReg(CandReg)) return Reg; - PhysReg = vrm_->getPhys(SrcReg); + CandReg = vrm_->getPhys(CandReg); } - if (Reg == PhysReg) + if (Reg == CandReg) return Reg; const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); - if (!RC->contains(PhysReg)) + if (!RC->contains(CandReg)) return Reg; - // Try to coalesce. - if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) { - DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) - << '\n'); - vrm_->clearVirt(cur.reg); - vrm_->assignVirt2Phys(cur.reg, PhysReg); - - // Remove unnecessary kills since a copy does not clobber the register. - if (li_->hasInterval(SrcReg)) { - LiveInterval &SrcLI = li_->getInterval(SrcReg); - for (MachineRegisterInfo::use_iterator I = mri_->use_begin(cur.reg), - E = mri_->use_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - if (!O.isKill()) - continue; - MachineInstr *MI = &*I; - if (SrcLI.liveAt(li_->getInstructionIndex(MI).getDefIndex())) - O.setIsKill(false); - } - } + if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg)) + return Reg; - ++NumCoalesce; - return PhysReg; - } + // Try to coalesce. + DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) + << '\n'); + vrm_->clearVirt(cur.reg); + vrm_->assignVirt2Phys(cur.reg, CandReg); - return Reg; + ++NumCoalesce; + return CandReg; } bool RALinScan::runOnMachineFunction(MachineFunction &fn) { @@ -1261,9 +1273,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // The earliest start of a Spilled interval indicates up to where // in handled we need to roll back + assert(!spillIs.empty() && "No spill intervals?"); + SlotIndex earliestStart = spillIs[0]->beginIndex(); - LiveInterval *earliestStartInterval = cur; - // Spill live intervals of virtual regs mapped to the physical register we // want to clear (and its aliases). We only spill those that overlap with the // current interval as the rest do not affect its allocation. we also keep @@ -1274,19 +1286,16 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { LiveInterval *sli = spillIs.back(); spillIs.pop_back(); DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n'); - earliestStartInterval = - (earliestStartInterval->beginIndex() < sli->beginIndex()) ? - earliestStartInterval : sli; + if (sli->beginIndex() < earliestStart) + earliestStart = sli->beginIndex(); std::vector<LiveInterval*> newIs; - newIs = spiller_->spill(sli, spillIs); + newIs = spiller_->spill(sli, spillIs, &earliestStart); addStackInterval(sli, ls_, li_, mri_, *vrm_); std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); } - SlotIndex earliestStart = earliestStartInterval->beginIndex(); - DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a @@ -1295,7 +1304,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { while (!handled_.empty()) { LiveInterval* i = handled_.back(); // If this interval starts before t we are done. - if (i->beginIndex() < earliestStart) + if (!i->empty() && i->beginIndex() < earliestStart) break; DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n'); handled_.pop_back(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index c677d34..c2014a7 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -36,6 +36,7 @@ #include "PBQP/Heuristics/Briggs.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -90,6 +91,7 @@ namespace { au.addRequired<LiveIntervals>(); //au.addRequiredID(SplitCriticalEdgesID); au.addRequired<RegisterCoalescer>(); + au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); au.addRequired<MachineLoopInfo>(); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 94680ed..67bf209 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -125,7 +125,7 @@ void RegScavenger::forward() { Tracking = true; } else { assert(MBBI != MBB->end() && "Already at the end of the basic block!"); - MBBI = next(MBBI); + MBBI = llvm::next(MBBI); } MachineInstr *MI = MBBI; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 06ffdd6..2b52187 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -119,7 +119,8 @@ namespace { /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. bool SimplifyDemandedBits(SDValue Op) { - APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits()); + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); return SimplifyDemandedBits(Op, Demanded); } @@ -546,7 +547,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, To[0].getNode()->dump(&DAG); errs() << " and " << NumTo-1 << " other values\n"; for (unsigned i = 0, e = NumTo; i != e; ++i) - assert(N->getValueType(i) == To[i].getValueType() && + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To, &DeadNodes); @@ -1687,10 +1689,14 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // + // do not sink logical op inside of a vector extend, since it may combine + // into a vsetcc. if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| N0.getOpcode() == ISD::SIGN_EXTEND || (N0.getOpcode() == ISD::TRUNCATE && !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && + !VT.isVector() && N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) { @@ -1943,8 +1949,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or x, undef) -> -1 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + } // fold (or c1, c2) -> c1|c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); @@ -2434,7 +2442,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (shl c1, c2) -> c1<<c2 if (N0C && N1C) @@ -2450,7 +2458,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return N0; // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(VT.getSizeInBits()))) + APInt::getAllOnesValue(OpSizeInBits))) return DAG.getConstant(0, VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && @@ -2526,6 +2534,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) @@ -2537,7 +2546,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N0C && N0C->isAllOnesValue()) return N0; // fold (sra x, (setge c, size(x))) -> undef - if (N1C && N1C->getZExtValue() >= VT.getSizeInBits()) + if (N1C && N1C->getZExtValue() >= OpSizeInBits) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -2545,7 +2554,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { - unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); + unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, @@ -2556,7 +2565,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SRA) { if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1; + if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(Sum, N1C->getValueType(0))); } @@ -2572,9 +2581,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. - unsigned VTValSize = VT.getSizeInBits(); EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue()); + EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); // Determine the residual right-shift amount. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -2607,7 +2615,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -2636,7 +2644,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) @@ -3029,7 +3037,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { else if (Op.getValueType().bitsGT(VT)) Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, - DAG.getValueType(N0.getValueType())); + DAG.getValueType(N0.getValueType().getScalarType())); } } @@ -3170,7 +3178,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); } - return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType()); + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -3193,6 +3202,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { X, DAG.getConstant(Mask, VT)); } + // Fold (zext (and x, cst)) -> (and (zext x), cst) + if (N0.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N0.getOperand(0).getOpcode() != ISD::TRUNCATE && + N0.getOperand(0).hasOneUse()) { + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)), + DAG.getConstant(Mask, VT)); + } + // fold (zext (load x)) -> (zext (truncate (zextload x))) if (ISD::isNON_EXTLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || @@ -3269,6 +3291,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SCC.getNode()) return SCC; } + // (zext (shl (zext x), cst)) -> (shl (zext x), cst) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SHL) { + // If the original shl may be shifting out bits, do not perform this + // transformation. + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - + N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); + if (ShAmt > KnownZeroBits) + return SDValue(); + } + DebugLoc dl = N->getDebugLoc(); + return DAG.getNode(N0.getOpcode(), dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); + } + return SDValue(); } @@ -3529,7 +3571,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT EVT = cast<VTSDNode>(N1)->getVT(); - unsigned VTBits = VT.getSizeInBits(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); // fold (sext_in_reg c1) -> c1 @@ -3537,7 +3579,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); // If the input is already sign extended, just drop the extension. - if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1) + if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -3552,7 +3594,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getSizeInBits() < EVTBits) + if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); } @@ -3576,11 +3618,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) - if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) { + if (ShAmt->getZExtValue()+EVTBits <= VTBits) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); - if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1)); } @@ -3681,7 +3723,6 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); EVT LD1VT = LD1->getValueType(0); - const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && @@ -3689,7 +3730,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { // If one is volatile it might be ok, but play conservative and bail out. !LD1->isVolatile() && !LD2->isVolatile() && - TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { + DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); @@ -4804,49 +4845,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// InferAlignment - If we can infer some alignment information from this -/// pointer, return it. -static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) { - // If this is a direct reference to a stack slot, use information about the - // stack slot's alignment. - int FrameIdx = 1 << 31; - int64_t FrameOffset = 0; - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { - FrameIdx = FI->getIndex(); - } else if (Ptr.getOpcode() == ISD::ADD && - isa<ConstantSDNode>(Ptr.getOperand(1)) && - isa<FrameIndexSDNode>(Ptr.getOperand(0))) { - FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); - FrameOffset = Ptr.getConstantOperandVal(1); - } - - if (FrameIdx != (1 << 31)) { - // FIXME: Handle FI+CST. - const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); - if (MFI.isFixedObjectIndex(FrameIdx)) { - int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; - - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = MinAlign(ObjectOffset, StackAlign); - - // Finally, the frame object itself may have a known alignment. Factor - // the alignment + offset into a new alignment. For example, if we know - // the FI is 8 byte aligned, but the pointer is 4 off, we really have a - // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte - // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. - unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), - FrameOffset); - return std::max(Align, FIInfoAlign); - } - } - - return 0; -} - SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); @@ -4854,7 +4852,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { - if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), @@ -5079,7 +5077,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { - if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), @@ -5231,7 +5229,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, APInt::getLowBitsSet( - Value.getValueSizeInBits(), + Value.getValueType().getScalarType().getSizeInBits(), ST->getMemoryVT().getSizeInBits()))) return SDValue(N, 0); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 5eb9ca1..4ead9c9 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -532,7 +532,15 @@ bool FastISel::SelectBitCast(User *I) { bool FastISel::SelectInstruction(Instruction *I) { - return SelectOperator(I, I->getOpcode()); + // First, try doing target-independent selection. + if (SelectOperator(I, I->getOpcode())) + return true; + + // Next, try calling the target to attempt to handle the instruction. + if (TargetSelectInstruction(I)) + return true; + + return false; } /// FastEmitBranch - Emit an unconditional branch to the given block, @@ -541,7 +549,7 @@ FastISel::SelectInstruction(Instruction *I) { void FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { MachineFunction::iterator NextMBB = - next(MachineFunction::iterator(MBB)); + llvm::next(MachineFunction::iterator(MBB)); if (MBB->isLayoutSuccessor(MSucc)) { // The unconditional fall-through case, which needs no instructions. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 273dbf0..f9c05d0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -232,7 +232,7 @@ void SelectionDAGLegalize::LegalizeDAG() { // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != next(E); ++I) + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) LegalizeOp(SDValue(I, 0)); // Finally, it's possible the root changed. Get the new root. @@ -2294,9 +2294,15 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // NOTE: we could fall back on load/store here too for targets without // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); - unsigned BitsDiff = Node->getValueType(0).getSizeInBits() - + EVT VT = Node->getValueType(0); + EVT ShiftAmountTy = TLI.getShiftAmountTy(); + if (VT.isVector()) { + ShiftAmountTy = VT; + VT = VT.getVectorElementType(); + } + unsigned BitsDiff = VT.getSizeInBits() - ExtraVT.getSizeInBits(); - SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy()); + SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); @@ -3059,8 +3065,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, // SelectionDAG::Legalize - This is the entry point for the file. // -void SelectionDAG::Legalize(bool TypesNeedLegalizing, - CodeGenOpt::Level OptLevel) { +void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) { /// run - This is the main entry point to this class. /// SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8ac8063..2f4457e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1167,55 +1167,62 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { GetExpandedInteger(N->getOperand(0), InL, InH); SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); - SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); - SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), - Amt, NVBitsNode, ISD::SETULT); + SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); + SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); - SDValue Lo1, Hi1, Lo2, Hi2; + SDValue LoS, HiS, LoL, HiL; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); case ISD::SHL: - // ShAmt < NVTBits - Lo1 = DAG.getConstant(0, NVT); // Low part is zero. - Hi1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. - - // ShAmt >= NVTBits - Lo2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); - Hi2 = DAG.getNode(ISD::OR, dl, NVT, + // Short: ShAmt < NVTBits + LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + HiS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), - DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2)); + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); + + // Long: ShAmt >= NVTBits + LoL = DAG.getConstant(0, NVT); // Lo part is zero. + HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; case ISD::SRL: - // ShAmt < NVTBits - Hi1 = DAG.getConstant(0, NVT); // Hi part is zero. - Lo1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. - - // ShAmt >= NVTBits - Hi2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); - Lo2 = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), - DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); - - Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getConstant(0, NVT); // Hi part is zero. + LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: - // ShAmt < NVTBits - Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. - DAG.getConstant(NVTBits-1, ShTy)); - Lo1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. - - // ShAmt >= NVTBits - Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); - Lo2 = DAG.getNode(ISD::OR, dl, NVT, + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), - DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. + DAG.getConstant(NVTBits-1, ShTy)); + LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; } @@ -1989,7 +1996,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SRA: case ISD::SRL: case ISD::ROTL: - case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + case ISD::RETURNADDR: + case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -2173,6 +2182,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); } +SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { + // The argument of RETURNADDR / FRAMEADDR builtin is 32 bit contant. This + // surely makes pretty nice problems on 8/16 bit targets. Just truncate this + // constant to valid type. + SDValue Lo, Hi; + GetExpandedInteger(N->getOperand(0), Lo, Hi); + return DAG.UpdateNodeOperands(SDValue(N, 0), Lo); +} + SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT DstVT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2ee9f8a..c35f7ad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -362,6 +362,7 @@ private: SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ExpandIntOp_TRUNCATE(SDNode *N); SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_RETURNADDR(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, DebugLoc dl); @@ -516,6 +517,7 @@ private: SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); @@ -559,6 +561,7 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, @@ -601,6 +604,7 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); + SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 785c2ad..2625245 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -79,7 +79,7 @@ bool VectorLegalizer::Run() { // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != next(E); ++I) + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) LegalizeOp(SDValue(I, 0)); // Finally, it's possible the root changed. Get the new root. @@ -179,6 +179,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; case ISD::SINT_TO_FP: diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 023324b..cf67ab9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -54,6 +54,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_SIGN_EXTEND_INREG(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -195,6 +196,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT, + LHS, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), @@ -401,6 +409,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; @@ -700,6 +709,18 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, Hi = DAG.getUNDEF(HiVT); } +void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, + N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, + N->getOperand(1)); +} + void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); @@ -1141,6 +1162,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); break; case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; @@ -1691,6 +1713,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { WidenVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + WidenVT, WidenLHS, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d53de34..b2ee8bb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -20,10 +20,16 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +cl::opt<bool> +DisableInstScheduling("disable-inst-scheduling", + cl::init(false), + cl::desc("Disable instruction scheduling")); + ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf) { } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c38c79b..da55e6b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" @@ -47,6 +48,8 @@ #include <cmath> using namespace llvm; +extern cl::opt<bool> DisableInstScheduling; + /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { @@ -551,6 +554,9 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, } DeallocateNode(N); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } } @@ -576,6 +582,9 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { N->DropOperands(); DeallocateNode(N); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } void SelectionDAG::DeallocateNode(SDNode *N) { @@ -587,6 +596,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) { N->NodeType = ISD::DELETED_NODE; NodeAllocator.Deallocate(AllNodes.remove(N)); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -690,7 +702,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } /// FindModifiedNodeSlot - Find a slot for the specified node if its operands @@ -707,7 +721,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } @@ -724,7 +740,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } /// VerifyNode - Sanity check the given node. Aborts if it is invalid. @@ -777,8 +795,13 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) : TLI(tli), FLI(fli), DW(0), EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), - getVTList(MVT::Other)), Root(getEntryNode()) { + getVTList(MVT::Other)), + Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); + if (DisableInstScheduling) { + Ordering = new NodeOrdering(); + Ordering->add(&EntryNode); + } } void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, @@ -791,6 +814,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, SelectionDAG::~SelectionDAG() { allnodes_clear(); + delete Ordering; } void SelectionDAG::allnodes_clear() { @@ -816,6 +840,10 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); + if (DisableInstScheduling) { + Ordering = new NodeOrdering(); + Ordering->add(&EntryNode); + } } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -831,8 +859,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { } SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { + assert(!VT.isVector() && + "getZeroExtendInReg should use the vector element type instead of " + "the vector type!"); if (Op.getValueType() == VT) return Op; - APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); return getNode(ISD::AND, DL, Op.getValueType(), Op, getConstant(Imm, Op.getValueType())); @@ -872,14 +904,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { ID.AddPointer(&Val); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { + if (Ordering) Ordering->add(N); if (!VT.isVector()) return SDValue(N, 0); + } if (!N) { N = NodeAllocator.Allocate<ConstantSDNode>(); new (N) ConstantSDNode(isT, &Val, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -916,14 +951,17 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ ID.AddPointer(&V); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { + if (Ordering) Ordering->add(N); if (!VT.isVector()) return SDValue(N, 0); + } if (!N) { N = NodeAllocator.Allocate<ConstantFPSDNode>(); new (N) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -978,12 +1016,15 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>(); new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -993,12 +1034,15 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddInteger(FI); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>(); new (N) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1012,12 +1056,15 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>(); new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1037,12 +1084,15 @@ SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, ID.AddPointer(C); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1063,12 +1113,15 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, C->AddSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1077,12 +1130,15 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); ID.AddPointer(MBB); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>(); new (N) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1098,6 +1154,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { N = NodeAllocator.Allocate<VTSDNode>(); new (N) VTSDNode(VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1107,6 +1164,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); new (N) ExternalSymbolSDNode(false, Sym, 0, VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1119,6 +1177,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1131,6 +1190,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { new (N) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } return SDValue(CondCodeNodes[Cond], 0); } @@ -1223,8 +1283,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, ID.AddInteger(MaskVec[i]); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. This memory will be "leaked" when @@ -1236,6 +1298,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1253,12 +1316,15 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>(); new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1267,12 +1333,15 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); ID.AddInteger(RegNo); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<RegisterSDNode>(); new (N) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1284,12 +1353,15 @@ SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl, AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); ID.AddInteger(LabelID); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<LabelSDNode>(); new (N) LabelSDNode(Opcode, dl, Root, LabelID); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1303,12 +1375,15 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, ID.AddPointer(BA); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>(); new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1321,13 +1396,16 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { ID.AddPointer(V); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>(); new (N) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1480,7 +1558,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { if (Op.getValueType().isVector()) return false; - unsigned BitWidth = Op.getValueSizeInBits(); + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); } @@ -1503,7 +1581,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { unsigned BitWidth = Mask.getBitWidth(); - assert(BitWidth == Op.getValueType().getSizeInBits() && + assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && "Mask size mismatches value type size!"); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. @@ -1760,7 +1838,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; APInt InMask = Mask; InMask.trunc(InBits); @@ -1774,7 +1852,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; APInt InMask = Mask; @@ -1815,7 +1893,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InMask = Mask; InMask.trunc(InBits); KnownZero.trunc(InBits); @@ -1827,7 +1905,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InMask = Mask; InMask.zext(InBits); KnownZero.zext(InBits); @@ -1960,7 +2038,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); - unsigned VTBits = VT.getSizeInBits(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -1987,7 +2065,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ } case ISD::SIGN_EXTEND: - Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits(); + Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: @@ -2238,13 +2316,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<SDNode>(); new (N) SDNode(Opcode, DL, getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2349,6 +2430,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isFloatingPoint() && Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); if (Operand.getValueType() == VT) return Operand; // noop conversion. + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (Operand.getOpcode() == ISD::UNDEF) return getUNDEF(VT); break; @@ -2356,8 +2441,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid sext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid sext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); break; @@ -2365,8 +2454,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid zext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid zext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); @@ -2375,8 +2468,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ANY_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid anyext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid anyext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); @@ -2385,14 +2482,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid TRUNCATE!"); if (Operand.getValueType() == VT) return Operand; // noop truncate - assert(Operand.getValueType().bitsGT(VT) - && "Invalid truncate node, src < dst!"); + assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) && + "Invalid truncate node, src < dst!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. - if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().getScalarType() + .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); @@ -2447,8 +2549,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops, 1); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate<UnarySDNode>(); new (N) UnarySDNode(Opcode, DL, VTs, Operand); CSEMap.InsertNode(N, IP); @@ -2458,6 +2562,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2623,6 +2728,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); + assert(!EVT.isVector() && + "AssertSExt/AssertZExt type should be the vector element type " + "rather than the vector type!"); assert(EVT.bitsLE(VT) && "Not extending!"); if (VT == EVT) return N1; // noop assertion. break; @@ -2632,12 +2740,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); - assert(EVT.bitsLE(VT) && "Not extending!"); + assert(!EVT.isVector() && + "SIGN_EXTEND_INREG type should be the vector element type rather " + "than the vector type!"); + assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending if (N1C) { APInt Val = N1C->getAPIntValue(); - unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits(); + unsigned FromBits = EVT.getSizeInBits(); Val <<= Val.getBitWidth()-FromBits; Val = Val.ashr(Val.getBitWidth()-FromBits); return getConstant(Val, VT); @@ -2859,8 +2970,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate<BinarySDNode>(); new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); CSEMap.InsertNode(N, IP); @@ -2870,6 +2983,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2936,8 +3050,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate<TernarySDNode>(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); @@ -2945,7 +3061,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, N = NodeAllocator.Allocate<TernarySDNode>(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); } + AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -3541,12 +3659,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3604,12 +3724,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3682,6 +3804,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } @@ -3693,6 +3816,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3732,16 +3856,15 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, assert(VT == MemVT && "Non-extending load from different memory type!"); } else { // Extending load. - if (VT.isVector()) - assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() && - "Invalid vector extload!"); - else - assert(MemVT.bitsLT(VT) && - "Should only be an extending load, not truncating!"); - assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && - "Cannot sign/zero extend a FP/Vector load!"); + assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be an extending load, not truncating!"); assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); + assert(VT.isVector() == MemVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == MemVT.getVectorNumElements()) && + "Cannot use trunc store to change the number of vector elements!"); } bool Indexed = AM != ISD::UNINDEXED; @@ -3758,12 +3881,14 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate<LoadSDNode>(); new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3834,12 +3959,14 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3874,10 +4001,15 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); - assert(VT.bitsGT(SVT) && "Not a truncation?"); + assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be a truncating store, not extending!"); assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); - + assert(VT.isVector() == SVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == SVT.getVectorNumElements()) && + "Cannot use trunc store to change the number of vector elements!"); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -3889,12 +4021,14 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3911,14 +4045,17 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); new (N) StoreSDNode(Ops, dl, VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3984,8 +4121,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate<SDNode>(); new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); @@ -3996,6 +4135,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4051,8 +4191,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } if (NumOps == 1) { N = NodeAllocator.Allocate<UnarySDNode>(); new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); @@ -4083,6 +4225,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, } } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4166,7 +4309,7 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { I->VTs[2] == VT3 && I->VTs[3] == VT4) return *I; - EVT *Array = Allocator.Allocate<EVT>(3); + EVT *Array = Allocator.Allocate<EVT>(4); Array[0] = VT1; Array[1] = VT2; Array[2] = VT3; @@ -4545,8 +4688,10 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(ON); return ON; + } } if (!RemoveNodeFromCSEMaps(N)) @@ -4610,6 +4755,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (IP) CSEMap.InsertNode(N, IP); // Memoize the new node. + if (Ordering) Ordering->add(N); return N; } @@ -4748,8 +4894,10 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return cast<MachineSDNode>(E); + } } // Allocate a new MachineSDNode. @@ -4771,6 +4919,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4807,8 +4956,10 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return E; + } } return NULL; } @@ -5867,6 +6018,99 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { &Scalars[0], Scalars.size()); } + +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. +bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist) const { + if (LD->getChain() != Base->getChain()) + return false; + EVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LD->getOperand(1); + SDValue BaseLoc = Base->getOperand(1); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo(); + int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); + if (V && (V->getSExtValue() == Dist*Bytes)) + return true; + } + + GlobalValue *GV1 = NULL; + GlobalValue *GV2 = NULL; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + + +/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if +/// it cannot be inferred. +unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { + // If this is a GlobalAddress + cst, return the alignment. + GlobalValue *GV; + int64_t GVOffset = 0; + if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) + return MinAlign(GV->getAlignment(), GVOffset); + + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. + int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (Ptr.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Ptr.getOperand(1)) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + // FIXME: Handle FI+CST. + const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + if (MFI.isFixedObjectIndex(FrameIdx)) { + int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; + + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = MinAlign(ObjectOffset, StackAlign); + + // Finally, the frame object itself may have a known alignment. Factor + // the alignment + offset into a new alignment. For example, if we know + // the FI is 8 byte aligned, but the pointer is 4 off, we really have a + // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte + // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. + return std::max(Align, FIInfoAlign); + } + return FIInfoAlign; + } + + return 0; +} + void SelectionDAG::dump() const { errs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; @@ -5882,6 +6126,9 @@ void SelectionDAG::dump() const { errs() << "\n\n"; } +void SelectionDAG::NodeOrdering::dump() const { +} + void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { print_types(OS, G); print_details(OS, G); @@ -6022,4 +6269,3 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return false; return true; } - diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 57d8903..7568384 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -583,6 +583,9 @@ void SelectionDAGBuilder::visit(Instruction &I) { } void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { + // Tell the DAG that we're processing a new instruction. + DAG.NewInst(); + // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { @@ -2108,7 +2111,7 @@ void SelectionDAGBuilder::visitSelect(User &I) { for (unsigned i = 0; i != NumValues; ++i) Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), - TrueVal.getValueType(), Cond, + TrueVal.getNode()->getValueType(i), Cond, SDValue(TrueVal.getNode(), TrueVal.getResNo() + i), SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c39437f..a640c7d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -60,8 +60,6 @@ using namespace llvm; static cl::opt<bool> -DisableLegalizeTypes("disable-legalize-types", cl::Hidden); -static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " "instruction selector")); @@ -362,6 +360,39 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } +/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is +/// attached with this instruction. +static void SetDebugLoc(unsigned MDDbgKind, + MetadataContext &TheMetadata, + Instruction *I, + SelectionDAGBuilder *SDB, + FastISel *FastIS, + MachineFunction *MF) { + if (!isa<DbgInfoIntrinsic>(I)) + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); + + SDB->setCurDebugLoc(Loc); + + if (FastIS) + FastIS->setCurDebugLoc(Loc); + + // If the function doesn't have a default debug location yet, set + // it. This is kind of a hack. + if (MF->getDefaultDebugLoc().isUnknown()) + MF->setDefaultDebugLoc(Loc); + } +} + +/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown. +static void ResetDebugLoc(SelectionDAGBuilder *SDB, + FastISel *FastIS) { + SDB->setCurDebugLoc(DebugLoc::getUnknownLoc()); + if (FastIS) + FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc()); +} + void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, BasicBlock::iterator Begin, BasicBlock::iterator End, @@ -373,20 +404,16 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { - if (MDDbgKind) { - // Update DebugLoc if debug information is attached with this - // instruction. - if (!isa<DbgInfoIntrinsic>(I)) - if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); - SDB->setCurDebugLoc(Loc); - if (MF->getDefaultDebugLoc().isUnknown()) - MF->setDefaultDebugLoc(Loc); - } - } - if (!isa<TerminatorInst>(I)) + if (MDDbgKind) + SetDebugLoc(MDDbgKind, TheMetadata, I, SDB, 0, MF); + + if (!isa<TerminatorInst>(I)) { SDB->visit(*I); + + // Set the current debug location back to "unknown" so that it doesn't + // spuriously apply to subsequent instructions. + ResetDebugLoc(SDB, 0); + } } if (!SDB->HasTailCall) { @@ -401,7 +428,9 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, HandlePHINodesInSuccessorBlocks(LLVMBB); // Lower the terminator after the copies are emitted. + SetDebugLoc(MDDbgKind, TheMetadata, LLVMBB->getTerminator(), SDB, 0, MF); SDB->visit(*LLVMBB->getTerminator()); + ResetDebugLoc(SDB, 0); } } @@ -498,75 +527,73 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Second step, hack on the DAG until it only uses operations and types that // the target supports. - if (!DisableLegalizeTypes) { - if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + - BlockName); + if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + + BlockName); + + bool Changed; + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + DEBUG(errs() << "Type-legalized selection DAG:\n"); + DEBUG(CurDAG->dump()); - bool Changed; + if (Changed) { + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization", GroupName); - Changed = CurDAG->LegalizeTypes(); + NamedRegionTimer T("DAG Combining after legalize types", GroupName); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } else { - Changed = CurDAG->LegalizeTypes(); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(errs() << "Type-legalized selection DAG:\n"); + DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); + } - if (Changed) { - if (ViewDAGCombineLT) - CurDAG->viewGraph("dag-combine-lt input for " + BlockName); - - // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize types", GroupName); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); - } else { - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); - } - - DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); - } + if (TimePassesIsEnabled) { + NamedRegionTimer T("Vector Legalization", GroupName); + Changed = CurDAG->LegalizeVectors(); + } else { + Changed = CurDAG->LegalizeVectors(); + } + if (Changed) { if (TimePassesIsEnabled) { - NamedRegionTimer T("Vector Legalization", GroupName); - Changed = CurDAG->LegalizeVectors(); + NamedRegionTimer T("Type Legalization 2", GroupName); + Changed = CurDAG->LegalizeTypes(); } else { - Changed = CurDAG->LegalizeVectors(); + Changed = CurDAG->LegalizeTypes(); } - if (Changed) { - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization 2", GroupName); - Changed = CurDAG->LegalizeTypes(); - } else { - Changed = CurDAG->LegalizeTypes(); - } - - if (ViewDAGCombineLT) - CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); - // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } - - DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } + + DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); + DEBUG(CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); if (TimePassesIsEnabled) { NamedRegionTimer T("DAG Legalization", GroupName); - CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + CurDAG->Legalize(OptLevel); } else { - CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + CurDAG->Legalize(OptLevel); } DEBUG(errs() << "Legalized selection DAG:\n"); @@ -738,24 +765,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. for (; BI != End; ++BI) { - if (MDDbgKind) { - // Update DebugLoc if debug information is attached with this - // instruction. - if (!isa<DbgInfoIntrinsic>(BI)) - if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, - MF.getDebugLocInfo()); - FastIS->setCurDebugLoc(Loc); - if (MF.getDefaultDebugLoc().isUnknown()) - MF.setDefaultDebugLoc(Loc); - } - } - // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(BI)) if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + ResetDebugLoc(SDB, FastIS); if (EnableFastISelVerbose || EnableFastISelAbort) { errs() << "FastISel miss: "; BI->dump(); @@ -765,13 +779,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, break; } + if (MDDbgKind) + SetDebugLoc(MDDbgKind, TheMetadata, BI, SDB, FastIS, &MF); + // First try normal tablegen-generated "fast" selection. - if (FastIS->SelectInstruction(BI)) + if (FastIS->SelectInstruction(BI)) { + ResetDebugLoc(SDB, FastIS); continue; + } - // Next, try calling the target to attempt to handle the instruction. - if (FastIS->TargetSelectInstruction(BI)) - continue; + // Clear out the debug location so that it doesn't carry over to + // unrelated instructions. + ResetDebugLoc(SDB, FastIS); // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { @@ -786,10 +805,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, R = FuncInfo->CreateRegForValue(BI); } - SDB->setCurDebugLoc(FastIS->getCurDebugLoc()); - bool HadTailCall = false; - SelectBasicBlock(LLVMBB, BI, next(BI), HadTailCall); + SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { @@ -823,9 +840,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // not handled by FastISel. If FastISel is not run, this is the entire // block. if (BI != End) { - // If FastISel is run and it has known DebugLoc then use it. - if (FastIS && !FastIS->getCurDebugLoc().isUnknown()) - SDB->setCurDebugLoc(FastIS->getCurDebugLoc()); bool HadTailCall; SelectBasicBlock(LLVMBB, BI, End, HadTailCall); } @@ -1313,14 +1327,6 @@ SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) { N.getValueType()); } -SDNode *SelectionDAGISel::Select_DBG_LABEL(const SDValue &N) { - SDValue Chain = N.getOperand(0); - unsigned C = cast<LabelSDNode>(N)->getLabelID(); - SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32); - return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::DBG_LABEL, - MVT::Other, Tmp, Chain); -} - SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) { SDValue Chain = N.getOperand(0); unsigned C = cast<LabelSDNode>(N)->getLabelID(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index c5adc50..83fa5a8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -29,14 +29,14 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" -#include <fstream> using namespace llvm; namespace llvm { template<> struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} static bool hasEdgeDestLabels() { return true; @@ -50,6 +50,11 @@ namespace llvm { return ((const SDNode *) Node)->getValueType(i).getEVTString(); } + template<typename EdgeIter> + static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { + return itostr(I - SDNodeIterator::begin((SDNode *) Node)); + } + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge /// should actually target another edge source, not a node. If this method /// is implemented, getEdgeTarget should be implemented. @@ -76,12 +81,12 @@ namespace llvm { static bool renderGraphFromBottomUp() { return true; } - + static bool hasNodeAddressLabel(const SDNode *Node, const SelectionDAG *Graph) { return true; } - + /// If you want to override the dot attributes printed for a particular /// edge, override this method. template<typename EdgeIter> @@ -94,7 +99,7 @@ namespace llvm { return "color=blue,style=dashed"; return ""; } - + static std::string getSimpleNodeLabel(const SDNode *Node, const SelectionDAG *G) { @@ -132,7 +137,7 @@ namespace llvm { std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, const SelectionDAG *G) { - return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel (Node, G); + return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G); } @@ -142,7 +147,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " @@ -186,7 +191,7 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { #ifndef NDEBUG std::map<const SDNode *, std::string>::const_iterator I = NodeGraphAttrs.find(N); - + if (I != NodeGraphAttrs.end()) return I->second; else @@ -252,8 +257,7 @@ void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { // Visually mark that we hit the limit if (strcmp(Color, "red") == 0) { setSubgraphColorHelper(N, "blue", visited, 0, printed); - } - else if (strcmp(Color, "yellow") == 0) { + } else if (strcmp(Color, "yellow") == 0) { setSubgraphColorHelper(N, "green", visited, 0, printed); } } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 68bc2d6..1026169 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -911,7 +911,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TargetLoweringOpt &TLO, unsigned Depth) const { unsigned BitWidth = DemandedMask.getBitWidth(); - assert(Op.getValueSizeInBits() == BitWidth && + assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && "Mask size mismatches value type size!"); APInt NewMask = DemandedMask; DebugLoc dl = Op.getDebugLoc(); @@ -1240,7 +1240,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignBit(VT.getSizeInBits()); + InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -2184,48 +2184,6 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, } -/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a -/// location that is 'Dist' units away from the location that the 'Base' load -/// is loading from. -bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, - unsigned Bytes, int Dist, - const MachineFrameInfo *MFI) const { - if (LD->getChain() != Base->getChain()) - return false; - EVT VT = LD->getValueType(0); - if (VT.getSizeInBits() / 8 != Bytes) - return false; - - SDValue Loc = LD->getOperand(1); - SDValue BaseLoc = Base->getOperand(1); - if (Loc.getOpcode() == ISD::FrameIndex) { - if (BaseLoc.getOpcode() != ISD::FrameIndex) - return false; - int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); - int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); - int FS = MFI->getObjectSize(FI); - int BFS = MFI->getObjectSize(BFI); - if (FS != BFS || FS != (int)Bytes) return false; - return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); - } - if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); - if (V && (V->getSExtValue() == Dist*Bytes)) - return true; - } - - GlobalValue *GV1 = NULL; - GlobalValue *GV2 = NULL; - int64_t Offset1 = 0; - int64_t Offset2 = 0; - bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); - if (isGA1 && isGA2 && GV1 == GV2) - return Offset1 == (Offset2 + Dist*Bytes); - return false; -} - - SDValue TargetLowering:: PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { // Default implementation: no optimization. diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 5876371..ed407eb 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -130,7 +130,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // See PR3149: // 172 %ECX<def> = MOV32rr %reg1039<kill> // 180 INLINEASM <es:subl $5,$1 - // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, %EAX<kill>, + // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, + // %EAX<kill>, // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 // 188 %EAX<def> = MOV32rr %EAX<kill> // 196 %ECX<def> = MOV32rr %ECX<kill> @@ -281,12 +282,12 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { } } -/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA -/// being the source and IntB being the dest, thus this defines a value number -/// in IntB. If the source value number (in IntA) is defined by a commutable -/// instruction and its other operand is coalesced to the copy dest register, -/// see if we can transform the copy into a noop by commuting the definition. For -/// example, +/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// IntA being the source and IntB being the dest, thus this defines a value +/// number in IntB. If the source value number (in IntA) is defined by a +/// commutable instruction and its other operand is coalesced to the copy dest +/// register, see if we can transform the copy into a noop by commuting the +/// definition. For example, /// /// A3 = op A2 B0<kill> /// ... @@ -508,7 +509,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (BHasSubRegs) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); - SRLI.MergeInClobberRange(*li_, AI->start, End, li_->getVNInfoAllocator()); + SRLI.MergeInClobberRange(*li_, AI->start, End, + li_->getVNInfoAllocator()); } } } @@ -708,7 +710,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, checkForDeadDef = true; } - MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); + MachineBasicBlock::iterator MII = + llvm::next(MachineBasicBlock::iterator(CopyMI)); tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_); MachineInstr *NewMI = prior(MII); @@ -1314,7 +1317,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { "coalesced to another register.\n"); return false; // Not coalescable. } - } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ + } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { + if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) { + // e.g. %reg16404:1<def> = MOV8rr %reg16412:2<kill> + Again = true; + return false; // Not coalescable. + } + } else { llvm_unreachable("Unrecognized copy instruction!"); } @@ -1611,9 +1620,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } } else { - // If the virtual register live interval is long but it has low use desity, - // do not join them, instead mark the physical register as its allocation - // preference. + // If the virtual register live interval is long but it has low use + // density, do not join them, instead mark the physical register as its + // allocation preference. LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg; unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; @@ -1938,6 +1947,10 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ if (Overlaps) { // If we haven't already recorded that this value # is safe, check it. if (!InVector(LHSIt->valno, EliminatedLHSVals)) { + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (LHSIt->valno->hasRedefByEC()) + return false; // Copy from the RHS? if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) return false; // Nope, bail out. @@ -1977,6 +1990,10 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // if coalescing succeeds. Just skip the liverange. if (++LHSIt == LHSEnd) break; } else { + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (LHSIt->valno->hasRedefByEC()) + return false; // Otherwise, if this is a copy from the RHS, mark it as being merged // in. if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) { @@ -2316,6 +2333,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, if (LHSValNoAssignments[I->valno->id] != RHSValNoAssignments[J->valno->id]) return false; + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) + return false; } if (I->end < J->end) { @@ -2401,9 +2422,15 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, // If this isn't a copy nor a extract_subreg, we can't join intervals. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + bool isInsUndef = false; if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); + } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(2).getReg(); + if (Inst->getOperand(1).isUndef()) + isInsUndef = true; } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG || Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { DstReg = Inst->getOperand(0).getReg(); @@ -2413,7 +2440,8 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + if (isInsUndef || + (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())) ImpDefCopies.push_back(CopyRec(Inst, 0)); else if (SrcIsPhys || DstIsPhys) PhysCopies.push_back(CopyRec(Inst, 0)); @@ -2421,9 +2449,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, VirtCopies.push_back(CopyRec(Inst, 0)); } - // Try coalescing implicit copies first, followed by copies to / from - // physical registers, then finally copies from virtual registers to - // virtual registers. + // Try coalescing implicit copies and insert_subreg <undef> first, + // followed by copies to / from physical registers, then finally copies + // from virtual registers to virtual registers. for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { CopyRec &TheCopy = ImpDefCopies[i]; bool Again = false; @@ -2594,114 +2622,6 @@ void SimpleRegisterCoalescing::releaseMemory() { ReMatDefs.clear(); } -/// Returns true if the given live interval is zero length. -static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) { - for (LiveInterval::Ranges::const_iterator - i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) - if (i->end.getPrevIndex() > i->start) - return false; - return true; -} - - -void SimpleRegisterCoalescing::CalculateSpillWeights() { - SmallSet<unsigned, 4> Processed; - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* MBB = mbbi; - SlotIndex MBBEnd = li_->getMBBEndIdx(MBB); - MachineLoop* loop = loopInfo->getLoopFor(MBB); - unsigned loopDepth = loop ? loop->getLoopDepth() : 0; - bool isExiting = loop ? loop->isLoopExiting(MBB) : false; - - for (MachineBasicBlock::const_iterator mii = MBB->begin(), mie = MBB->end(); - mii != mie; ++mii) { - const MachineInstr *MI = mii; - if (tii_->isIdentityCopy(*MI)) - continue; - - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - continue; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &mopi = MI->getOperand(i); - if (!mopi.isReg() || mopi.getReg() == 0) - continue; - unsigned Reg = mopi.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) - continue; - // Multiple uses of reg by the same instruction. It should not - // contribute to spill weight again. - if (!Processed.insert(Reg)) - continue; - - bool HasDef = mopi.isDef(); - bool HasUse = !HasDef; - for (unsigned j = i+1; j != e; ++j) { - const MachineOperand &mopj = MI->getOperand(j); - if (!mopj.isReg() || mopj.getReg() != Reg) - continue; - HasDef |= mopj.isDef(); - HasUse |= mopj.isUse(); - if (HasDef && HasUse) - break; - } - - LiveInterval &RegInt = li_->getInterval(Reg); - float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth); - if (HasDef && isExiting) { - // Looks like this is a loop count variable update. - SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); - const LiveRange *DLR = - li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR->end > MBBEnd) - Weight *= 3.0F; - } - RegInt.weight += Weight; - } - Processed.clear(); - } - } - - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { - LiveInterval &LI = *I->second; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // If the live interval length is essentially zero, i.e. in every live - // range the use follows def immediately, it doesn't make sense to spill - // it and hope it will be easier to allocate for this li. - if (isZeroLengthInterval(&LI, li_)) { - LI.weight = HUGE_VALF; - continue; - } - - bool isLoad = false; - SmallVector<LiveInterval*, 4> SpillIs; - if (li_->isReMaterializable(LI, SpillIs, isLoad)) { - // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If non of the defs are - // loads, then it's potentially very cheap to re-materialize. - // FIXME: this gets much more complicated once we support non-trivial - // re-materialization. - if (isLoad) - LI.weight *= 0.9F; - else - LI.weight *= 0.5F; - } - - // Slightly prefer live interval that has been assigned a preferred reg. - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); - if (Hint.first || Hint.second) - LI.weight *= 1.01F; - - // Divide the weight of the interval by its size. This encourages - // spilling of intervals that are large and have few uses, and - // discourages spilling of small intervals with many uses. - LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; - } - } -} - - bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { mf_ = &fn; mri_ = &fn.getRegInfo(); @@ -2727,7 +2647,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { joinIntervals(); DEBUG({ errs() << "********** INTERVALS POST JOINING **********\n"; - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){ + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); + I != E; ++I){ I->second->print(errs(), tri_); errs() << "\n"; } @@ -2768,7 +2689,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DoDelete = true; } if (!DoDelete) - mii = next(mii); + mii = llvm::next(mii); else { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); @@ -2831,8 +2752,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { } } - CalculateSpillWeights(); - DEBUG(dump()); return true; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 78f8a9a..605a740 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -244,10 +244,6 @@ namespace llvm { MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; - /// CalculateSpillWeights - Compute spill weights for all virtual register - /// live intervals. - void CalculateSpillWeights(); - void printRegName(unsigned reg) const; }; diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 237d0b5..bc246c1 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -20,22 +20,25 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <set> using namespace llvm; namespace { - enum SpillerName { trivial, standard }; + enum SpillerName { trivial, standard, splitting }; } static cl::opt<SpillerName> spillerOpt("spiller", cl::desc("Spiller to use: (default: standard)"), cl::Prefix, - cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumVal(standard, "default spiller"), + cl::values(clEnumVal(trivial, "trivial spiller"), + clEnumVal(standard, "default spiller"), + clEnumVal(splitting, "splitting spiller"), clEnumValEnd), cl::init(standard)); +// Spiller virtual destructor implementation. Spiller::~Spiller() {} namespace { @@ -140,9 +143,9 @@ protected: // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), next(miItr), newVReg, true, + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, true, ss, trc); - MachineInstr *storeInstr(next(miItr)); + MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); SlotIndex beginIndex = storeIndex.getPrevIndex(); @@ -170,7 +173,8 @@ public: : SpillerBase(mf, lis, vrm) {} std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs) { + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex*) { // Ignore spillIs - we don't use it. return trivialSpillEverywhere(li); } @@ -179,23 +183,336 @@ public: /// Falls back on LiveIntervals::addIntervalsForSpills. class StandardSpiller : public Spiller { -private: +protected: LiveIntervals *lis; const MachineLoopInfo *loopInfo; VirtRegMap *vrm; public: - StandardSpiller(MachineFunction *mf, LiveIntervals *lis, - const MachineLoopInfo *loopInfo, VirtRegMap *vrm) + StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo, + VirtRegMap *vrm) : lis(lis), loopInfo(loopInfo), vrm(vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs) { + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex*) { return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); } }; +/// When a call to spill is placed this spiller will first try to break the +/// interval up into its component values (one new interval per value). +/// If this fails, or if a call is placed to spill a previously split interval +/// then the spiller falls back on the standard spilling mechanism. +class SplittingSpiller : public StandardSpiller { +public: + SplittingSpiller(MachineFunction *mf, LiveIntervals *lis, + const MachineLoopInfo *loopInfo, VirtRegMap *vrm) + : StandardSpiller(lis, loopInfo, vrm) { + + mri = &mf->getRegInfo(); + tii = mf->getTarget().getInstrInfo(); + tri = mf->getTarget().getRegisterInfo(); + } + + std::vector<LiveInterval*> spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex *earliestStart) { + + if (worthTryingToSplit(li)) { + return tryVNISplit(li, earliestStart); + } + // else + return StandardSpiller::spill(li, spillIs, earliestStart); + } + +private: + + MachineRegisterInfo *mri; + const TargetInstrInfo *tii; + const TargetRegisterInfo *tri; + DenseSet<LiveInterval*> alreadySplit; + + bool worthTryingToSplit(LiveInterval *li) const { + return (!alreadySplit.count(li) && li->getNumValNums() > 1); + } + + /// Try to break a LiveInterval into its component values. + std::vector<LiveInterval*> tryVNISplit(LiveInterval *li, + SlotIndex *earliestStart) { + + DEBUG(errs() << "Trying VNI split of %reg" << *li << "\n"); + + std::vector<LiveInterval*> added; + SmallVector<VNInfo*, 4> vnis; + + std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis)); + + for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(), + vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) { + VNInfo *vni = *vniItr; + + // Skip unused VNIs, or VNIs with no kills. + if (vni->isUnused() || vni->kills.empty()) + continue; + + DEBUG(errs() << " Extracted Val #" << vni->id << " as "); + LiveInterval *splitInterval = extractVNI(li, vni); + + if (splitInterval != 0) { + DEBUG(errs() << *splitInterval << "\n"); + added.push_back(splitInterval); + alreadySplit.insert(splitInterval); + if (earliestStart != 0) { + if (splitInterval->beginIndex() < *earliestStart) + *earliestStart = splitInterval->beginIndex(); + } + } else { + DEBUG(errs() << "0\n"); + } + } + + DEBUG(errs() << "Original LI: " << *li << "\n"); + + // If there original interval still contains some live ranges + // add it to added and alreadySplit. + if (!li->empty()) { + added.push_back(li); + alreadySplit.insert(li); + if (earliestStart != 0) { + if (li->beginIndex() < *earliestStart) + *earliestStart = li->beginIndex(); + } + } + + return added; + } + + /// Extract the given value number from the interval. + LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const { + assert(vni->isDefAccurate() || vni->isPHIDef()); + assert(!vni->kills.empty()); + + // Create a new vreg and live interval, copy VNI kills & ranges over. + const TargetRegisterClass *trc = mri->getRegClass(li->reg); + unsigned newVReg = mri->createVirtualRegister(trc); + vrm->grow(); + LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); + VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator()); + + // Start by copying all live ranges in the VN to the new interval. + for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end(); + rItr != rEnd; ++rItr) { + if (rItr->valno == vni) { + newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI)); + } + } + + // Erase the old VNI & ranges. + li->removeValNo(vni); + + // Collect all current uses of the register belonging to the given VNI. + // We'll use this to rename the register after we've dealt with the def. + std::set<MachineInstr*> uses; + for (MachineRegisterInfo::use_iterator + useItr = mri->use_begin(li->reg), useEnd = mri->use_end(); + useItr != useEnd; ++useItr) { + uses.insert(&*useItr); + } + + // Process the def instruction for this VNI. + if (newVNI->isPHIDef()) { + // Insert a copy at the start of the MBB. The range proceeding the + // copy will be attached to the original LiveInterval. + MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); + tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc); + MachineInstr *copyMI = defMBB->begin(); + copyMI->addRegisterKilled(li->reg, tri); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB), + 0, false, lis->getVNInfoAllocator()); + phiDefVNI->setIsPHIDef(true); + phiDefVNI->addKill(copyIdx.getDefIndex()); + li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI)); + LiveRange *oldPHIDefRange = + newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB)); + + // If the old phi def starts in the middle of the range chop it up. + if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) { + LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end, + oldPHIDefRange->valno); + oldPHIDefRange->end = lis->getMBBStartIdx(defMBB); + newLI->addRange(oldPHIDefRange2); + } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) { + // Otherwise if it's at the start of the range just trim it. + oldPHIDefRange->start = copyIdx.getDefIndex(); + } else { + assert(false && "PHI def range doesn't cover PHI def?"); + } + + newVNI->def = copyIdx.getDefIndex(); + newVNI->setCopy(copyMI); + newVNI->setIsPHIDef(false); // not a PHI def anymore. + newVNI->setIsDefAccurate(true); + } else { + // non-PHI def. Rename the def. If it's two-addr that means renaming the use + // and inserting a new copy too. + MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def); + // We'll rename this now, so we can remove it from uses. + uses.erase(defInst); + unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg); + bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx), + twoAddrUseIsUndef = false; + + for (unsigned i = 0; i < defInst->getNumOperands(); ++i) { + MachineOperand &mo = defInst->getOperand(i); + if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) { + mo.setReg(newVReg); + if (isTwoAddr && mo.isUse() && mo.isUndef()) + twoAddrUseIsUndef = true; + } + } + + SlotIndex defIdx = lis->getInstructionIndex(defInst); + newVNI->def = defIdx.getDefIndex(); + + if (isTwoAddr && !twoAddrUseIsUndef) { + MachineBasicBlock *defMBB = defInst->getParent(); + tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc); + MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst)); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + copyMI->addRegisterKilled(li->reg, tri); + LiveRange *origUseRange = + li->getLiveRangeContaining(newVNI->def.getUseIndex()); + VNInfo *origUseVNI = origUseRange->valno; + origUseRange->end = copyIdx.getDefIndex(); + bool updatedKills = false; + for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) { + if (origUseVNI->kills[k] == defIdx.getDefIndex()) { + origUseVNI->kills[k] = copyIdx.getDefIndex(); + updatedKills = true; + break; + } + } + assert(updatedKills && "Failed to update VNI kill list."); + VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI, + true, lis->getVNInfoAllocator()); + copyVNI->addKill(defIdx.getDefIndex()); + LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI); + newLI->addRange(copyRange); + } + } + + for (std::set<MachineInstr*>::iterator + usesItr = uses.begin(), usesEnd = uses.end(); + usesItr != usesEnd; ++usesItr) { + MachineInstr *useInst = *usesItr; + SlotIndex useIdx = lis->getInstructionIndex(useInst); + LiveRange *useRange = + newLI->getLiveRangeContaining(useIdx.getUseIndex()); + + // If this use doesn't belong to the new interval skip it. + if (useRange == 0) + continue; + + // This use doesn't belong to the VNI, skip it. + if (useRange->valno != newVNI) + continue; + + // Check if this instr is two address. + unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg); + bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx); + + // Rename uses (and defs for two-address instrs). + for (unsigned i = 0; i < useInst->getNumOperands(); ++i) { + MachineOperand &mo = useInst->getOperand(i); + if (mo.isReg() && (mo.isUse() || isTwoAddress) && + (mo.getReg() == li->reg)) { + mo.setReg(newVReg); + } + } + + // If this is a two address instruction we've got some extra work to do. + if (isTwoAddress) { + // We modified the def operand, so we need to copy back to the original + // reg. + MachineBasicBlock *useMBB = useInst->getParent(); + MachineBasicBlock::iterator useItr(useInst); + tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc); + MachineInstr *copyMI = next(useItr); + copyMI->addRegisterKilled(newVReg, tri); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + + // Change the old two-address defined range & vni to start at + // (and be defined by) the copy. + LiveRange *origDefRange = + li->getLiveRangeContaining(useIdx.getDefIndex()); + origDefRange->start = copyIdx.getDefIndex(); + origDefRange->valno->def = copyIdx.getDefIndex(); + origDefRange->valno->setCopy(copyMI); + + // Insert a new range & vni for the two-address-to-copy value. This + // will be attached to the new live interval. + VNInfo *copyVNI = + newLI->getNextValue(useIdx.getDefIndex(), 0, true, + lis->getVNInfoAllocator()); + copyVNI->addKill(copyIdx.getDefIndex()); + LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI); + newLI->addRange(copyRange); + } + } + + // Iterate over any PHI kills - we'll need to insert new copies for them. + for (VNInfo::KillSet::iterator + killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end(); + killItr != killEnd; ++killItr) { + SlotIndex killIdx(*killItr); + if (killItr->isPHI()) { + MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx); + LiveRange *oldKillRange = + newLI->getLiveRangeContaining(killIdx); + + assert(oldKillRange != 0 && "No kill range?"); + + tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(), + li->reg, newVReg, trc, trc); + MachineInstr *copyMI = prior(killMBB->getFirstTerminator()); + copyMI->addRegisterKilled(newVReg, tri); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + + // Save the current end. We may need it to add a new range if the + // current range runs of the end of the MBB. + SlotIndex newKillRangeEnd = oldKillRange->end; + oldKillRange->end = copyIdx.getDefIndex(); + + if (newKillRangeEnd != lis->getMBBEndIdx(killMBB).getNextSlot()) { + assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB).getNextSlot() && + "PHI kill range doesn't reach kill-block end. Not sane."); + newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB).getNextSlot(), + newKillRangeEnd, newVNI)); + } + + *killItr = oldKillRange->end; + VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(), + copyMI, true, + lis->getVNInfoAllocator()); + newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB)); + newKillVNI->setHasPHIKill(true); + li->addRange(LiveRange(copyIdx.getDefIndex(), + lis->getMBBEndIdx(killMBB).getNextSlot(), + newKillVNI)); + } + + } + + newVNI->setHasPHIKill(false); + + return newLI; + } + +}; + } llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, @@ -203,7 +520,8 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) { switch (spillerOpt) { case trivial: return new TrivialSpiller(mf, lis, vrm); break; - case standard: return new StandardSpiller(mf, lis, loopInfo, vrm); break; + case standard: return new StandardSpiller(lis, loopInfo, vrm); break; + case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break; default: llvm_unreachable("Unreachable!"); break; } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index c6bd985..dda52e8 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -21,6 +21,7 @@ namespace llvm { class MachineFunction; class MachineInstr; class MachineLoopInfo; + class SlotIndex; class VirtRegMap; class VNInfo; @@ -35,7 +36,8 @@ namespace llvm { /// Spill the given live range. The method used will depend on the Spiller /// implementation selected. virtual std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs) = 0; + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex *earliestIndex = 0) = 0; }; diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index c299192..fd25a37 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -668,7 +668,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; - MachineBasicBlock::iterator NextMI = next(I); + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; int FirstSS, SecondSS; diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 9c0b596..bf58902 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -17,15 +17,19 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumTails , "Number of tails duplicated"); STATISTIC(NumTailDups , "Number of tail duplicated blocks"); STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); @@ -36,34 +40,71 @@ TailDuplicateSize("tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden); +static cl::opt<bool> +TailDupVerify("tail-dup-verify", + cl::desc("Verify sanity of PHI instructions during taildup"), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> +TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); + +typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; + namespace { /// TailDuplicatePass - Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { + bool PreRegAlloc; const TargetInstrInfo *TII; MachineModuleInfo *MMI; + MachineRegisterInfo *MRI; + + // SSAUpdateVRs - A list of virtual registers for which to update SSA form. + SmallVector<unsigned, 16> SSAUpdateVRs; + + // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of + // source virtual registers. + DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; public: static char ID; - explicit TailDuplicatePass() : MachineFunctionPass(&ID) {} + explicit TailDuplicatePass(bool PreRA) : + MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "Tail Duplication"; } private: + void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB); + void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies); + void DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, unsigned> &LocalVRMap); + void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); - bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF); + bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies); void RemoveDeadBlock(MachineBasicBlock *MBB); }; char TailDuplicatePass::ID = 0; } -FunctionPass *llvm::createTailDuplicatePass() { - return new TailDuplicatePass(); +FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { + return new TailDuplicatePass(PreRegAlloc); } bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); + MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); bool MadeChange = false; @@ -77,36 +118,325 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } +static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), + MBB->pred_end()); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != MBB->end()) { + if (MI->getOpcode() != TargetInstrInfo::PHI) + break; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + bool Found = false; + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (PHIBB == PredBB) { + Found = true; + break; + } + } + if (!Found) { + errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + errs() << " missing input from predecessor BB#" + << PredBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (CheckExtra && !Preds.count(PHIBB)) { + // This is not a hard error. + errs() << "Warning: malformed PHI in BB#" << MBB->getNumber() + << ": " << *MI; + errs() << " extra input from predecessor BB#" + << PHIBB->getNumber() << '\n'; + } + if (PHIBB->getNumber() < 0) { + errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + errs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + ++MI; + } + } +} + /// TailDuplicateBlocks - Look for small blocks that are unconditionally /// branched to and do not fall through. Tail-duplicate their instructions /// into their predecessors to eliminate (dynamic) branches. bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; + if (PreRegAlloc && TailDupVerify) { + DEBUG(errs() << "\n*** Before tail-duplicating\n"); + VerifyPHIs(MF, true); + } + + SmallVector<MachineInstr*, 8> NewPHIs; + MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; + if (NumTails == TailDupLimit) + break; + // Only duplicate blocks that end with unconditional branches. if (MBB->canFallThrough()) continue; - MadeChange |= TailDuplicate(MBB, MF); - - // If it is dead, remove it. - if (MBB->pred_empty()) { - NumInstrDups -= MBB->size(); - RemoveDeadBlock(MBB); + // Save the successors list. + SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), + MBB->succ_end()); + + SmallVector<MachineBasicBlock*, 8> TDBBs; + SmallVector<MachineInstr*, 16> Copies; + if (TailDuplicate(MBB, MF, TDBBs, Copies)) { + ++NumTails; + + // TailBB's immediate successors are now successors of those predecessors + // which duplicated TailBB. Add the predecessors as sources to the PHI + // instructions. + bool isDead = MBB->pred_empty(); + if (PreRegAlloc) + UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); + + // If it is dead, remove it. + if (isDead) { + NumInstrDups -= MBB->size(); + RemoveDeadBlock(MBB); + ++NumDeadBlocks; + } + + // Update SSA form. + if (!SSAUpdateVRs.empty()) { + for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { + unsigned VReg = SSAUpdateVRs[i]; + SSAUpdate.Initialize(VReg); + + // If the original definition is still around, add it as an available + // value. + MachineInstr *DefMI = MRI->getVRegDef(VReg); + MachineBasicBlock *DefBB = 0; + if (DefMI) { + DefBB = DefMI->getParent(); + SSAUpdate.AddAvailableValue(DefBB, VReg); + } + + // Add the new vregs as available values. + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(VReg); + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + SSAUpdate.AddAvailableValue(SrcBB, SrcReg); + } + + // Rewrite uses that are outside of the original def's block. + MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + while (UI != MRI->use_end()) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI->getParent() == DefBB) + continue; + SSAUpdate.RewriteUse(UseMO); + } + } + + SSAUpdateVRs.clear(); + SSAUpdateVals.clear(); + } + + // Eliminate some of the copies inserted tail duplication to maintain + // SSA form. + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + MachineInstr *Copy = Copies[i]; + unsigned Src, Dst, SrcSR, DstSR; + if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) { + MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); + if (++UI == MRI->use_end()) { + // Copy is the only use. Do trivial copy propagation here. + MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); + } + } + } + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); MadeChange = true; - ++NumDeadBlocks; } } + return MadeChange; } +static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI->getParent() != BB) + return true; + } + return false; +} + +static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) + if (MI->getOperand(i+1).getMBB() == SrcBB) + return i; + return 0; +} + +/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for +/// SSA update. +void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB) { + DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); + if (LI != SSAUpdateVals.end()) + LI->second.push_back(std::make_pair(BB, NewReg)); + else { + AvailableValsTy Vals; + Vals.push_back(std::make_pair(BB, NewReg)); + SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); + SSAUpdateVRs.push_back(OrigReg); + } +} + +/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. +/// Remember the source register that's contributed by PredBB and update SSA +/// update map. +void TailDuplicatePass::ProcessPHI(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) { + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); + assert(SrcOpIdx && "Unable to find matching PHI source?"); + unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); + + // Insert a copy from source to the end of the block. The def register is the + // available value liveout of the block. + unsigned NewDef = MRI->createVirtualRegister(RC); + Copies.push_back(std::make_pair(NewDef, SrcReg)); + if (isDefLiveOut(DefReg, TailBB, MRI)) + AddSSAUpdateEntry(DefReg, NewDef, PredBB); + + // Remove PredBB from the PHI node. + MI->RemoveOperand(SrcOpIdx+1); + MI->RemoveOperand(SrcOpIdx); + if (MI->getNumOperands() == 1) + MI->eraseFromParent(); +} + +/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update +/// the source operands due to earlier PHI translation. +void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, unsigned> &LocalVRMap) { + MachineInstr *NewMI = MF.CloneMachineInstr(MI); + for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO.isDef()) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + unsigned NewReg = MRI->createVirtualRegister(RC); + MO.setReg(NewReg); + LocalVRMap.insert(std::make_pair(Reg, NewReg)); + if (isDefLiveOut(Reg, TailBB, MRI)) + AddSSAUpdateEntry(Reg, NewReg, PredBB); + } else { + DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); + if (VI != LocalVRMap.end()) + MO.setReg(VI->second); + } + } + PredBB->insert(PredBB->end(), NewMI); +} + +/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor +/// blocks, the successors have gained new predecessors. Update the PHI +/// instructions in them accordingly. +void +TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallSetVector<MachineBasicBlock*,8> &Succs) { + for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), + SE = Succs.end(); SI != SE; ++SI) { + MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); + II != EE; ++II) { + if (II->getOpcode() != TargetInstrInfo::PHI) + break; + unsigned Idx = 0; + for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { + MachineOperand &MO = II->getOperand(i+1); + if (MO.getMBB() == FromBB) { + Idx = i; + break; + } + } + + assert(Idx != 0); + MachineOperand &MO0 = II->getOperand(Idx); + unsigned Reg = MO0.getReg(); + if (isDead) { + // Folded into the previous BB. + // There could be duplicate phi source entries. FIXME: Should sdisel + // or earlier pass fixed this? + for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { + MachineOperand &MO = II->getOperand(i+1); + if (MO.getMBB() == FromBB) { + II->RemoveOperand(i+1); + II->RemoveOperand(i); + } + } + II->RemoveOperand(Idx+1); + II->RemoveOperand(Idx); + } + DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); + if (LI != SSAUpdateVals.end()) { + // This register is defined in the tail block. + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + II->addOperand(MachineOperand::CreateReg(SrcReg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } + } else { + // Live in tail block, must also be live in predecessors. + for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = TDBBs[j]; + II->addOperand(MachineOperand::CreateReg(Reg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } + } + } + } +} + /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. -bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, - MachineFunction &MF) { +bool +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies) { // Don't try to tail-duplicate single-block loops. if (TailBB->isSuccessor(TailBB)) return false; @@ -129,28 +459,36 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. - unsigned i = 0; + unsigned InstrCount = 0; bool HasCall = false; for (MachineBasicBlock::iterator I = TailBB->begin(); - I != TailBB->end(); ++I, ++i) { + I != TailBB->end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. if (I->getDesc().isNotDuplicable()) return false; + // Do not duplicate 'return' instructions if this is a pre-regalloc run. + // A return may expand into a lot more instructions (e.g. reload of callee + // saved registers) after PEI. + if (PreRegAlloc && I->getDesc().isReturn()) return false; // Don't duplicate more than the threshold. - if (i == MaxDuplicateCount) return false; + if (InstrCount == MaxDuplicateCount) return false; // Remember if we saw a call. if (I->getDesc().isCall()) HasCall = true; + if (I->getOpcode() != TargetInstrInfo::PHI) + InstrCount += 1; } // Heuristically, don't tail-duplicate calls if it would expand code size, // as it's less likely to be worth the extra cost. - if (i > 1 && HasCall) + if (InstrCount > 1 && HasCall) return false; + DEBUG(errs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; - SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), - TailBB->pred_end()); + SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; @@ -175,13 +513,35 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); + TDBBs.push_back(PredBB); + // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); + // Clone the contents of TailBB into PredBB. - for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); - I != E; ++I) { - MachineInstr *NewMI = MF.CloneMachineInstr(I); - PredBB->insert(PredBB->end(), NewMI); + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + while (I != TailBB->end()) { + MachineInstr *MI = &*I; + ++I; + if (MI->getOpcode() == TargetInstrInfo::PHI) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); + } else { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap); + } + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); + TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first, + CopyInfos[i].second, RC,RC); + MachineInstr *CopyMI = prior(Loc); + Copies.push_back(CopyMI); } NumInstrDups += TailBB->size() - 1; // subtract one for removed branch @@ -190,8 +550,8 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), - E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I); + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I); Changed = true; ++NumTailDups; @@ -200,22 +560,56 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. - MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; bool PriorUnAnalyzable = - TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true); // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && - TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 && + TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && !TailBB->hasAddressTaken()) { - DEBUG(errs() << "\nMerging into block: " << PrevBB + DEBUG(errs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); - PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; - assert(PrevBB.succ_empty()); - PrevBB.transferSuccessors(TailBB); + if (PreRegAlloc) { + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->getOpcode() == TargetInstrInfo::PHI) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); + if (MI->getParent()) + MI->eraseFromParent(); + } + + // Now copy the non-PHI instructions. + while (I != TailBB->end()) { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + MachineInstr *MI = &*I++; + DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap); + MI->eraseFromParent(); + } + MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); + TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first, + CopyInfos[i].second, RC, RC); + MachineInstr *CopyMI = prior(Loc); + Copies.push_back(CopyMI); + } + } else { + // No PHIs to worry about, just splice the instructions over. + PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); + } + PrevBB->removeSuccessor(PrevBB->succ_begin()); + assert(PrevBB->succ_empty()); + PrevBB->transferSuccessors(TailBB); + TDBBs.push_back(PrevBB); Changed = true; } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 102e2a3..393e315 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -329,7 +329,7 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * return false; // For the def, it should be the only def of that register. - if (MO.isDef() && (next(MRI.def_begin(Reg)) != MRI.def_end() || + if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() || MRI.isLiveIn(Reg))) return false; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 5fa690b..98b95ac 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -211,7 +211,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, ++KillPos; unsigned NumVisited = 0; - for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) { + for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { MachineInstr *OtherMI = I; if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. return false; @@ -412,7 +412,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); // If there are multiple defs, we can't do a simple analysis, so just // go with what the kill flag says. - if (next(Begin) != MRI->def_end()) + if (llvm::next(Begin) != MRI->def_end()) return true; DefMI = &*Begin; bool IsSrcPhys, IsDstPhys; @@ -643,7 +643,7 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, if (!Sunk) { DistanceMap.insert(std::make_pair(NewMI, Dist)); mi = NewMI; - nmi = next(mi); + nmi = llvm::next(mi); } return true; } @@ -923,7 +923,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { Processed.clear(); for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me; ) { - MachineBasicBlock::iterator nmi = next(mi); + MachineBasicBlock::iterator nmi = llvm::next(mi); const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 10c8066..054c3b6 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -754,7 +754,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, } // Skip over the same register. - std::multimap<unsigned, int>::iterator NI = next(I); + std::multimap<unsigned, int>::iterator NI = llvm::next(I); while (NI != E && NI->first == Reg) { ++I; ++NI; @@ -1133,7 +1133,7 @@ private: std::vector<MachineOperand*> &KillOps, VirtRegMap &VRM) { - MachineBasicBlock::iterator NextMII = next(MII); + MachineBasicBlock::iterator NextMII = llvm::next(MII); if (NextMII == MBB.end()) return false; @@ -1186,7 +1186,7 @@ private: // Unfold next instructions that fold the same SS. do { MachineInstr &NextMI = *NextMII; - NextMII = next(NextMII); + NextMII = llvm::next(NextMII); NewMIs.clear(); if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) llvm_unreachable("Unable unfold the load / store folding instruction!"); @@ -1463,8 +1463,8 @@ private: std::vector<MachineOperand*> &KillOps, VirtRegMap &VRM) { - MachineBasicBlock::iterator oldNextMII = next(MII); - TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC); + MachineBasicBlock::iterator oldNextMII = llvm::next(MII); + TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC); MachineInstr *StoreMI = prior(oldNextMII); VRM.addSpillSlotUse(StackSlot, StoreMI); DEBUG(errs() << "Store:\t" << *StoreMI); @@ -1626,14 +1626,14 @@ private: DistanceMap.clear(); for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MII != E; ) { - MachineBasicBlock::iterator NextMII = next(MII); + MachineBasicBlock::iterator NextMII = llvm::next(MII); VirtRegMap::MI2VirtMapTy::const_iterator I, End; bool Erased = false; bool BackTracked = false; if (OptimizeByUnfold(MBB, MII, MaybeDeadStores, Spills, RegKills, KillOps, VRM)) - NextMII = next(MII); + NextMII = llvm::next(MII); MachineInstr &MI = *MII; @@ -1657,7 +1657,7 @@ private: // Back-schedule reloads and remats. MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(next(MII), MBB.begin(), PhysReg, TRI, false, + ComputeReloadLoc(llvm::next(MII), MBB.begin(), PhysReg, TRI, false, SS, TII, MF); TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC); @@ -1667,7 +1667,7 @@ private: ++NumPSpills; DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } - NextMII = next(MII); + NextMII = llvm::next(MII); } // Insert restores here if asked to. @@ -1785,14 +1785,14 @@ private: const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); unsigned Phys = VRM.getPhys(VirtReg); int StackSlot = VRM.getStackSlot(VirtReg); - MachineBasicBlock::iterator oldNextMII = next(MII); - TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC); + MachineBasicBlock::iterator oldNextMII = llvm::next(MII); + TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC); MachineInstr *StoreMI = prior(oldNextMII); VRM.addSpillSlotUse(StackSlot, StoreMI); DEBUG(errs() << "Store:\t" << *StoreMI); VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); } - NextMII = next(MII); + NextMII = llvm::next(MII); } /// ReusedOperands - Keep track of operand reuse in case we need to undo @@ -2265,7 +2265,7 @@ private: if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot, Spills, RegKills, KillOps, TRI, VRM)) { - NextMII = next(MII); + NextMII = llvm::next(MII); BackTracked = true; goto ProcessNextInst; } @@ -2381,7 +2381,7 @@ private: MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true, LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM); - NextMII = next(MII); + NextMII = llvm::next(MII); // Check to see if this is a noop copy. If so, eliminate the // instruction before considering the dest reg to be changed. |