diff options
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 49 | ||||
-rw-r--r-- | lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 37 | ||||
-rw-r--r-- | lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 7 | ||||
-rw-r--r-- | lib/CodeGen/DFAPacketizer.cpp | 89 | ||||
-rw-r--r-- | lib/CodeGen/LiveIntervalAnalysis.cpp | 12 | ||||
-rw-r--r-- | lib/CodeGen/MachineBasicBlock.cpp | 36 | ||||
-rw-r--r-- | lib/CodeGen/MachineBlockPlacement.cpp | 249 | ||||
-rw-r--r-- | lib/CodeGen/Passes.cpp | 55 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.cpp | 6 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 3 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5 | ||||
-rw-r--r-- | lib/CodeGen/SlotIndexes.cpp | 52 |
15 files changed, 374 insertions, 237 deletions
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 660684d..454a923 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -36,35 +36,20 @@ const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { llvm_unreachable("invalid AtomType!"); } -// The general case would need to have a less hard coded size for the -// length of the HeaderData, however, if we're constructing based on a -// single Atom then we know it will always be: 4 + 4 + 2 + 2. -DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : - Header(12), - HeaderData(atom) { -} - // The length of the header data is always going to be 4 + 4 + 4*NumAtoms. -DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) : +DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) : Header(8 + (atomList.size() * 4)), - HeaderData(atomList) { -} + HeaderData(atomList), + Entries(Allocator) { } -DwarfAccelTable::~DwarfAccelTable() { - for (size_t i = 0, e = Data.size(); i < e; ++i) - delete Data[i]; - for (StringMap<DataArray>::iterator - EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) - for (DataArray::iterator DI = EI->second.begin(), - DE = EI->second.end(); DI != DE; ++DI) - delete (*DI); -} +DwarfAccelTable::~DwarfAccelTable() { } void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) { + assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. DataArray &DIEs = Entries[Name]; - DIEs.push_back(new HashDataContents(die, Flags)); + DIEs.push_back(new (Allocator) HashDataContents(die, Flags)); } void DwarfAccelTable::ComputeBucketCount(void) { @@ -85,31 +70,23 @@ void DwarfAccelTable::ComputeBucketCount(void) { Header.hashes_count = num; } -namespace { - // DIESorter - comparison predicate that sorts DIEs by their offset. - struct DIESorter { - bool operator()(const struct DwarfAccelTable::HashDataContents *A, - const struct DwarfAccelTable::HashDataContents *B) const { - return A->Die->getOffset() < B->Die->getOffset(); - } - }; +// compareDIEs - comparison predicate that sorts DIEs by their offset. +static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, + const DwarfAccelTable::HashDataContents *B) { + return A->Die->getOffset() < B->Die->getOffset(); } void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { // Create the individual hash data outputs. for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { - struct HashData *Entry = new HashData((*EI).getKeyData()); // Unique the entries. - std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter()); + std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs); EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), EI->second.end()); - for (DataArray::const_iterator DI = EI->second.begin(), - DE = EI->second.end(); - DI != DE; ++DI) - Entry->addData((*DI)); + HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); Data.push_back(Entry); } @@ -216,7 +193,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { D->getStringPool()); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.size()); - for (std::vector<struct HashDataContents*>::const_iterator + for (ArrayRef<HashDataContents*>::const_iterator DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); DI != DE; ++DI) { // Emit the DIE offset diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 2278d4c..963b8cd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -15,6 +15,7 @@ #define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/DataTypes.h" @@ -164,22 +165,12 @@ public: private: struct TableHeaderData { - uint32_t die_offset_base; - std::vector<Atom> Atoms; + SmallVector<Atom, 1> Atoms; + + TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) + : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { } - TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList, - uint32_t offset = 0) : - die_offset_base(offset) { - for (size_t i = 0, e = AtomList.size(); i != e; ++i) - Atoms.push_back(AtomList[i]); - } - - TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0) - : die_offset_base(offset) { - Atoms.push_back(Atom); - } - #ifndef NDEBUG void print (raw_ostream &O) { O << "die_offset_base: " << die_offset_base << "\n"; @@ -221,11 +212,11 @@ private: StringRef Str; uint32_t HashValue; MCSymbol *Sym; - std::vector<struct HashDataContents*> Data; // offsets - HashData(StringRef S) : Str(S) { + ArrayRef<HashDataContents*> Data; // offsets + HashData(StringRef S, ArrayRef<HashDataContents*> Data) + : Str(S), Data(Data) { HashValue = DwarfAccelTable::HashDJB(S); } - void addData(struct HashDataContents *Datum) { Data.push_back(Datum); } #ifndef NDEBUG void print(raw_ostream &O) { O << "Name: " << Str << "\n"; @@ -255,15 +246,18 @@ private: void EmitHashes(AsmPrinter *); void EmitOffsets(AsmPrinter *, MCSymbol *); void EmitData(AsmPrinter *, DwarfDebug *D); - + + // Allocator for HashData and HashDataContents. + BumpPtrAllocator Allocator; + // Output Variables TableHeader Header; TableHeaderData HeaderData; std::vector<HashData*> Data; // String Data - typedef std::vector<struct HashDataContents*> DataArray; - typedef StringMap<DataArray> StringEntries; + typedef std::vector<HashDataContents*> DataArray; + typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries; StringEntries Entries; // Buckets/Hashes/Offsets @@ -274,8 +268,7 @@ private: // Public Implementation public: - DwarfAccelTable(DwarfAccelTable::Atom); - DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &); + DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); ~DwarfAccelTable(); void AddName(StringRef, DIE*, char = 0); void FinalizeTable(AsmPrinter *, const char *); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 69dc454..cc5b642 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1032,9 +1032,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); - // Unfortunately this code needs to stay here to work around - // a bug in older gdbs that requires the linkage name to resolve - // multiple template functions. + // Unfortunately this code needs to stay here instead of below the + // AT_specification code in order to work around a bug in older + // gdbs that requires the linkage name to resolve multiple template + // functions. StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index bfbe779..5ff641c 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,10 +23,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/MC/MCInstrItineraries.h" using namespace llvm; @@ -100,17 +100,17 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { reserveResources(&MID); } -namespace llvm { +namespace { // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides // Schedule method to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - MachineDominatorTree &MDT, bool IsPostRA); + MachineDominatorTree &MDT, bool IsPostRA); // Schedule - Actual scheduling work. void schedule(); }; -} +} // end anonymous namespace DefaultVLIWScheduler::DefaultVLIWScheduler( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, @@ -129,25 +129,49 @@ VLIWPacketizerList::VLIWPacketizerList( bool IsPostRA) : TM(MF.getTarget()), MF(MF) { TII = TM.getInstrInfo(); ResourceTracker = TII->CreateTargetScheduleState(&TM, 0); - VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); + SchedulerImpl = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); } // VLIWPacketizerList Dtor VLIWPacketizerList::~VLIWPacketizerList() { - if (VLIWScheduler) - delete VLIWScheduler; + delete SchedulerImpl; + delete ResourceTracker; +} + +// ignorePseudoInstruction - ignore pseudo instructions. +bool VLIWPacketizerList::ignorePseudoInstruction(MachineInstr *MI, + MachineBasicBlock *MBB) { + if (MI->isDebugValue()) + return true; + + if (TII->isSchedulingBoundary(MI, MBB, MF)) + return true; + + return false; +} + +// isSoloInstruction - return true if instruction I must end previous +// packet. +bool VLIWPacketizerList::isSoloInstruction(MachineInstr *I) { + if (I->isInlineAsm()) + return true; + + return false; +} - if (ResourceTracker) - delete ResourceTracker; +// addToPacket - Add I to the current packet and reserve resource. +void VLIWPacketizerList::addToPacket(MachineInstr *MI) { + CurrentPacketMIs.push_back(MI); + ResourceTracker->reserveResources(MI); } // endPacket - End the current packet, bundle packet instructions and reset // DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, - MachineInstr *MI) { + MachineInstr *I) { if (CurrentPacketMIs.size() > 1) { MachineInstr *MIFirst = CurrentPacketMIs.front(); - finalizeBundle(*MBB, MIFirst, MI); + finalizeBundle(*MBB, MIFirst, I); } CurrentPacketMIs.clear(); ResourceTracker->clearResources(); @@ -157,36 +181,31 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, MachineBasicBlock::iterator EndItr) { - assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); - VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); - VLIWScheduler->schedule(); - VLIWScheduler->exitRegion(); - - // Generate MI -> SU map. - //std::map <MachineInstr*, SUnit*> MIToSUnit; - MIToSUnit.clear(); - for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) { - SUnit *SU = &VLIWScheduler->SUnits[i]; - MIToSUnit[SU->getInstr()] = SU; - } + assert(MBB->end() == EndItr && "Bad EndIndex"); + + SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + + // Build the DAG without reordering instructions. + SchedulerImpl->schedule(); + + // Remember scheduling units. + SUnits = SchedulerImpl->SUnits; // The main packetizer loop. for (; BeginItr != EndItr; ++BeginItr) { MachineInstr *MI = BeginItr; - this->initPacketizerState(); + // Ignore pseudo instructions. + if (ignorePseudoInstruction(MI, MBB)) + continue; // End the current packet if needed. - if (this->isSoloInstruction(MI)) { + if (isSoloInstruction(MI)) { endPacket(MBB, MI); continue; } - // Ignore pseudo instructions. - if (this->ignorePseudoInstruction(MI, MBB)) - continue; - - SUnit *SUI = MIToSUnit[MI]; + SUnit *SUI = SchedulerImpl->getSUnit(MI); assert(SUI && "Missing SUnit Info!"); // Ask DFA if machine resource is available for MI. @@ -196,13 +215,13 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); VI != VE; ++VI) { MachineInstr *MJ = *VI; - SUnit *SUJ = MIToSUnit[MJ]; + SUnit *SUJ = SchedulerImpl->getSUnit(MJ); assert(SUJ && "Missing SUnit Info!"); // Is it legal to packetize SUI and SUJ together. - if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { + if (!isLegalToPacketizeTogether(SUI, SUJ)) { // Allow packetization if dependency can be pruned. - if (!this->isLegalToPruneDependencies(SUI, SUJ)) { + if (!isLegalToPruneDependencies(SUI, SUJ)) { // End the packet if dependency cannot be pruned. endPacket(MBB, MI); break; @@ -215,9 +234,11 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, } // Add MI to the current packet. - BeginItr = this->addToPacket(MI); + addToPacket(MI); } // For all instructions in BB. // End any packet left behind. endPacket(MBB, EndItr); + + SchedulerImpl->exitRegion(); } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 3ade660..934cc12 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1068,9 +1068,9 @@ public: #ifndef NDEBUG LIValidator validator; - std::for_each(Entering.begin(), Entering.end(), validator); - std::for_each(Internal.begin(), Internal.end(), validator); - std::for_each(Exiting.begin(), Exiting.end(), validator); + validator = std::for_each(Entering.begin(), Entering.end(), validator); + validator = std::for_each(Internal.begin(), Internal.end(), validator); + validator = std::for_each(Exiting.begin(), Exiting.end(), validator); assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness."); #endif @@ -1115,9 +1115,9 @@ public: #ifndef NDEBUG LIValidator validator; - std::for_each(Entering.begin(), Entering.end(), validator); - std::for_each(Internal.begin(), Internal.end(), validator); - std::for_each(Exiting.begin(), Exiting.end(), validator); + validator = std::for_each(Entering.begin(), Entering.end(), validator); + validator = std::for_each(Internal.begin(), Internal.end(), validator); + validator = std::for_each(Exiting.begin(), Exiting.end(), validator); assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); #endif } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 6c8a107..1abb8f2 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -392,22 +392,44 @@ void MachineBasicBlock::updateTerminator() { TII->InsertBranch(*this, TBB, 0, Cond, dl); } } else { + // Walk through the successors and find the successor which is not + // a landing pad and is not the conditional branch destination (in TBB) + // as the fallthrough successor. + MachineBasicBlock *FallthroughBB = 0; + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isLandingPad() || *SI == TBB) + continue; + assert(!FallthroughBB && "Found more than one fallthrough successor."); + FallthroughBB = *SI; + } + if (!FallthroughBB && canFallThrough()) { + // We fallthrough to the same basic block as the conditional jump + // targets. Remove the conditional jump, leaving unconditional + // fallthrough. + // FIXME: This does not seem like a reasonable pattern to support, but it + // has been seen in the wild coming out of degenerate ARM test cases. + TII->RemoveBranch(*this); + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, 0, Cond, dl); + return; + } + // The block has a fallthrough conditional branch. - MachineBasicBlock *MBBA = *succ_begin(); - MachineBasicBlock *MBBB = *llvm::next(succ_begin()); - if (MBBA == TBB) std::swap(MBBB, MBBA); if (isLayoutSuccessor(TBB)) { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, MBBA, 0, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, MBBA, 0, Cond, dl); - } else if (!isLayoutSuccessor(MBBA)) { + TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); + } else if (!isLayoutSuccessor(FallthroughBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, MBBA, Cond, dl); + TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl); } } } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 22d7212..5ba6851 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -102,13 +102,13 @@ public: } /// \brief Iterator over blocks within the chain. - typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator; + typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator; /// \brief Beginning of blocks within the chain. - iterator begin() const { return Blocks.begin(); } + iterator begin() { return Blocks.begin(); } /// \brief End of blocks within the chain. - iterator end() const { return Blocks.end(); } + iterator end() { return Blocks.end(); } /// \brief Merge a block chain into this one. /// @@ -211,12 +211,15 @@ class MachineBlockPlacement : public MachineFunctionPass { void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = 0); - MachineBasicBlock *findBestLoopTop(MachineFunction &F, - MachineLoop &L, + MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopExit(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); + void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet); void buildCFGChains(MachineFunction &F); - void AlignLoops(MachineFunction &F); public: static char ID; // Pass identification, replacement for typeid @@ -540,13 +543,74 @@ void MachineBlockPlacement::buildChain( /// \brief Find the best loop top block for layout. /// +/// Look for a block which is strictly better than the loop header for laying +/// out at the top of the loop. This looks for one and only one pattern: +/// a latch block with no conditional exit. This block will cause a conditional +/// jump around it or will be the bottom of the loop if we lay it out in place, +/// but if it it doesn't end up at the bottom of the loop for any reason, +/// rotation alone won't fix it. Because such a block will always result in an +/// unconditional jump (for the backedge) rotating it in front of the loop +/// header is always profitable. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // Check that the header hasn't been fused with a preheader block due to + // crazy branches. If it has, we need to start with the header at the top to + // prevent pulling the preheader into the loop body. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + + DEBUG(dbgs() << "Finding best loop top for: " + << getBlockName(L.getHeader()) << "\n"); + + BlockFrequency BestPredFreq; + MachineBasicBlock *BestPred = 0; + for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), + PE = L.getHeader()->pred_end(); + PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + if (!LoopBlockSet.count(Pred)) + continue; + DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " + << Pred->succ_size() << " successors, " + << MBFI->getBlockFreq(Pred) << " freq\n"); + if (Pred->succ_size() > 1) + continue; + + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + if (!BestPred || PredFreq > BestPredFreq || + (!(PredFreq < BestPredFreq) && + Pred->isLayoutSuccessor(L.getHeader()))) { + BestPred = Pred; + BestPredFreq = PredFreq; + } + } + + // If no direct predecessor is fine, just use the loop header. + if (!BestPred) + return L.getHeader(); + + // Walk backwards through any straight line of predecessors. + while (BestPred->pred_size() == 1 && + (*BestPred->pred_begin())->succ_size() == 1 && + *BestPred->pred_begin() != L.getHeader()) + BestPred = *BestPred->pred_begin(); + + DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n"); + return BestPred; +} + + +/// \brief Find the best loop exiting block for layout. +/// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. MachineBasicBlock * -MachineBlockPlacement::findBestLoopTop(MachineFunction &F, - MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { +MachineBlockPlacement::findBestLoopExit(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block // within the loop is the best one to layout at the top. However, if the loop @@ -557,11 +621,11 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, // header and only rotate if safe. BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; if (!LoopBlockSet.count(*HeaderChain.begin())) - return L.getHeader(); + return 0; BlockFrequency BestExitEdgeFreq; + unsigned BestExitLoopDepth = 0; MachineBasicBlock *ExitingBB = 0; - MachineBasicBlock *LoopingBB = 0; // If there are exits to outer loops, loop rotation can severely limit // fallthrough opportunites unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop. @@ -584,15 +648,10 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, // successor isn't found. MachineBasicBlock *OldExitingBB = ExitingBB; BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; - // We also compute and store the best looping successor for use in layout. - MachineBasicBlock *BestLoopSucc = 0; + bool HasLoopingSucc = false; // FIXME: Due to the performance of the probability and weight routines in - // the MBPI analysis, we use the internal weights. This is only valid - // because it is purely a ranking function, we don't care about anything - // but the relative values. - uint32_t BestLoopSuccWeight = 0; - // FIXME: We also manually compute the probabilities to avoid quadratic - // behavior. + // the MBPI analysis, we use the internal weights and manually compute the + // probabilities to avoid quadratic behavior. uint32_t WeightScale = 0; uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), @@ -604,10 +663,8 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, continue; BlockChain &SuccChain = *BlockToChain[*SI]; // Don't split chains, either this chain or the successor's chain. - if (&Chain == &SuccChain || *SI != *SuccChain.begin()) { - DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: " - : "exiting: ") - << getBlockName(*I) << " -> " + if (&Chain == &SuccChain) { + DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " (chain conflict)\n"); continue; } @@ -616,60 +673,103 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, if (LoopBlockSet.count(*SI)) { DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " (" << SuccWeight << ")\n"); - if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight) - continue; - - BestLoopSucc = *SI; - BestLoopSuccWeight = SuccWeight; + HasLoopingSucc = true; continue; } + unsigned SuccLoopDepth = 0; + if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) { + SuccLoopDepth = ExitLoop->getLoopDepth(); + if (ExitLoop->contains(&L)) + BlocksExitingToOuterLoop.insert(*I); + } + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n"); + << getBlockName(*SI) << " [L:" << SuccLoopDepth + << "] (" << ExitEdgeFreq << ")\n"); // Note that we slightly bias this toward an existing layout successor to // retain incoming order in the absence of better information. // FIXME: Should we bias this more strongly? It's pretty weak. - if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq || + if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || + ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && !(ExitEdgeFreq < BestExitEdgeFreq))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } - - if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) - if (ExitLoop->contains(&L)) - BlocksExitingToOuterLoop.insert(*I); } // Restore the old exiting state, no viable looping successor was found. - if (!BestLoopSucc) { + if (!HasLoopingSucc) { ExitingBB = OldExitingBB; BestExitEdgeFreq = OldBestExitEdgeFreq; continue; } - - // If this was best exiting block thus far, also record the looping block. - if (ExitingBB == *I) - LoopingBB = BestLoopSucc; } - // Without a candidate exitting block or with only a single block in the + // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. if (!ExitingBB || L.getNumBlocks() == 1) - return L.getHeader(); + return 0; // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop // rotation altogether. if (!BlocksExitingToOuterLoop.empty() && !BlocksExitingToOuterLoop.count(ExitingBB)) - return L.getHeader(); + return 0; - assert(LoopingBB && "All successors of a loop block are exit blocks!"); DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); - DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n"); - return LoopingBB; + return ExitingBB; +} + +/// \brief Attempt to rotate an exiting block to the bottom of the loop. +/// +/// Once we have built a chain, try to rotate it to line up the hot exit block +/// with fallthrough out of the loop if doing so doesn't introduce unnecessary +/// branches. For example, if the loop has fallthrough into its header and out +/// of its bottom already, don't rotate it. +void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, + MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet) { + if (!ExitingBB) + return; + + MachineBasicBlock *Top = *LoopChain.begin(); + bool ViableTopFallthrough = false; + for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), + PE = Top->pred_end(); + PI != PE; ++PI) { + BlockChain *PredChain = BlockToChain[*PI]; + if (!LoopBlockSet.count(*PI) && + (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + ViableTopFallthrough = true; + break; + } + } + + // If the header has viable fallthrough, check whether the current loop + // bottom is a viable exiting block. If so, bail out as rotating will + // introduce an unnecessary branch. + if (ViableTopFallthrough) { + MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), + SE = Bottom->succ_end(); + SI != SE; ++SI) { + BlockChain *SuccChain = BlockToChain[*SI]; + if (!LoopBlockSet.count(*SI) && + (!SuccChain || *SI == *SuccChain->begin())) + return; + } + } + + BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), + ExitingBB); + if (ExitIt == LoopChain.end()) + return; + + std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); } /// \brief Forms basic block chains from the natural loop structures. @@ -688,8 +788,20 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallVector<MachineBasicBlock *, 16> BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); - MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet); - BlockChain &LoopChain = *BlockToChain[LayoutTop]; + // First check to see if there is an obviously preferable top block for the + // loop. This will default to the header, but may end up as one of the + // predecessors to the header if there is one which will result in strictly + // fewer branches in the loop body. + MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); + + // If we selected just the header for the loop top, look for a potentially + // profitable exit block in the event that rotating the loop can eliminate + // branches by placing an exit edge at the bottom. + MachineBasicBlock *ExitingBB = 0; + if (LoopTop == L.getHeader()) + ExitingBB = findBestLoopExit(F, L, LoopBlockSet); + + BlockChain &LoopChain = *BlockToChain[LoopTop]; // FIXME: This is a really lame way of walking the chains in the loop: we // walk the blocks, and use a set to prevent visiting a particular chain @@ -721,7 +833,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BlockWorkList.push_back(*Chain.begin()); } - buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet); + buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -733,7 +846,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; } for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); - BCI != BCE; ++BCI) + BCI != BCE; ++BCI) { + dbgs() << " ... " << getBlockName(*BCI) << "\n"; if (!LoopBlockSet.erase(*BCI)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough @@ -743,6 +857,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" << " Bad block: " << getBlockName(*BCI) << "\n"; } + } if (!LoopBlockSet.empty()) { BadLoop = true; @@ -882,28 +997,33 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) F.back().updateTerminator(); -} -/// \brief Recursive helper to align a loop and any nested loops. -static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) { - // Recurse through nested loops. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - AlignLoop(F, *I, Align); - - L->getTopBlock()->setAlignment(Align); -} - -/// \brief Align loop headers to target preferred alignments. -void MachineBlockPlacement::AlignLoops(MachineFunction &F) { + // Walk through the backedges of the function now that we have fully laid out + // the basic blocks and align the destination of each backedge. We don't rely + // on the loop info here so that we can align backedges in unnatural CFGs and + // backedges that were introduced purely because of the loop rotations done + // during this layout pass. + // FIXME: This isn't quite right, we shouldn't align backedges that result + // from blocks being sunken below the exit block for the function. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; - unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) - AlignLoop(F, *I, Align); + SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks; + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); + BI != BE; ++BI) { + PreviousBlocks.insert(*BI); + // Set alignment on the destination of all the back edges in the new + // ordering. + for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), + SE = (*BI)->succ_end(); + SI != SE; ++SI) + if (PreviousBlocks.count(*SI)) + (*SI)->setAlignment(Align); + } } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { @@ -919,7 +1039,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { assert(BlockToChain.empty()); buildCFGChains(F); - AlignLoops(F); BlockToChain.clear(); ChainAllocator.DestroyAll(); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 53d1fcf..490547b 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -37,8 +37,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, cl::desc("Disable tail duplication")); static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, cl::desc("Disable pre-register allocation tail duplication")); -static cl::opt<bool> EnableBlockPlacement("enable-block-placement", - cl::Hidden, cl::desc("Enable probability-driven block placement")); +static cl::opt<bool> DisableBlockPlacement("disable-block-placement", + cl::Hidden, cl::desc("Disable the probability-driven block placement, and " + "re-enable the old code placement pass")); static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", cl::Hidden, cl::desc("Collect probability-driven block placement stats")); static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, @@ -206,7 +207,7 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false), + : ImmutablePass(ID), TM(tm), PM(&pm), Impl(0), Initialized(false), DisableVerify(false), EnableTailMerge(true) { @@ -233,7 +234,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetPassConfig::TargetPassConfig() - : ImmutablePass(ID), PM(*(PassManagerBase*)0) { + : ImmutablePass(ID), PM(0) { llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); } @@ -268,16 +269,16 @@ AnalysisID TargetPassConfig::addPass(char &ID) { Pass *P = Pass::createPass(FinalID); if (!P) llvm_unreachable("Pass ID not registered"); - PM.add(P); + PM->add(P); return FinalID; } void TargetPassConfig::printAndVerify(const char *Banner) const { if (TM->shouldPrintMachineCode()) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + PM->add(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) - PM.add(createMachineVerifierPass(Banner)); + PM->add(createMachineVerifierPass(Banner)); } /// Add common target configurable passes that perform LLVM IR to IR transforms @@ -287,46 +288,46 @@ void TargetPassConfig::addIRPasses() { // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createBasicAliasAnalysisPass()); + PM->add(createTypeBasedAliasAnalysisPass()); + PM->add(createBasicAliasAnalysisPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. if (!DisableVerify) - PM.add(createVerifierPass()); + PM->add(createVerifierPass()); // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { - PM.add(createLoopStrengthReducePass(getTargetLowering())); + PM->add(createLoopStrengthReducePass(getTargetLowering())); if (PrintLSR) - PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + PM->add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } - PM.add(createGCLoweringPass()); + PM->add(createGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. - PM.add(createUnreachableBlockEliminationPass()); + PM->add(createUnreachableBlockEliminationPass()); } /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. void TargetPassConfig::addISelPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - PM.add(createCodeGenPreparePass(getTargetLowering())); + PM->add(createCodeGenPreparePass(getTargetLowering())); - PM.add(createStackProtectorPass(getTargetLowering())); + PM->add(createStackProtectorPass(getTargetLowering())); addPreISel(); if (PrintISelInput) - PM.add(createPrintFunctionPass("\n\n" - "*** Final LLVM Code input to ISel ***\n", - &dbgs())); + PM->add(createPrintFunctionPass("\n\n" + "*** Final LLVM Code input to ISel ***\n", + &dbgs())); // All passes which modify the LLVM IR are now complete; run the verifier // to ensure that the IR is valid. if (!DisableVerify) - PM.add(createVerifierPass()); + PM->add(createVerifierPass()); } /// Add the complete set of target-independent postISel code generator passes. @@ -404,7 +405,7 @@ void TargetPassConfig::addMachinePasses() { // GC addPass(GCMachineCodeAnalysisID); if (PrintGCInfo) - PM.add(createGCInfoPrinter(dbgs())); + PM->add(createGCInfoPrinter(dbgs())); // Basic block placement. if (getOptLevel() != CodeGenOpt::None) @@ -521,7 +522,7 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { addPass(PHIEliminationID); addPass(TwoAddressInstructionPassID); - PM.add(RegAllocPass); + PM->add(RegAllocPass); printAndVerify("After Register Allocation"); } @@ -563,7 +564,7 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { printAndVerify("After Machine Scheduling"); // Add the selected register allocation pass. - PM.add(RegAllocPass); + PM->add(RegAllocPass); printAndVerify("After Register Allocation"); // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, @@ -610,10 +611,10 @@ void TargetPassConfig::addMachineLateOptimization() { /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { AnalysisID ID = &NoPassID; - if (EnableBlockPlacement) { - // MachineBlockPlacement is an experimental pass which is disabled by - // default currently. Eventually it should subsume CodePlacementOpt, so - // when enabled, the other is disabled. + if (!DisableBlockPlacement) { + // MachineBlockPlacement is a new pass which subsumes the functionality of + // CodPlacementOpt. The old code placement pass can be restored by + // disabling block placement, but eventually it will be removed. ID = addPass(MachineBlockPlacementID); } else { ID = addPass(CodePlacementOptID); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 6be1ab7..d46eb89 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -39,8 +39,8 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), - IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), - FirstDbgValue(0) { + IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false), + LoopRegs(MLI, MDT), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -554,7 +554,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { continue; } - assert(!MI->isTerminator() && !MI->isLabel() && + assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() && "Cannot schedule terminators or labels!"); SUnit *SU = MISUnitMap[MI]; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d1b998f..0914c66 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1080,6 +1080,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // If the root changed (e.g. it was a dead load, update the root). DAG.setRoot(Dummy.getValue()); + DAG.RemoveDeadNodes(); } SDValue DAGCombiner::visit(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3ae8345..9fe4480 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -417,7 +417,8 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); - return DAG.getNode(ISD::OR, DL, VT, Op1, Op2); + SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); } SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 69dd813..8ec1ae8 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -138,9 +138,11 @@ static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { // Don't add glue from a node to itself. if (GlueDestNode == N) return; - // Don't add glue to something which already has glue. - if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return; - + // Don't add glue to something that already has it, either as a use or value. + if (N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue || + N->getValueType(N->getNumValues() - 1) == MVT::Glue) { + return; + } for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) VTs.push_back(N->getValueType(I)); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 94cb958..f1e879b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5050,7 +5050,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::gcroot: if (GFI) { - const Value *Alloca = I.getArgOperand(0); + const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 09a2b1f..e341e15 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1367,8 +1367,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known - if ((KnownOne & KnownOne2) == KnownOne) { + // NB: it is okay if more bits are known than are requested + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side + if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index c5bd3a3..26cf259 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -34,7 +34,8 @@ void SlotIndexes::releaseMemory() { mi2iMap.clear(); MBBRanges.clear(); idx2MBBMap.clear(); - clearList(); + indexList.clear(); + ileAllocator.Reset(); } bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { @@ -45,17 +46,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { // iterator in lock-step (though skipping it over indexes which have // null pointers in the instruction field). // At each iteration assert that the instruction pointed to in the index - // is the same one pointed to by the MI iterator. This + // is the same one pointed to by the MI iterator. This // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should // only need to be set up once after the first numbering is computed. mf = &fn; - initList(); // Check that the list contains only the sentinal. - assert(indexListHead->getNext() == 0 && - "Index list non-empty at initial numbering?"); + assert(indexList.empty() && "Index list non-empty at initial numbering?"); assert(idx2MBBMap.empty() && "Index -> MBB mapping non-empty at initial numbering?"); assert(MBBRanges.empty() && @@ -68,7 +67,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MBBRanges.resize(mf->getNumBlockIDs()); idx2MBBMap.reserve(mf->size()); - push_back(createEntry(0, index)); + indexList.push_back(createEntry(0, index)); // Iterate over the function. for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end(); @@ -76,7 +75,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MachineBasicBlock *mbb = &*mbbItr; // Insert an index for the MBB start. - SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block); + SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block); for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -85,20 +84,20 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { continue; // Insert a store index for the instr. - push_back(createEntry(mi, index += SlotIndex::InstrDist)); + indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. - mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), + mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(), SlotIndex::Slot_Block))); - + ++functionSize; } // We insert one blank instructions between basic blocks. - push_back(createEntry(0, index += SlotIndex::InstrDist)); + indexList.push_back(createEntry(0, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; - MBBRanges[mbb->getNumber()].second = SlotIndex(back(), + MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(), SlotIndex::Slot_Block); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } @@ -119,38 +118,37 @@ void SlotIndexes::renumberIndexes() { unsigned index = 0; - for (IndexListEntry *curEntry = front(); curEntry != getTail(); - curEntry = curEntry->getNext()) { - curEntry->setIndex(index); + for (IndexList::iterator I = indexList.begin(), E = indexList.end(); + I != E; ++I) { + I->setIndex(index); index += SlotIndex::InstrDist; } } -// Renumber indexes locally after curEntry was inserted, but failed to get a new +// Renumber indexes locally after curItr was inserted, but failed to get a new // index. -void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) { +void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { // Number indexes with half the default spacing so we can catch up quickly. const unsigned Space = SlotIndex::InstrDist/2; assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); - IndexListEntry *start = curEntry->getPrev(); - unsigned index = start->getIndex(); - IndexListEntry *tail = getTail(); + IndexList::iterator startItr = prior(curItr); + unsigned index = startItr->getIndex(); do { - curEntry->setIndex(index += Space); - curEntry = curEntry->getNext(); + curItr->setIndex(index += Space); + ++curItr; // If the next index is bigger, we have caught up. - } while (curEntry != tail && curEntry->getIndex() <= index); + } while (curItr != indexList.end() && curItr->getIndex() <= index); - DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-' + DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-' << index << " ***\n"); ++NumLocalRenum; } void SlotIndexes::dump() const { - for (const IndexListEntry *itr = front(); itr != getTail(); - itr = itr->getNext()) { + for (IndexList::const_iterator itr = indexList.begin(); + itr != indexList.end(); ++itr) { dbgs() << itr->getIndex() << " "; if (itr->getInstr() != 0) { @@ -168,7 +166,7 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { if (isValid()) - os << entry().getIndex() << "Berd"[getSlot()]; + os << listEntry()->getIndex() << "Berd"[getSlot()]; else os << "invalid"; } |