summaryrefslogtreecommitdiffstats
path: root/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp49
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.h37
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp7
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp89
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp12
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp36
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp249
-rw-r--r--lib/CodeGen/Passes.cpp55
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp6
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp1
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp5
-rw-r--r--lib/CodeGen/SlotIndexes.cpp52
15 files changed, 374 insertions, 237 deletions
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 660684d..454a923 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -36,35 +36,20 @@ const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
llvm_unreachable("invalid AtomType!");
}
-// The general case would need to have a less hard coded size for the
-// length of the HeaderData, however, if we're constructing based on a
-// single Atom then we know it will always be: 4 + 4 + 2 + 2.
-DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) :
- Header(12),
- HeaderData(atom) {
-}
-
// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
-DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) :
+DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) :
Header(8 + (atomList.size() * 4)),
- HeaderData(atomList) {
-}
+ HeaderData(atomList),
+ Entries(Allocator) { }
-DwarfAccelTable::~DwarfAccelTable() {
- for (size_t i = 0, e = Data.size(); i < e; ++i)
- delete Data[i];
- for (StringMap<DataArray>::iterator
- EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI)
- for (DataArray::iterator DI = EI->second.begin(),
- DE = EI->second.end(); DI != DE; ++DI)
- delete (*DI);
-}
+DwarfAccelTable::~DwarfAccelTable() { }
void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+ assert(Data.empty() && "Already finalized!");
// If the string is in the list already then add this die to the list
// otherwise add a new one.
DataArray &DIEs = Entries[Name];
- DIEs.push_back(new HashDataContents(die, Flags));
+ DIEs.push_back(new (Allocator) HashDataContents(die, Flags));
}
void DwarfAccelTable::ComputeBucketCount(void) {
@@ -85,31 +70,23 @@ void DwarfAccelTable::ComputeBucketCount(void) {
Header.hashes_count = num;
}
-namespace {
- // DIESorter - comparison predicate that sorts DIEs by their offset.
- struct DIESorter {
- bool operator()(const struct DwarfAccelTable::HashDataContents *A,
- const struct DwarfAccelTable::HashDataContents *B) const {
- return A->Die->getOffset() < B->Die->getOffset();
- }
- };
+// compareDIEs - comparison predicate that sorts DIEs by their offset.
+static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
+ const DwarfAccelTable::HashDataContents *B) {
+ return A->Die->getOffset() < B->Die->getOffset();
}
void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
// Create the individual hash data outputs.
for (StringMap<DataArray>::iterator
EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
- struct HashData *Entry = new HashData((*EI).getKeyData());
// Unique the entries.
- std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter());
+ std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs);
EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
EI->second.end());
- for (DataArray::const_iterator DI = EI->second.begin(),
- DE = EI->second.end();
- DI != DE; ++DI)
- Entry->addData((*DI));
+ HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
Data.push_back(Entry);
}
@@ -216,7 +193,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
D->getStringPool());
Asm->OutStreamer.AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.size());
- for (std::vector<struct HashDataContents*>::const_iterator
+ for (ArrayRef<HashDataContents*>::const_iterator
DI = (*HI)->Data.begin(), DE = (*HI)->Data.end();
DI != DE; ++DI) {
// Emit the DIE offset
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 2278d4c..963b8cd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -15,6 +15,7 @@
#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/DataTypes.h"
@@ -164,22 +165,12 @@ public:
private:
struct TableHeaderData {
-
uint32_t die_offset_base;
- std::vector<Atom> Atoms;
+ SmallVector<Atom, 1> Atoms;
+
+ TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
+ : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { }
- TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList,
- uint32_t offset = 0) :
- die_offset_base(offset) {
- for (size_t i = 0, e = AtomList.size(); i != e; ++i)
- Atoms.push_back(AtomList[i]);
- }
-
- TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0)
- : die_offset_base(offset) {
- Atoms.push_back(Atom);
- }
-
#ifndef NDEBUG
void print (raw_ostream &O) {
O << "die_offset_base: " << die_offset_base << "\n";
@@ -221,11 +212,11 @@ private:
StringRef Str;
uint32_t HashValue;
MCSymbol *Sym;
- std::vector<struct HashDataContents*> Data; // offsets
- HashData(StringRef S) : Str(S) {
+ ArrayRef<HashDataContents*> Data; // offsets
+ HashData(StringRef S, ArrayRef<HashDataContents*> Data)
+ : Str(S), Data(Data) {
HashValue = DwarfAccelTable::HashDJB(S);
}
- void addData(struct HashDataContents *Datum) { Data.push_back(Datum); }
#ifndef NDEBUG
void print(raw_ostream &O) {
O << "Name: " << Str << "\n";
@@ -255,15 +246,18 @@ private:
void EmitHashes(AsmPrinter *);
void EmitOffsets(AsmPrinter *, MCSymbol *);
void EmitData(AsmPrinter *, DwarfDebug *D);
-
+
+ // Allocator for HashData and HashDataContents.
+ BumpPtrAllocator Allocator;
+
// Output Variables
TableHeader Header;
TableHeaderData HeaderData;
std::vector<HashData*> Data;
// String Data
- typedef std::vector<struct HashDataContents*> DataArray;
- typedef StringMap<DataArray> StringEntries;
+ typedef std::vector<HashDataContents*> DataArray;
+ typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries;
StringEntries Entries;
// Buckets/Hashes/Offsets
@@ -274,8 +268,7 @@ private:
// Public Implementation
public:
- DwarfAccelTable(DwarfAccelTable::Atom);
- DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &);
+ DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
~DwarfAccelTable();
void AddName(StringRef, DIE*, char = 0);
void FinalizeTable(AsmPrinter *, const char *);
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 69dc454..cc5b642 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1032,9 +1032,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Add function template parameters.
addTemplateParams(*SPDie, SP.getTemplateParams());
- // Unfortunately this code needs to stay here to work around
- // a bug in older gdbs that requires the linkage name to resolve
- // multiple template functions.
+ // Unfortunately this code needs to stay here instead of below the
+ // AT_specification code in order to work around a bug in older
+ // gdbs that requires the linkage name to resolve multiple template
+ // functions.
StringRef LinkageName = SP.getLinkageName();
if (!LinkageName.empty())
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index bfbe779..5ff641c 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,10 +23,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
using namespace llvm;
@@ -100,17 +100,17 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
reserveResources(&MID);
}
-namespace llvm {
+namespace {
// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
// Schedule method to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT, bool IsPostRA);
+ MachineDominatorTree &MDT, bool IsPostRA);
// Schedule - Actual scheduling work.
void schedule();
};
-}
+} // end anonymous namespace
DefaultVLIWScheduler::DefaultVLIWScheduler(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
@@ -129,25 +129,49 @@ VLIWPacketizerList::VLIWPacketizerList(
bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
TII = TM.getInstrInfo();
ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
- VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+ SchedulerImpl = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
}
// VLIWPacketizerList Dtor
VLIWPacketizerList::~VLIWPacketizerList() {
- if (VLIWScheduler)
- delete VLIWScheduler;
+ delete SchedulerImpl;
+ delete ResourceTracker;
+}
+
+// ignorePseudoInstruction - ignore pseudo instructions.
+bool VLIWPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (MI->isDebugValue())
+ return true;
+
+ if (TII->isSchedulingBoundary(MI, MBB, MF))
+ return true;
+
+ return false;
+}
+
+// isSoloInstruction - return true if instruction I must end previous
+// packet.
+bool VLIWPacketizerList::isSoloInstruction(MachineInstr *I) {
+ if (I->isInlineAsm())
+ return true;
+
+ return false;
+}
- if (ResourceTracker)
- delete ResourceTracker;
+// addToPacket - Add I to the current packet and reserve resource.
+void VLIWPacketizerList::addToPacket(MachineInstr *MI) {
+ CurrentPacketMIs.push_back(MI);
+ ResourceTracker->reserveResources(MI);
}
// endPacket - End the current packet, bundle packet instructions and reset
// DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
- MachineInstr *MI) {
+ MachineInstr *I) {
if (CurrentPacketMIs.size() > 1) {
MachineInstr *MIFirst = CurrentPacketMIs.front();
- finalizeBundle(*MBB, MIFirst, MI);
+ finalizeBundle(*MBB, MIFirst, I);
}
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
@@ -157,36 +181,31 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator BeginItr,
MachineBasicBlock::iterator EndItr) {
- assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
- VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
- VLIWScheduler->schedule();
- VLIWScheduler->exitRegion();
-
- // Generate MI -> SU map.
- //std::map <MachineInstr*, SUnit*> MIToSUnit;
- MIToSUnit.clear();
- for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
- SUnit *SU = &VLIWScheduler->SUnits[i];
- MIToSUnit[SU->getInstr()] = SU;
- }
+ assert(MBB->end() == EndItr && "Bad EndIndex");
+
+ SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+
+ // Build the DAG without reordering instructions.
+ SchedulerImpl->schedule();
+
+ // Remember scheduling units.
+ SUnits = SchedulerImpl->SUnits;
// The main packetizer loop.
for (; BeginItr != EndItr; ++BeginItr) {
MachineInstr *MI = BeginItr;
- this->initPacketizerState();
+ // Ignore pseudo instructions.
+ if (ignorePseudoInstruction(MI, MBB))
+ continue;
// End the current packet if needed.
- if (this->isSoloInstruction(MI)) {
+ if (isSoloInstruction(MI)) {
endPacket(MBB, MI);
continue;
}
- // Ignore pseudo instructions.
- if (this->ignorePseudoInstruction(MI, MBB))
- continue;
-
- SUnit *SUI = MIToSUnit[MI];
+ SUnit *SUI = SchedulerImpl->getSUnit(MI);
assert(SUI && "Missing SUnit Info!");
// Ask DFA if machine resource is available for MI.
@@ -196,13 +215,13 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
MachineInstr *MJ = *VI;
- SUnit *SUJ = MIToSUnit[MJ];
+ SUnit *SUJ = SchedulerImpl->getSUnit(MJ);
assert(SUJ && "Missing SUnit Info!");
// Is it legal to packetize SUI and SUJ together.
- if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
+ if (!isLegalToPacketizeTogether(SUI, SUJ)) {
// Allow packetization if dependency can be pruned.
- if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
+ if (!isLegalToPruneDependencies(SUI, SUJ)) {
// End the packet if dependency cannot be pruned.
endPacket(MBB, MI);
break;
@@ -215,9 +234,11 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
}
// Add MI to the current packet.
- BeginItr = this->addToPacket(MI);
+ addToPacket(MI);
} // For all instructions in BB.
// End any packet left behind.
endPacket(MBB, EndItr);
+
+ SchedulerImpl->exitRegion();
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 3ade660..934cc12 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -1068,9 +1068,9 @@ public:
#ifndef NDEBUG
LIValidator validator;
- std::for_each(Entering.begin(), Entering.end(), validator);
- std::for_each(Internal.begin(), Internal.end(), validator);
- std::for_each(Exiting.begin(), Exiting.end(), validator);
+ validator = std::for_each(Entering.begin(), Entering.end(), validator);
+ validator = std::for_each(Internal.begin(), Internal.end(), validator);
+ validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
#endif
@@ -1115,9 +1115,9 @@ public:
#ifndef NDEBUG
LIValidator validator;
- std::for_each(Entering.begin(), Entering.end(), validator);
- std::for_each(Internal.begin(), Internal.end(), validator);
- std::for_each(Exiting.begin(), Exiting.end(), validator);
+ validator = std::for_each(Entering.begin(), Entering.end(), validator);
+ validator = std::for_each(Internal.begin(), Internal.end(), validator);
+ validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
#endif
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 6c8a107..1abb8f2 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -392,22 +392,44 @@ void MachineBasicBlock::updateTerminator() {
TII->InsertBranch(*this, TBB, 0, Cond, dl);
}
} else {
+ // Walk through the successors and find the successor which is not
+ // a landing pad and is not the conditional branch destination (in TBB)
+ // as the fallthrough successor.
+ MachineBasicBlock *FallthroughBB = 0;
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isLandingPad() || *SI == TBB)
+ continue;
+ assert(!FallthroughBB && "Found more than one fallthrough successor.");
+ FallthroughBB = *SI;
+ }
+ if (!FallthroughBB && canFallThrough()) {
+ // We fallthrough to the same basic block as the conditional jump
+ // targets. Remove the conditional jump, leaving unconditional
+ // fallthrough.
+ // FIXME: This does not seem like a reasonable pattern to support, but it
+ // has been seen in the wild coming out of degenerate ARM test cases.
+ TII->RemoveBranch(*this);
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ return;
+ }
+
// The block has a fallthrough conditional branch.
- MachineBasicBlock *MBBA = *succ_begin();
- MachineBasicBlock *MBBB = *llvm::next(succ_begin());
- if (MBBA == TBB) std::swap(MBBB, MBBA);
if (isLayoutSuccessor(TBB)) {
if (TII->ReverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->InsertBranch(*this, MBBA, 0, Cond, dl);
+ TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
return;
}
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, MBBA, 0, Cond, dl);
- } else if (!isLayoutSuccessor(MBBA)) {
+ TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+ } else if (!isLayoutSuccessor(FallthroughBB)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, MBBA, Cond, dl);
+ TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl);
}
}
}
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 22d7212..5ba6851 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -102,13 +102,13 @@ public:
}
/// \brief Iterator over blocks within the chain.
- typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator;
+ typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
/// \brief Beginning of blocks within the chain.
- iterator begin() const { return Blocks.begin(); }
+ iterator begin() { return Blocks.begin(); }
/// \brief End of blocks within the chain.
- iterator end() const { return Blocks.end(); }
+ iterator end() { return Blocks.end(); }
/// \brief Merge a block chain into this one.
///
@@ -211,12 +211,15 @@ class MachineBlockPlacement : public MachineFunctionPass {
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = 0);
- MachineBasicBlock *findBestLoopTop(MachineFunction &F,
- MachineLoop &L,
+ MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopExit(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
+ void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet);
void buildCFGChains(MachineFunction &F);
- void AlignLoops(MachineFunction &F);
public:
static char ID; // Pass identification, replacement for typeid
@@ -540,13 +543,74 @@ void MachineBlockPlacement::buildChain(
/// \brief Find the best loop top block for layout.
///
+/// Look for a block which is strictly better than the loop header for laying
+/// out at the top of the loop. This looks for one and only one pattern:
+/// a latch block with no conditional exit. This block will cause a conditional
+/// jump around it or will be the bottom of the loop if we lay it out in place,
+/// but if it it doesn't end up at the bottom of the loop for any reason,
+/// rotation alone won't fix it. Because such a block will always result in an
+/// unconditional jump (for the backedge) rotating it in front of the loop
+/// header is always profitable.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Check that the header hasn't been fused with a preheader block due to
+ // crazy branches. If it has, we need to start with the header at the top to
+ // prevent pulling the preheader into the loop body.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return L.getHeader();
+
+ DEBUG(dbgs() << "Finding best loop top for: "
+ << getBlockName(L.getHeader()) << "\n");
+
+ BlockFrequency BestPredFreq;
+ MachineBasicBlock *BestPred = 0;
+ for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
+ PE = L.getHeader()->pred_end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
+ << Pred->succ_size() << " successors, "
+ << MBFI->getBlockFreq(Pred) << " freq\n");
+ if (Pred->succ_size() > 1)
+ continue;
+
+ BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ if (!BestPred || PredFreq > BestPredFreq ||
+ (!(PredFreq < BestPredFreq) &&
+ Pred->isLayoutSuccessor(L.getHeader()))) {
+ BestPred = Pred;
+ BestPredFreq = PredFreq;
+ }
+ }
+
+ // If no direct predecessor is fine, just use the loop header.
+ if (!BestPred)
+ return L.getHeader();
+
+ // Walk backwards through any straight line of predecessors.
+ while (BestPred->pred_size() == 1 &&
+ (*BestPred->pred_begin())->succ_size() == 1 &&
+ *BestPred->pred_begin() != L.getHeader())
+ BestPred = *BestPred->pred_begin();
+
+ DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
+ return BestPred;
+}
+
+
+/// \brief Find the best loop exiting block for layout.
+///
/// This routine implements the logic to analyze the loop looking for the best
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
- MachineLoop &L,
- const BlockFilterSet &LoopBlockSet) {
+MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
// within the loop is the best one to layout at the top. However, if the loop
@@ -557,11 +621,11 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
// header and only rotate if safe.
BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
if (!LoopBlockSet.count(*HeaderChain.begin()))
- return L.getHeader();
+ return 0;
BlockFrequency BestExitEdgeFreq;
+ unsigned BestExitLoopDepth = 0;
MachineBasicBlock *ExitingBB = 0;
- MachineBasicBlock *LoopingBB = 0;
// If there are exits to outer loops, loop rotation can severely limit
// fallthrough opportunites unless it selects such an exit. Keep a set of
// blocks where rotating to exit with that block will reach an outer loop.
@@ -584,15 +648,10 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
// successor isn't found.
MachineBasicBlock *OldExitingBB = ExitingBB;
BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
- // We also compute and store the best looping successor for use in layout.
- MachineBasicBlock *BestLoopSucc = 0;
+ bool HasLoopingSucc = false;
// FIXME: Due to the performance of the probability and weight routines in
- // the MBPI analysis, we use the internal weights. This is only valid
- // because it is purely a ranking function, we don't care about anything
- // but the relative values.
- uint32_t BestLoopSuccWeight = 0;
- // FIXME: We also manually compute the probabilities to avoid quadratic
- // behavior.
+ // the MBPI analysis, we use the internal weights and manually compute the
+ // probabilities to avoid quadratic behavior.
uint32_t WeightScale = 0;
uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
@@ -604,10 +663,8 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
continue;
BlockChain &SuccChain = *BlockToChain[*SI];
// Don't split chains, either this chain or the successor's chain.
- if (&Chain == &SuccChain || *SI != *SuccChain.begin()) {
- DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: "
- : "exiting: ")
- << getBlockName(*I) << " -> "
+ if (&Chain == &SuccChain) {
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
<< getBlockName(*SI) << " (chain conflict)\n");
continue;
}
@@ -616,60 +673,103 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
if (LoopBlockSet.count(*SI)) {
DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
<< getBlockName(*SI) << " (" << SuccWeight << ")\n");
- if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight)
- continue;
-
- BestLoopSucc = *SI;
- BestLoopSuccWeight = SuccWeight;
+ HasLoopingSucc = true;
continue;
}
+ unsigned SuccLoopDepth = 0;
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
+ SuccLoopDepth = ExitLoop->getLoopDepth();
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(*I);
+ }
+
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n");
+ << getBlockName(*SI) << " [L:" << SuccLoopDepth
+ << "] (" << ExitEdgeFreq << ")\n");
// Note that we slightly bias this toward an existing layout successor to
// retain incoming order in the absence of better information.
// FIXME: Should we bias this more strongly? It's pretty weak.
- if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq ||
+ if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
+ ExitEdgeFreq > BestExitEdgeFreq ||
((*I)->isLayoutSuccessor(*SI) &&
!(ExitEdgeFreq < BestExitEdgeFreq))) {
BestExitEdgeFreq = ExitEdgeFreq;
ExitingBB = *I;
}
-
- if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI))
- if (ExitLoop->contains(&L))
- BlocksExitingToOuterLoop.insert(*I);
}
// Restore the old exiting state, no viable looping successor was found.
- if (!BestLoopSucc) {
+ if (!HasLoopingSucc) {
ExitingBB = OldExitingBB;
BestExitEdgeFreq = OldBestExitEdgeFreq;
continue;
}
-
- // If this was best exiting block thus far, also record the looping block.
- if (ExitingBB == *I)
- LoopingBB = BestLoopSucc;
}
- // Without a candidate exitting block or with only a single block in the
+ // Without a candidate exiting block or with only a single block in the
// loop, just use the loop header to layout the loop.
if (!ExitingBB || L.getNumBlocks() == 1)
- return L.getHeader();
+ return 0;
// Also, if we have exit blocks which lead to outer loops but didn't select
// one of them as the exiting block we are rotating toward, disable loop
// rotation altogether.
if (!BlocksExitingToOuterLoop.empty() &&
!BlocksExitingToOuterLoop.count(ExitingBB))
- return L.getHeader();
+ return 0;
- assert(LoopingBB && "All successors of a loop block are exit blocks!");
DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
- DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n");
- return LoopingBB;
+ return ExitingBB;
+}
+
+/// \brief Attempt to rotate an exiting block to the bottom of the loop.
+///
+/// Once we have built a chain, try to rotate it to line up the hot exit block
+/// with fallthrough out of the loop if doing so doesn't introduce unnecessary
+/// branches. For example, if the loop has fallthrough into its header and out
+/// of its bottom already, don't rotate it.
+void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
+ MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet) {
+ if (!ExitingBB)
+ return;
+
+ MachineBasicBlock *Top = *LoopChain.begin();
+ bool ViableTopFallthrough = false;
+ for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
+ PE = Top->pred_end();
+ PI != PE; ++PI) {
+ BlockChain *PredChain = BlockToChain[*PI];
+ if (!LoopBlockSet.count(*PI) &&
+ (!PredChain || *PI == *llvm::prior(PredChain->end()))) {
+ ViableTopFallthrough = true;
+ break;
+ }
+ }
+
+ // If the header has viable fallthrough, check whether the current loop
+ // bottom is a viable exiting block. If so, bail out as rotating will
+ // introduce an unnecessary branch.
+ if (ViableTopFallthrough) {
+ MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end());
+ for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
+ SE = Bottom->succ_end();
+ SI != SE; ++SI) {
+ BlockChain *SuccChain = BlockToChain[*SI];
+ if (!LoopBlockSet.count(*SI) &&
+ (!SuccChain || *SI == *SuccChain->begin()))
+ return;
+ }
+ }
+
+ BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
+ ExitingBB);
+ if (ExitIt == LoopChain.end())
+ return;
+
+ std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end());
}
/// \brief Forms basic block chains from the natural loop structures.
@@ -688,8 +788,20 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
- MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet);
- BlockChain &LoopChain = *BlockToChain[LayoutTop];
+ // First check to see if there is an obviously preferable top block for the
+ // loop. This will default to the header, but may end up as one of the
+ // predecessors to the header if there is one which will result in strictly
+ // fewer branches in the loop body.
+ MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
+
+ // If we selected just the header for the loop top, look for a potentially
+ // profitable exit block in the event that rotating the loop can eliminate
+ // branches by placing an exit edge at the bottom.
+ MachineBasicBlock *ExitingBB = 0;
+ if (LoopTop == L.getHeader())
+ ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
+
+ BlockChain &LoopChain = *BlockToChain[LoopTop];
// FIXME: This is a really lame way of walking the chains in the loop: we
// walk the blocks, and use a set to prevent visiting a particular chain
@@ -721,7 +833,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
BlockWorkList.push_back(*Chain.begin());
}
- buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet);
+ buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
+ rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -733,7 +846,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
}
for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
- BCI != BCE; ++BCI)
+ BCI != BCE; ++BCI) {
+ dbgs() << " ... " << getBlockName(*BCI) << "\n";
if (!LoopBlockSet.erase(*BCI)) {
// We don't mark the loop as bad here because there are real situations
// where this can occur. For example, with an unanalyzable fallthrough
@@ -743,6 +857,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
<< " Bad block: " << getBlockName(*BCI) << "\n";
}
+ }
if (!LoopBlockSet.empty()) {
BadLoop = true;
@@ -882,28 +997,33 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
F.back().updateTerminator();
-}
-/// \brief Recursive helper to align a loop and any nested loops.
-static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) {
- // Recurse through nested loops.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- AlignLoop(F, *I, Align);
-
- L->getTopBlock()->setAlignment(Align);
-}
-
-/// \brief Align loop headers to target preferred alignments.
-void MachineBlockPlacement::AlignLoops(MachineFunction &F) {
+ // Walk through the backedges of the function now that we have fully laid out
+ // the basic blocks and align the destination of each backedge. We don't rely
+ // on the loop info here so that we can align backedges in unnatural CFGs and
+ // backedges that were introduced purely because of the loop rotations done
+ // during this layout pass.
+ // FIXME: This isn't quite right, we shouldn't align backedges that result
+ // from blocks being sunken below the exit block for the function.
if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
return;
-
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
return; // Don't care about loop alignment.
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
- AlignLoop(F, *I, Align);
+ SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks;
+ for (BlockChain::iterator BI = FunctionChain.begin(),
+ BE = FunctionChain.end();
+ BI != BE; ++BI) {
+ PreviousBlocks.insert(*BI);
+ // Set alignment on the destination of all the back edges in the new
+ // ordering.
+ for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(),
+ SE = (*BI)->succ_end();
+ SI != SE; ++SI)
+ if (PreviousBlocks.count(*SI))
+ (*SI)->setAlignment(Align);
+ }
}
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
@@ -919,7 +1039,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
assert(BlockToChain.empty());
buildCFGChains(F);
- AlignLoops(F);
BlockToChain.clear();
ChainAllocator.DestroyAll();
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 53d1fcf..490547b 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -37,8 +37,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
cl::desc("Disable tail duplication"));
static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
cl::desc("Disable pre-register allocation tail duplication"));
-static cl::opt<bool> EnableBlockPlacement("enable-block-placement",
- cl::Hidden, cl::desc("Enable probability-driven block placement"));
+static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
+ cl::Hidden, cl::desc("Disable the probability-driven block placement, and "
+ "re-enable the old code placement pass"));
static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
@@ -206,7 +207,7 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false),
+ : ImmutablePass(ID), TM(tm), PM(&pm), Impl(0), Initialized(false),
DisableVerify(false),
EnableTailMerge(true) {
@@ -233,7 +234,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetPassConfig::TargetPassConfig()
- : ImmutablePass(ID), PM(*(PassManagerBase*)0) {
+ : ImmutablePass(ID), PM(0) {
llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
}
@@ -268,16 +269,16 @@ AnalysisID TargetPassConfig::addPass(char &ID) {
Pass *P = Pass::createPass(FinalID);
if (!P)
llvm_unreachable("Pass ID not registered");
- PM.add(P);
+ PM->add(P);
return FinalID;
}
void TargetPassConfig::printAndVerify(const char *Banner) const {
if (TM->shouldPrintMachineCode())
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+ PM->add(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
- PM.add(createMachineVerifierPass(Banner));
+ PM->add(createMachineVerifierPass(Banner));
}
/// Add common target configurable passes that perform LLVM IR to IR transforms
@@ -287,46 +288,46 @@ void TargetPassConfig::addIRPasses() {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createBasicAliasAnalysisPass());
+ PM->add(createTypeBasedAliasAnalysisPass());
+ PM->add(createBasicAliasAnalysisPass());
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
if (!DisableVerify)
- PM.add(createVerifierPass());
+ PM->add(createVerifierPass());
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ PM->add(createLoopStrengthReducePass(getTargetLowering()));
if (PrintLSR)
- PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ PM->add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
- PM.add(createGCLoweringPass());
+ PM->add(createGCLoweringPass());
// Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
+ PM->add(createUnreachableBlockEliminationPass());
}
/// Add common passes that perform LLVM IR to IR transforms in preparation for
/// instruction selection.
void TargetPassConfig::addISelPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
- PM.add(createCodeGenPreparePass(getTargetLowering()));
+ PM->add(createCodeGenPreparePass(getTargetLowering()));
- PM.add(createStackProtectorPass(getTargetLowering()));
+ PM->add(createStackProtectorPass(getTargetLowering()));
addPreISel();
if (PrintISelInput)
- PM.add(createPrintFunctionPass("\n\n"
- "*** Final LLVM Code input to ISel ***\n",
- &dbgs()));
+ PM->add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
// All passes which modify the LLVM IR are now complete; run the verifier
// to ensure that the IR is valid.
if (!DisableVerify)
- PM.add(createVerifierPass());
+ PM->add(createVerifierPass());
}
/// Add the complete set of target-independent postISel code generator passes.
@@ -404,7 +405,7 @@ void TargetPassConfig::addMachinePasses() {
// GC
addPass(GCMachineCodeAnalysisID);
if (PrintGCInfo)
- PM.add(createGCInfoPrinter(dbgs()));
+ PM->add(createGCInfoPrinter(dbgs()));
// Basic block placement.
if (getOptLevel() != CodeGenOpt::None)
@@ -521,7 +522,7 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
addPass(PHIEliminationID);
addPass(TwoAddressInstructionPassID);
- PM.add(RegAllocPass);
+ PM->add(RegAllocPass);
printAndVerify("After Register Allocation");
}
@@ -563,7 +564,7 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
printAndVerify("After Machine Scheduling");
// Add the selected register allocation pass.
- PM.add(RegAllocPass);
+ PM->add(RegAllocPass);
printAndVerify("After Register Allocation");
// FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
@@ -610,10 +611,10 @@ void TargetPassConfig::addMachineLateOptimization() {
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
AnalysisID ID = &NoPassID;
- if (EnableBlockPlacement) {
- // MachineBlockPlacement is an experimental pass which is disabled by
- // default currently. Eventually it should subsume CodePlacementOpt, so
- // when enabled, the other is disabled.
+ if (!DisableBlockPlacement) {
+ // MachineBlockPlacement is a new pass which subsumes the functionality of
+ // CodPlacementOpt. The old code placement pass can be restored by
+ // disabling block placement, but eventually it will be removed.
ID = addPass(MachineBlockPlacementID);
} else {
ID = addPass(CodePlacementOptID);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 6be1ab7..d46eb89 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -39,8 +39,8 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
LiveIntervals *lis)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
- IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
- FirstDbgValue(0) {
+ IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
+ LoopRegs(MLI, MDT), FirstDbgValue(0) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
@@ -554,7 +554,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
continue;
}
- assert(!MI->isTerminator() && !MI->isLabel() &&
+ assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
SUnit *SU = MISUnitMap[MI];
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d1b998f..0914c66 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1080,6 +1080,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// If the root changed (e.g. it was a dead load, update the root).
DAG.setRoot(Dummy.getValue());
+ DAG.RemoveDeadNodes();
}
SDValue DAGCombiner::visit(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3ae8345..9fe4480 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -417,7 +417,8 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
- return DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+ SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 69dd813..8ec1ae8 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -138,9 +138,11 @@ static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
// Don't add glue from a node to itself.
if (GlueDestNode == N) return;
- // Don't add glue to something which already has glue.
- if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
-
+ // Don't add glue to something that already has it, either as a use or value.
+ if (N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue ||
+ N->getValueType(N->getNumValues() - 1) == MVT::Glue) {
+ return;
+ }
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
VTs.push_back(N->getValueType(I));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 94cb958..f1e879b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5050,7 +5050,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::gcroot:
if (GFI) {
- const Value *Alloca = I.getArgOperand(0);
+ const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 09a2b1f..e341e15 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1367,8 +1367,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
- if ((KnownOne & KnownOne2) == KnownOne) {
+ // NB: it is okay if more bits are known than are requested
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
+ if (KnownOne == KnownOne2) { // set bits are the same on both sides
EVT VT = Op.getValueType();
SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index c5bd3a3..26cf259 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -34,7 +34,8 @@ void SlotIndexes::releaseMemory() {
mi2iMap.clear();
MBBRanges.clear();
idx2MBBMap.clear();
- clearList();
+ indexList.clear();
+ ileAllocator.Reset();
}
bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
@@ -45,17 +46,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
// iterator in lock-step (though skipping it over indexes which have
// null pointers in the instruction field).
// At each iteration assert that the instruction pointed to in the index
- // is the same one pointed to by the MI iterator. This
+ // is the same one pointed to by the MI iterator. This
// FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
// only need to be set up once after the first numbering is computed.
mf = &fn;
- initList();
// Check that the list contains only the sentinal.
- assert(indexListHead->getNext() == 0 &&
- "Index list non-empty at initial numbering?");
+ assert(indexList.empty() && "Index list non-empty at initial numbering?");
assert(idx2MBBMap.empty() &&
"Index -> MBB mapping non-empty at initial numbering?");
assert(MBBRanges.empty() &&
@@ -68,7 +67,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
MBBRanges.resize(mf->getNumBlockIDs());
idx2MBBMap.reserve(mf->size());
- push_back(createEntry(0, index));
+ indexList.push_back(createEntry(0, index));
// Iterate over the function.
for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
@@ -76,7 +75,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
MachineBasicBlock *mbb = &*mbbItr;
// Insert an index for the MBB start.
- SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block);
+ SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
@@ -85,20 +84,20 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
continue;
// Insert a store index for the instr.
- push_back(createEntry(mi, index += SlotIndex::InstrDist));
+ indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist));
// Save this base index in the maps.
- mi2iMap.insert(std::make_pair(mi, SlotIndex(back(),
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
SlotIndex::Slot_Block)));
-
+
++functionSize;
}
// We insert one blank instructions between basic blocks.
- push_back(createEntry(0, index += SlotIndex::InstrDist));
+ indexList.push_back(createEntry(0, index += SlotIndex::InstrDist));
MBBRanges[mbb->getNumber()].first = blockStartIndex;
- MBBRanges[mbb->getNumber()].second = SlotIndex(back(),
+ MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(),
SlotIndex::Slot_Block);
idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
}
@@ -119,38 +118,37 @@ void SlotIndexes::renumberIndexes() {
unsigned index = 0;
- for (IndexListEntry *curEntry = front(); curEntry != getTail();
- curEntry = curEntry->getNext()) {
- curEntry->setIndex(index);
+ for (IndexList::iterator I = indexList.begin(), E = indexList.end();
+ I != E; ++I) {
+ I->setIndex(index);
index += SlotIndex::InstrDist;
}
}
-// Renumber indexes locally after curEntry was inserted, but failed to get a new
+// Renumber indexes locally after curItr was inserted, but failed to get a new
// index.
-void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) {
+void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
// Number indexes with half the default spacing so we can catch up quickly.
const unsigned Space = SlotIndex::InstrDist/2;
assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
- IndexListEntry *start = curEntry->getPrev();
- unsigned index = start->getIndex();
- IndexListEntry *tail = getTail();
+ IndexList::iterator startItr = prior(curItr);
+ unsigned index = startItr->getIndex();
do {
- curEntry->setIndex(index += Space);
- curEntry = curEntry->getNext();
+ curItr->setIndex(index += Space);
+ ++curItr;
// If the next index is bigger, we have caught up.
- } while (curEntry != tail && curEntry->getIndex() <= index);
+ } while (curItr != indexList.end() && curItr->getIndex() <= index);
- DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-'
+ DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-'
<< index << " ***\n");
++NumLocalRenum;
}
void SlotIndexes::dump() const {
- for (const IndexListEntry *itr = front(); itr != getTail();
- itr = itr->getNext()) {
+ for (IndexList::const_iterator itr = indexList.begin();
+ itr != indexList.end(); ++itr) {
dbgs() << itr->getIndex() << " ";
if (itr->getInstr() != 0) {
@@ -168,7 +166,7 @@ void SlotIndexes::dump() const {
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
if (isValid())
- os << entry().getIndex() << "Berd"[getSlot()];
+ os << listEntry()->getIndex() << "Berd"[getSlot()];
else
os << "invalid";
}
OpenPOWER on IntegriCloud