diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/BranchFolding.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/BranchFolding.cpp | 245 |
1 files changed, 181 insertions, 64 deletions
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 6fba161..3c439e6 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -1,4 +1,4 @@ -//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===// +//===- BranchFolding.cpp - Fold machine code branch instructions ----------===// // // The LLVM Compiler Infrastructure // @@ -18,37 +18,55 @@ //===----------------------------------------------------------------------===// #include "BranchFolding.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include <algorithm> +#include <cassert> +#include <cstddef> +#include <iterator> +#include <numeric> +#include <vector> + using namespace llvm; -#define DEBUG_TYPE "branchfolding" +#define DEBUG_TYPE "branch-folder" STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); STATISTIC(NumHoist , "Number of times common instructions are hoisted"); +STATISTIC(NumTailCalls, "Number of tail calls optimized"); static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); @@ -67,10 +85,12 @@ TailMergeSize("tail-merge-size", cl::init(3), cl::Hidden); namespace { + /// BranchFolderPass - Wrap branch folder in a machine function pass. class BranchFolderPass : public MachineFunctionPass { public: static char ID; + explicit BranchFolderPass(): MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -82,12 +102,13 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char BranchFolderPass::ID = 0; char &llvm::BranchFolderPassID = BranchFolderPass::ID; -INITIALIZE_PASS(BranchFolderPass, "branch-folder", +INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE, "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { @@ -123,8 +144,6 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, } } -/// RemoveDeadBlock - Remove the specified dead machine basic block from the -/// function, updating the CFG. void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); @@ -144,9 +163,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { MLI->removeBlock(MBB); } -/// OptimizeFunction - Perhaps branch folding, tail merging and other -/// CFG optimizations on the given function. Block placement changes the layout -/// and may create new tail merging opportunities. bool BranchFolder::OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, @@ -156,13 +172,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TriedMerging.clear(); + MachineRegisterInfo &MRI = MF.getRegInfo(); AfterBlockPlacement = AfterPlacement; TII = tii; TRI = tri; MMI = mmi; MLI = mli; + this->MRI = &MRI; - MachineRegisterInfo &MRI = MF.getRegInfo(); UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF); if (!UpdateLiveIns) MRI.invalidateLiveness(); @@ -348,23 +365,18 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, return TailLen; } -/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything -/// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { TII->ReplaceTailWithBranchTo(OldInst, NewDest); if (UpdateLiveIns) { NewDest->clearLiveIns(); - computeLiveIns(LiveRegs, *TRI, *NewDest); + computeLiveIns(LiveRegs, *MRI, *NewDest); } ++NumTailMerge; } -/// SplitMBBAt - Given a machine basic block and an iterator into it, split the -/// MBB so that the part before the iterator falls into the part starting at the -/// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1, const BasicBlock *BB) { @@ -375,7 +387,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = CurMBB.getIterator(); - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -388,7 +400,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); // NewMBB belongs to the same loop as CurMBB. - if (MLI) + if (MLI) if (MachineLoop *ML = MLI->getLoopFor(&CurMBB)) ML->addBasicBlockToLoop(NewMBB, MLI->getBase()); @@ -396,7 +408,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); if (UpdateLiveIns) - computeLiveIns(LiveRegs, *TRI, *NewMBB); + computeLiveIns(LiveRegs, *MRI, *NewMBB); // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); @@ -436,7 +448,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - DebugLoc dl; // FIXME: this is nowhere + DebugLoc dl = CurMBB->findBranchDebugLoc(); if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { @@ -497,6 +509,15 @@ BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, return MBFI.printBlockFreq(OS, Freq); } +void BranchFolder::MBFIWrapper::view(const Twine &Name, bool isSimple) { + MBFI.view(Name, isSimple); +} + +uint64_t +BranchFolder::MBFIWrapper::getEntryFreq() const { + return MBFI.getEntryFreq(); +} + /// CountTerminators - Count the number of terminators in the given /// block and set I to the position of the first non-terminator, if there /// is one, or MBB->end() otherwise. @@ -504,7 +525,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, MachineBasicBlock::iterator &I) { I = MBB->end(); unsigned NumTerms = 0; - for (;;) { + while (true) { if (I == MBB->begin()) { I = MBB->end(); break; @@ -516,6 +537,17 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, return NumTerms; } +/// A no successor, non-return block probably ends in unreachable and is cold. +/// Also consider a block that ends in an indirect branch to be a return block, +/// since many targets use plain indirect branches to return. +static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) { + if (!MBB->succ_empty()) + return false; + if (MBB->empty()) + return true; + return !(MBB->back().isReturn() || MBB->back().isIndirectBranch()); +} + /// ProfitableToMerge - Check if two machine basic blocks have a common tail /// and decide if it would be profitable to merge those tails. Return the /// length of the common tail and iterators to the first common instruction @@ -570,6 +602,15 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, return true; } + // If these are identical non-return blocks with no successors, merge them. + // Such blocks are typically cold calls to noreturn functions like abort, and + // are unlikely to become a fallthrough target after machine block placement. + // Tail merging these blocks is unlikely to create additional unconditional + // branches, and will reduce the size of this cold code. + if (I1 == MBB1->begin() && I2 == MBB2->begin() && + blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2)) + return true; + // If one of the blocks can be completely merged and happens to be in // a position where the other could fall through into it, merge any number // of instructions, because it can be done without a branch. @@ -579,6 +620,22 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) return true; + // If both blocks are identical and end in a branch, merge them unless they + // both have a fallthrough predecessor and successor. + // We can only do this after block placement because it depends on whether + // there are fallthroughs, and we don't know until after layout. + if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) { + auto BothFallThrough = [](MachineBasicBlock *MBB) { + if (MBB->succ_size() != 0 && !MBB->canFallThrough()) + return false; + MachineFunction::iterator I(MBB); + MachineFunction *MF = MBB->getParent(); + return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough(); + }; + if (!BothFallThrough(MBB1) || !BothFallThrough(MBB2)) + return true; + } + // If both blocks have an unconditional branch temporarily stripped out, // count that as an additional common instruction for the following // heuristics. This heuristic is only accurate for single-succ blocks, so to @@ -604,16 +661,6 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, (I1 == MBB1->begin() || I2 == MBB2->begin()); } -/// ComputeSameTails - Look through all the blocks in MergePotentials that have -/// hash CurHash (guaranteed to match the last element). Build the vector -/// SameTails of all those that have the (same) largest number of instructions -/// in common of any pair of these blocks. SameTails entries contain an -/// iterator into MergePotentials (from which the MachineBasicBlock can be -/// found) and a MachineBasicBlock::iterator into that MBB indicating the -/// instruction where the matching code sequence begins. -/// Order of elements in SameTails is the reverse of the order in which -/// those blocks appear in MergePotentials (where they are not necessarily -/// consecutive). unsigned BranchFolder::ComputeSameTails(unsigned CurHash, unsigned MinCommonTailLength, MachineBasicBlock *SuccBB, @@ -650,8 +697,6 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, return maxCommonTailLength; } -/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from -/// MergePotentials, restoring branches at ends of blocks as appropriate. void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { @@ -671,8 +716,6 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, MergePotentials.erase(CurMPIter, MergePotentials.end()); } -/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist -/// only of the common tail. Create a block that does by splitting one. bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, @@ -723,6 +766,43 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } +void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) { + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size()); + for (unsigned int i = 0 ; i != SameTails.size() ; ++i) { + if (i != commonTailIndex) + NextCommonInsts[i] = SameTails[i].getTailStartPos(); + else { + assert(SameTails[i].getTailStartPos() == MBB->begin() && + "MBB is not a common tail only block"); + } + } + + for (auto &MI : *MBB) { + if (MI.isDebugValue()) + continue; + DebugLoc DL = MI.getDebugLoc(); + for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { + if (i == commonTailIndex) + continue; + + auto &Pos = NextCommonInsts[i]; + assert(Pos != SameTails[i].getBlock()->end() && + "Reached BB end within common tail"); + while (Pos->isDebugValue()) { + ++Pos; + assert(Pos != SameTails[i].getBlock()->end() && + "Reached BB end within common tail"); + } + assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!"); + DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc()); + NextCommonInsts[i] = ++Pos; + } + MI.setDebugLoc(DL); + } +} + static void mergeOperations(MachineBasicBlock::iterator MBBIStartPos, MachineBasicBlock &MBBCommon) { @@ -875,10 +955,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Recompute common tail MBB's edge weights and block frequency. setCommonTailEdgeWeights(*MBB); - // Remove the original debug location from the common tail. - for (auto &MI : *MBB) - if (!MI.isDebugValue()) - MI.setDebugLoc(DebugLoc()); + // Merge debug locations across identical instructions for common tail. + MergeCommonTailDebugLocs(commonTailIndex); // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. @@ -1043,7 +1121,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Remove the unconditional branch at the end, if any. if (TBB && (Cond.empty() || FBB)) { - DebugLoc dl; // FIXME: this is nowhere + DebugLoc dl = PBB->findBranchDebugLoc(); TII->removeBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now @@ -1193,8 +1271,6 @@ static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { return DebugLoc(); } -/// OptimizeBlock - Analyze and optimize control flow related to the specified -/// block. This is never called on the entry block. bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); @@ -1386,6 +1462,43 @@ ReoptimizeBlock: } } + if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && + MF.getFunction()->optForSize()) { + // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch + // direction, thereby defeating careful block placement and regressing + // performance. Therefore, only consider this for optsize functions. + MachineInstr &TailCall = *MBB->getFirstNonDebugInstr(); + if (TII->isUnconditionalTailCall(TailCall)) { + MachineBasicBlock *Pred = *MBB->pred_begin(); + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector<MachineOperand, 4> PredCond; + bool PredAnalyzable = + !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true); + + if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB && + PredTBB != PredFBB) { + // The predecessor has a conditional branch to this block which consists + // of only a tail call. Try to fold the tail call into the conditional + // branch. + if (TII->canMakeTailCallConditional(PredCond, TailCall)) { + // TODO: It would be nice if analyzeBranch() could provide a pointer + // to the branch instruction so replaceBranchWithTailCall() doesn't + // have to search for it. + TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall); + ++NumTailCalls; + Pred->removeSuccessor(MBB); + MadeChange = true; + return MadeChange; + } + } + // If the predecessor is falling through to this block, we could reverse + // the branch condition and fold the tail call into that. However, after + // that we might have to re-arrange the CFG to fall through to the other + // block and there is a high risk of regressing code size rather than + // improving it. + } + } + // Analyze the branch in the current block. MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; SmallVector<MachineOperand, 4> CurCond; @@ -1508,7 +1621,6 @@ ReoptimizeBlock: // block doesn't fall through into some other block, see if we can find a // place to move this block where a fall-through will happen. if (!PrevBB.canFallThrough()) { - // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. bool CurFallsThru = MBB->canFallThrough(); @@ -1599,8 +1711,6 @@ ReoptimizeBlock: // Hoist Common Code //===----------------------------------------------------------------------===// -/// HoistCommonCode - Hoist common instruction sequences at the start of basic -/// blocks to their common predecessor. bool BranchFolder::HoistCommonCode(MachineFunction &MF) { bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { @@ -1734,9 +1844,6 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, return PI; } -/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction -/// sequence at the start of the function, move the instructions before MBB -/// terminator if it's legal. bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; @@ -1763,8 +1870,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { return false; bool HasDups = false; - SmallVector<unsigned, 4> LocalDefs; - SmallSet<unsigned, 4> LocalDefsSet; + SmallVector<unsigned, 4> LocalDefs, LocalKills; + SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet; MachineBasicBlock::iterator TIB = TBB->begin(); MachineBasicBlock::iterator FIB = FBB->begin(); MachineBasicBlock::iterator TIE = TBB->end(); @@ -1818,7 +1925,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { IsSafe = false; break; } - } else if (!LocalDefsSet.count(Reg)) { + } else if (!ActiveDefsSet.count(Reg)) { if (Defs.count(Reg)) { // Use is defined by the instruction at the point of insertion. IsSafe = false; @@ -1838,18 +1945,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore)) break; - // Remove kills from LocalDefsSet, these registers had short live ranges. + // Remove kills from ActiveDefsSet, these registers had short live ranges. for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned Reg = MO.getReg(); - if (!Reg || !LocalDefsSet.count(Reg)) + if (!Reg) continue; + if (!AllDefsSet.count(Reg)) { + LocalKills.push_back(Reg); + continue; + } if (TargetRegisterInfo::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.erase(*AI); + ActiveDefsSet.erase(*AI); } else { - LocalDefsSet.erase(Reg); + ActiveDefsSet.erase(Reg); } } @@ -1861,7 +1972,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) continue; LocalDefs.push_back(Reg); - addRegAndItsAliases(Reg, TRI, LocalDefsSet); + addRegAndItsAliases(Reg, TRI, ActiveDefsSet); + addRegAndItsAliases(Reg, TRI, AllDefsSet); } HasDups = true; @@ -1876,17 +1988,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { FBB->erase(FBB->begin(), FIB); // Update livein's. - bool AddedLiveIns = false; + bool ChangedLiveIns = false; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Def = LocalDefs[i]; - if (LocalDefsSet.count(Def)) { + if (ActiveDefsSet.count(Def)) { TBB->addLiveIn(Def); FBB->addLiveIn(Def); - AddedLiveIns = true; + ChangedLiveIns = true; } } + for (unsigned K : LocalKills) { + TBB->removeLiveIn(K); + FBB->removeLiveIn(K); + ChangedLiveIns = true; + } - if (AddedLiveIns) { + if (ChangedLiveIns) { TBB->sortUniqueLiveIns(); FBB->sortUniqueLiveIns(); } |