summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp901
1 files changed, 608 insertions, 293 deletions
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f5e3056..03dda8b 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -26,6 +26,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "BranchFolding.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -62,10 +64,12 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
"blocks in the function."),
cl::init(0), cl::Hidden);
-static cl::opt<unsigned>
- AlignAllLoops("align-all-loops",
- cl::desc("Force the alignment of all loops in the function."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> AlignAllNonFallThruBlocks(
+ "align-all-nofallthru-blocks",
+ cl::desc("Force the alignment of all "
+ "blocks that have no fall-through predecessors (i.e. don't add "
+ "nops that are executed)."),
+ cl::init(0), cl::Hidden);
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned> ExitBlockBias(
@@ -97,10 +101,15 @@ static cl::opt<bool>
cl::desc("Model the cost of loop rotation more "
"precisely by using profile data."),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ ForcePreciseRotationCost("force-precise-rotation-cost",
+ cl::desc("Force the use of precise cost "
+ "loop rotation strategy."),
+ cl::init(false), cl::Hidden);
static cl::opt<unsigned> MisfetchCost(
"misfetch-cost",
- cl::desc("Cost that models the probablistic risk of an instruction "
+ cl::desc("Cost that models the probabilistic risk of an instruction "
"misfetch due to a jump comparing to falling through, whose cost "
"is zero."),
cl::init(1), cl::Hidden);
@@ -109,6 +118,15 @@ static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
+static cl::opt<bool>
+BranchFoldPlacement("branch-fold-placement",
+ cl::desc("Perform branch folding during placement. "
+ "Reduces code size."),
+ cl::init(true), cl::Hidden);
+
+extern cl::opt<unsigned> StaticLikelyProb;
+extern cl::opt<unsigned> ProfileLikelyProb;
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
@@ -149,7 +167,7 @@ public:
/// function. It also registers itself as the chain that block participates
/// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
- : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
assert(BB && "Cannot create a chain with a null basic block");
BlockToChain[BB] = this;
}
@@ -201,11 +219,16 @@ public:
}
#endif // NDEBUG
- /// \brief Count of predecessors within the loop currently being processed.
+ /// \brief Count of predecessors of any block within the chain which have not
+ /// yet been scheduled. In general, we will delay scheduling this chain
+ /// until those predecessors are scheduled (or we find a sufficiently good
+ /// reason to override this heuristic.) Note that when forming loop chains,
+ /// blocks outside the loop are ignored and treated as if they were already
+ /// scheduled.
///
- /// This count is updated at each loop we process to represent the number of
- /// in-loop predecessors of this chain.
- unsigned LoopPredecessors;
+ /// Note: This field is reinitialized multiple times - once for each loop,
+ /// and then once for the function as a whole.
+ unsigned UnscheduledPredecessors;
};
}
@@ -214,14 +237,21 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A typedef for a block filter set.
typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+ /// \brief work lists of blocks that are ready to be laid out
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
+
+ /// \brief Machine Function
+ MachineFunction *F;
+
/// \brief A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
/// \brief A handle to the function-wide block frequency pass.
- const MachineBlockFrequencyInfo *MBFI;
+ std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
/// \brief A handle to the loop info.
- const MachineLoopInfo *MLI;
+ MachineLoopInfo *MLI;
/// \brief A handle to the target's instruction info.
const TargetInstrInfo *TII;
@@ -254,33 +284,56 @@ class MachineBlockPlacement : public MachineFunctionPass {
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
- SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
+ BranchProbability
+ collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors);
+ bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ BranchProbability SuccProb,
+ BranchProbability HotProb);
+ bool
+ hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
const BlockFilterSet *BlockFilter);
MachineBasicBlock *
selectBestCandidateBlock(BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &WorkList,
- const BlockFilterSet *BlockFilter);
+ SmallVectorImpl<MachineBasicBlock *> &WorkList);
MachineBasicBlock *
- getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain,
+ getFirstUnplacedBlock(const BlockChain &PlacedChain,
MachineFunction::iterator &PrevUnplacedBlockIt,
const BlockFilterSet *BlockFilter);
+
+ /// \brief Add a basic block to the work list if it is appropriate.
+ ///
+ /// If the optional parameter BlockFilter is provided, only MBB
+ /// present in the set will be added to the worklist. If nullptr
+ /// is provided, no filtering occurs.
+ void fillWorkLists(MachineBasicBlock *MBB,
+ SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
+ const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
+ MachineBasicBlock *findBestLoopExit(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L);
- void buildLoopChains(MachineFunction &F, MachineLoop &L);
+ BlockFilterSet collectLoopBlockSet(MachineLoop &L);
+ void buildLoopChains(MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
const BlockFilterSet &LoopBlockSet);
void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- void buildCFGChains(MachineFunction &F);
+ void collectMustExecuteBBs();
+ void buildCFGChains();
+ void optimizeBranches();
+ void alignBlocks();
public:
static char ID; // Pass identification, replacement for typeid
@@ -295,6 +348,7 @@ public:
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -319,18 +373,7 @@ static std::string getBlockName(MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
OS << "BB#" << BB->getNumber();
- OS << " (derived from LLVM BB '" << BB->getName() << "')";
- OS.flush();
- return Result;
-}
-
-/// \brief Helper to print the number of a MBB.
-///
-/// Only used by debug logging.
-static std::string getBlockNum(MachineBasicBlock *BB) {
- std::string Result;
- raw_string_ostream OS(Result);
- OS << "BB#" << BB->getNumber();
+ OS << " ('" << BB->getName() << "')";
OS.flush();
return Result;
}
@@ -344,7 +387,6 @@ static std::string getBlockNum(MachineBasicBlock *BB) {
/// chain which reach the zero-predecessor state to the worklist passed in.
void MachineBlockPlacement::markChainSuccessors(
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
- SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
@@ -363,30 +405,26 @@ void MachineBlockPlacement::markChainSuccessors(
// This is a cross-chain edge that is within the loop, so decrement the
// loop predecessor count of the destination chain.
- if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
- BlockWorkList.push_back(*SuccChain.begin());
+ if (SuccChain.UnscheduledPredecessors == 0 ||
+ --SuccChain.UnscheduledPredecessors > 0)
+ continue;
+
+ auto *MBB = *SuccChain.begin();
+ if (MBB->isEHPad())
+ EHPadWorkList.push_back(MBB);
+ else
+ BlockWorkList.push_back(MBB);
}
}
}
-/// \brief Select the best successor for a block.
-///
-/// This looks across all successors of a particular block and attempts to
-/// select the "best" one to be the layout successor. It only considers direct
-/// successors which also pass the block filter. It will attempt to avoid
-/// breaking CFG structure, but cave and break such structures in the case of
-/// very hot successor edges.
-///
-/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *
-MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
- const BranchProbability HotProb(4, 5); // 80%
-
- MachineBasicBlock *BestSucc = nullptr;
- auto BestProb = BranchProbability::getZero();
-
+/// This helper function collects the set of successors of block
+/// \p BB that are allowed to be its layout successors, and return
+/// the total branch probability of edges from \p BB to those
+/// blocks.
+BranchProbability MachineBlockPlacement::collectViableSuccessors(
+ MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors) {
// Adjust edge probabilities by excluding edges pointing to blocks that is
// either not in BlockFilter or is already in the current chain. Consider the
// following CFG:
@@ -400,20 +438,17 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
// Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
// A->C is chosen as a fall-through, D won't be selected as a successor of C
// due to CFG constraint (the probability of C->D is not greater than
- // HotProb). If we exclude E that is not in BlockFilter when calculating the
- // probability of C->D, D will be selected and we will get A C D B as the
- // layout of this loop.
+ // HotProb to break top-order). If we exclude E that is not in BlockFilter
+ // when calculating the probability of C->D, D will be selected and we
+ // will get A C D B as the layout of this loop.
auto AdjustedSumProb = BranchProbability::getOne();
- SmallVector<MachineBasicBlock *, 4> Successors;
for (MachineBasicBlock *Succ : BB->successors()) {
bool SkipSucc = false;
- if (BlockFilter && !BlockFilter->count(Succ)) {
+ if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
SkipSucc = true;
} else {
BlockChain *SuccChain = BlockToChain[Succ];
if (SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(Succ)
- << " -> Already merged!\n");
SkipSucc = true;
} else if (Succ != *SuccChain->begin()) {
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
@@ -426,78 +461,267 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
Successors.push_back(Succ);
}
- DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
- for (MachineBasicBlock *Succ : Successors) {
- BranchProbability SuccProb;
- uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator();
- uint32_t SuccProbD = AdjustedSumProb.getNumerator();
- if (SuccProbN >= SuccProbD)
- SuccProb = BranchProbability::getOne();
- else
- SuccProb = BranchProbability(SuccProbN, SuccProbD);
-
- // If we outline optional branches, look whether Succ is unavoidable, i.e.
- // dominates all terminators of the MachineFunction. If it does, other
- // successors must be optional. Don't do this for cold branches.
- if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
- UnavoidableBlocks.count(Succ) > 0) {
- auto HasShortOptionalBranch = [&]() {
- for (MachineBasicBlock *Pred : Succ->predecessors()) {
- // Check whether there is an unplaced optional branch.
- if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
- continue;
- // Check whether the optional branch has exactly one BB.
- if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
- continue;
- // Check whether the optional branch is small.
- if (Pred->size() < OutlineOptionalThreshold)
- return true;
- }
+ return AdjustedSumProb;
+}
+
+/// The helper function returns the branch probability that is adjusted
+/// or normalized over the new total \p AdjustedSumProb.
+static BranchProbability
+getAdjustedProbability(BranchProbability OrigProb,
+ BranchProbability AdjustedSumProb) {
+ BranchProbability SuccProb;
+ uint32_t SuccProbN = OrigProb.getNumerator();
+ uint32_t SuccProbD = AdjustedSumProb.getNumerator();
+ if (SuccProbN >= SuccProbD)
+ SuccProb = BranchProbability::getOne();
+ else
+ SuccProb = BranchProbability(SuccProbN, SuccProbD);
+
+ return SuccProb;
+}
+
+/// When the option OutlineOptionalBranches is on, this method
+/// checks if the fallthrough candidate block \p Succ (of block
+/// \p BB) also has other unscheduled predecessor blocks which
+/// are also successors of \p BB (forming triangular shape CFG).
+/// If none of such predecessors are small, it returns true.
+/// The caller can choose to select \p Succ as the layout successors
+/// so that \p Succ's predecessors (optional branches) can be
+/// outlined.
+/// FIXME: fold this with more general layout cost analysis.
+bool MachineBlockPlacement::shouldPredBlockBeOutlined(
+ MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
+ BranchProbability HotProb) {
+ if (!OutlineOptionalBranches)
+ return false;
+ // If we outline optional branches, look whether Succ is unavoidable, i.e.
+ // dominates all terminators of the MachineFunction. If it does, other
+ // successors must be optional. Don't do this for cold branches.
+ if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Check whether there is an unplaced optional branch.
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+ // Check whether the optional branch has exactly one BB.
+ if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ continue;
+ // Check whether the optional branch is small.
+ if (Pred->size() < OutlineOptionalThreshold)
return false;
- };
- if (!HasShortOptionalBranch())
- return Succ;
}
+ return true;
+ } else
+ return false;
+}
- // Only consider successors which are either "hot", or wouldn't violate
- // any CFG constraints.
- BlockChain &SuccChain = *BlockToChain[Succ];
- if (SuccChain.LoopPredecessors != 0) {
- if (SuccProb < HotProb) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob) (CFG conflict)\n");
- continue;
- }
+// When profile is not present, return the StaticLikelyProb.
+// When profile is available, we need to handle the triangle-shape CFG.
+static BranchProbability getLayoutSuccessorProbThreshold(
+ MachineBasicBlock *BB) {
+ if (!BB->getParent()->getFunction()->getEntryCount())
+ return BranchProbability(StaticLikelyProb, 100);
+ if (BB->succ_size() == 2) {
+ const MachineBasicBlock *Succ1 = *BB->succ_begin();
+ const MachineBasicBlock *Succ2 = *(BB->succ_begin() + 1);
+ if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
+ /* See case 1 below for the cost analysis. For BB->Succ to
+ * be taken with smaller cost, the following needs to hold:
+ * Prob(BB->Succ) > 2* Prob(BB->Pred)
+ * So the threshold T
+ * T = 2 * (1-Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
+ * We have T + T/2 = 1, i.e. T = 2/3. Also adding user specified
+ * branch bias, we have
+ * T = (2/3)*(ProfileLikelyProb/50)
+ * = (2*ProfileLikelyProb)/150)
+ */
+ return BranchProbability(2 * ProfileLikelyProb, 150);
+ }
+ }
+ return BranchProbability(ProfileLikelyProb, 100);
+}
- // Make sure that a hot successor doesn't have a globally more
- // important predecessor.
- auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
- BlockFrequency CandidateEdgeFreq =
- MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl();
- bool BadCFGConflict = false;
- for (MachineBasicBlock *Pred : Succ->predecessors()) {
- if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
- continue;
- BlockFrequency PredEdgeFreq =
- MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
- if (PredEdgeFreq >= CandidateEdgeFreq) {
- BadCFGConflict = true;
- break;
- }
- }
- if (BadCFGConflict) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob) (non-cold CFG conflict)\n");
- continue;
- }
+/// Checks to see if the layout candidate block \p Succ has a better layout
+/// predecessor than \c BB. If yes, returns true.
+bool MachineBlockPlacement::hasBetterLayoutPredecessor(
+ MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
+ BranchProbability SuccProb, BranchProbability RealSuccProb,
+ BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+
+ // There isn't a better layout when there are no unscheduled predecessors.
+ if (SuccChain.UnscheduledPredecessors == 0)
+ return false;
+
+ // There are two basic scenarios here:
+ // -------------------------------------
+ // Case 1: triangular shape CFG (if-then):
+ // BB
+ // | \
+ // | \
+ // | Pred
+ // | /
+ // Succ
+ // In this case, we are evaluating whether to select edge -> Succ, e.g.
+ // set Succ as the layout successor of BB. Picking Succ as BB's
+ // successor breaks the CFG constraints (FIXME: define these constraints).
+ // With this layout, Pred BB
+ // is forced to be outlined, so the overall cost will be cost of the
+ // branch taken from BB to Pred, plus the cost of back taken branch
+ // from Pred to Succ, as well as the additional cost associated
+ // with the needed unconditional jump instruction from Pred To Succ.
+
+ // The cost of the topological order layout is the taken branch cost
+ // from BB to Succ, so to make BB->Succ a viable candidate, the following
+ // must hold:
+ // 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
+ // < freq(BB->Succ) * taken_branch_cost.
+ // Ignoring unconditional jump cost, we get
+ // freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
+ // prob(BB->Succ) > 2 * prob(BB->Pred)
+ //
+ // When real profile data is available, we can precisely compute the
+ // probability threshold that is needed for edge BB->Succ to be considered.
+ // Without profile data, the heuristic requires the branch bias to be
+ // a lot larger to make sure the signal is very strong (e.g. 80% default).
+ // -----------------------------------------------------------------
+ // Case 2: diamond like CFG (if-then-else):
+ // S
+ // / \
+ // | \
+ // BB Pred
+ // \ /
+ // Succ
+ // ..
+ //
+ // The current block is BB and edge BB->Succ is now being evaluated.
+ // Note that edge S->BB was previously already selected because
+ // prob(S->BB) > prob(S->Pred).
+ // At this point, 2 blocks can be placed after BB: Pred or Succ. If we
+ // choose Pred, we will have a topological ordering as shown on the left
+ // in the picture below. If we choose Succ, we have the solution as shown
+ // on the right:
+ //
+ // topo-order:
+ //
+ // S----- ---S
+ // | | | |
+ // ---BB | | BB
+ // | | | |
+ // | pred-- | Succ--
+ // | | | |
+ // ---succ ---pred--
+ //
+ // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred)
+ // = freq(S->Pred) + freq(S->BB)
+ //
+ // If we have profile data (i.e, branch probabilities can be trusted), the
+ // cost (number of taken branches) with layout S->BB->Succ->Pred is 2 *
+ // freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB).
+ // We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which
+ // means the cost of topological order is greater.
+ // When profile data is not available, however, we need to be more
+ // conservative. If the branch prediction is wrong, breaking the topo-order
+ // will actually yield a layout with large cost. For this reason, we need
+ // strong biased branch at block S with Prob(S->BB) in order to select
+ // BB->Succ. This is equivalent to looking the CFG backward with backward
+ // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
+ // profile data).
+
+ BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
+
+ // Forward checking. For case 2, SuccProb will be 1.
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+ << " (prob) (CFG conflict)\n");
+ return true;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more
+ // important predecessor.
+ BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
+ bool BadCFGConflict = false;
+
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
+ (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+ // Do backward checking. For case 1, it is actually redundant check. For
+ // case 2 above, we need a backward checking to filter out edges that are
+ // not 'strongly' biased. With profile data available, the check is mostly
+ // redundant too (when threshold prob is set at 50%) unless S has more than
+ // two successors.
+ // BB Pred
+ // \ /
+ // Succ
+ // We select edge BB->Succ if
+ // freq(BB->Succ) > freq(Succ) * HotProb
+ // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
+ // HotProb
+ // i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
+ BlockFrequency PredEdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
+ if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
+ BadCFGConflict = true;
+ break;
}
+ }
+ if (BadCFGConflict) {
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob)"
- << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
- << "\n");
+ << " (prob) (non-cold CFG conflict)\n");
+ return true;
+ }
+
+ return false;
+}
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *
+MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(StaticLikelyProb, 100);
+
+ MachineBasicBlock *BestSucc = nullptr;
+ auto BestProb = BranchProbability::getZero();
+
+ SmallVector<MachineBasicBlock *, 4> Successors;
+ auto AdjustedSumProb =
+ collectViableSuccessors(BB, Chain, BlockFilter, Successors);
+
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock *Succ : Successors) {
+ auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
+ BranchProbability SuccProb =
+ getAdjustedProbability(RealSuccProb, AdjustedSumProb);
+
+ // This heuristic is off by default.
+ if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
+ HotProb))
+ return Succ;
+
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Skip the edge \c BB->Succ if block \c Succ has a better layout
+ // predecessor that yields lower global cost.
+ if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
+ Chain, BlockFilter))
+ continue;
+
+ DEBUG(
+ dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
if (BestSucc && BestProb >= SuccProb)
continue;
BestSucc = Succ;
@@ -513,12 +737,11 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
/// profitable only really makes sense in the context of a loop. This returns
/// the most frequently visited block in the worklist, which in the case of
/// a loop, is the one most desirable to be physically close to the rest of the
-/// loop body in order to improve icache behavior.
+/// loop body in order to improve i-cache behavior.
///
/// \returns The best block found, or null if none are viable.
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
- const BlockFilterSet *BlockFilter) {
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
// Once we need to walk the worklist looking for a candidate, cleanup the
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
@@ -529,24 +752,51 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
}),
WorkList.end());
+ if (WorkList.empty())
+ return nullptr;
+
+ bool IsEHPad = WorkList[0]->isEHPad();
+
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
for (MachineBasicBlock *MBB : WorkList) {
+ assert(MBB->isEHPad() == IsEHPad);
+
BlockChain &SuccChain = *BlockToChain[MBB];
- if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n");
+ if (&SuccChain == &Chain)
continue;
- }
- assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+ assert(SuccChain.UnscheduledPredecessors == 0 && "Found CFG-violating block");
BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
- if (BestBlock && BestFreq >= CandidateFreq)
+
+ // For ehpad, we layout the least probable first as to avoid jumping back
+ // from least probable landingpads to more probable ones.
+ //
+ // FIXME: Using probability is probably (!) not the best way to achieve
+ // this. We should probably have a more principled approach to layout
+ // cleanup code.
+ //
+ // The goal is to get:
+ //
+ // +--------------------------+
+ // | V
+ // InnerLp -> InnerCleanup OuterLp -> OuterCleanup -> Resume
+ //
+ // Rather than:
+ //
+ // +-------------------------------------+
+ // V |
+ // OuterLp -> OuterCleanup -> Resume InnerLp -> InnerCleanup
+ if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
continue;
+
BestBlock = MBB;
BestFreq = CandidateFreq;
}
+
return BestBlock;
}
@@ -558,10 +808,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
/// LastUnplacedBlockIt. We update this iterator on each call to avoid
/// re-scanning the entire sequence on repeated calls to this routine.
MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
- MachineFunction &F, const BlockChain &PlacedChain,
+ const BlockChain &PlacedChain,
MachineFunction::iterator &PrevUnplacedBlockIt,
const BlockFilterSet *BlockFilter) {
- for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
++I) {
if (BlockFilter && !BlockFilter->count(&*I))
continue;
@@ -576,22 +826,51 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
return nullptr;
}
+void MachineBlockPlacement::fillWorkLists(
+ MachineBasicBlock *MBB,
+ SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
+ const BlockFilterSet *BlockFilter = nullptr) {
+ BlockChain &Chain = *BlockToChain[MBB];
+ if (!UpdatedPreds.insert(&Chain).second)
+ return;
+
+ assert(Chain.UnscheduledPredecessors == 0);
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockFilter && !BlockFilter->count(Pred))
+ continue;
+ if (BlockToChain[Pred] == &Chain)
+ continue;
+ ++Chain.UnscheduledPredecessors;
+ }
+ }
+
+ if (Chain.UnscheduledPredecessors != 0)
+ return;
+
+ MBB = *Chain.begin();
+ if (MBB->isEHPad())
+ EHPadWorkList.push_back(MBB);
+ else
+ BlockWorkList.push_back(MBB);
+}
+
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB, BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
- assert(BB);
- assert(BlockToChain[BB] == &Chain);
- MachineFunction &F = *BB->getParent();
- MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+ assert(BB && "BB must not be null.\n");
+ assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
+ MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
MachineBasicBlock *LoopHeaderBB = BB;
- markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
BB = *std::prev(Chain.end());
for (;;) {
- assert(BB);
- assert(BlockToChain[BB] == &Chain);
- assert(*std::prev(Chain.end()) == BB);
+ assert(BB && "null block found at end of chain in loop.");
+ assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
+ assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
+
// Look for the best viable successor if there is one to place immediately
// after this block.
@@ -601,11 +880,12 @@ void MachineBlockPlacement::buildChain(
// block among those we've identified as not violating the loop's CFG at
// this point. This won't be a fallthrough, but it will increase locality.
if (!BestSucc)
- BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
if (!BestSucc) {
- BestSucc =
- getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
+ BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
if (!BestSucc)
break;
@@ -615,18 +895,18 @@ void MachineBlockPlacement::buildChain(
// Place this block, updating the datastructures to reflect its placement.
BlockChain &SuccChain = *BlockToChain[BestSucc];
- // Zero out LoopPredecessors for the successor we're about to merge in case
+ // Zero out UnscheduledPredecessors for the successor we're about to merge in case
// we selected a successor that didn't fit naturally into the CFG.
- SuccChain.LoopPredecessors = 0;
- DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to "
- << getBlockNum(BestSucc) << "\n");
- markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ SuccChain.UnscheduledPredecessors = 0;
+ DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
+ << getBlockName(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *std::prev(Chain.end());
}
DEBUG(dbgs() << "Finished forming chain for header block "
- << getBlockNum(*Chain.begin()) << "\n");
+ << getBlockName(*Chain.begin()) << "\n");
}
/// \brief Find the best loop top block for layout.
@@ -673,8 +953,10 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
}
// If no direct predecessor is fine, just use the loop header.
- if (!BestPred)
+ if (!BestPred) {
+ DEBUG(dbgs() << " final top unchanged\n");
return L.getHeader();
+ }
// Walk backwards through any straight line of predecessors.
while (BestPred->pred_size() == 1 &&
@@ -692,7 +974,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -710,7 +992,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
unsigned BestExitLoopDepth = 0;
MachineBasicBlock *ExitingBB = nullptr;
// If there are exits to outer loops, loop rotation can severely limit
- // fallthrough opportunites unless it selects such an exit. Keep a set of
+ // fallthrough opportunities unless it selects such an exit. Keep a set of
// blocks where rotating to exit with that block will reach an outer loop.
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
@@ -780,7 +1062,6 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
// Restore the old exiting state, no viable looping successor was found.
ExitingBB = OldExitingBB;
BestExitEdgeFreq = OldBestExitEdgeFreq;
- continue;
}
}
// Without a candidate exiting block or with only a single block in the
@@ -973,7 +1254,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
}
}
- DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter)
+ DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter)
<< " to the top: " << Cost.getFrequency() << "\n");
if (Cost < SmallestRotationCost) {
@@ -983,7 +1264,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
}
if (RotationPos != LoopChain.end()) {
- DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos)
+ DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
<< " to the top\n");
std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
}
@@ -994,7 +1275,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
/// When profile data is available, exclude cold blocks from the returned set;
/// otherwise, collect all blocks in the loop.
MachineBlockPlacement::BlockFilterSet
-MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) {
+MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
BlockFilterSet LoopBlockSet;
// Filter cold blocks off from LoopBlockSet when profile data is available.
@@ -1006,7 +1287,7 @@ MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) {
// will be merged into the first outer loop chain for which this block is not
// cold anymore. This needs precise profile data and we only do this when
// profile data is available.
- if (F.getFunction()->getEntryCount()) {
+ if (F->getFunction()->getEntryCount()) {
BlockFrequency LoopFreq(0);
for (auto LoopPred : L.getHeader()->predecessors())
if (!L.contains(LoopPred))
@@ -1031,21 +1312,22 @@ MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) {
/// as much as possible. We can then stitch the chains together in a way which
/// both preserves the topological structure and minimizes taken conditional
/// branches.
-void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
- MachineLoop &L) {
+void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
for (MachineLoop *InnerLoop : L)
- buildLoopChains(F, *InnerLoop);
+ buildLoopChains(*InnerLoop);
- SmallVector<MachineBasicBlock *, 16> BlockWorkList;
- BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L);
+ assert(BlockWorkList.empty());
+ assert(EHPadWorkList.empty());
+ BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
// Check if we have profile data for this function. If yes, we will rotate
// this loop by modeling costs more precisely which requires the profile data
// for better layout.
bool RotateLoopWithProfile =
- PreciseRotationCost && F.getFunction()->getEntryCount();
+ ForcePreciseRotationCost ||
+ (PreciseRotationCost && F->getFunction()->getEntryCount());
// First check to see if there is an obviously preferable top block for the
// loop. This will default to the header, but may end up as one of the
@@ -1060,7 +1342,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
// branches by placing an exit edge at the bottom.
MachineBasicBlock *ExitingBB = nullptr;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
- ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
+ ExitingBB = findBestLoopExit(L, LoopBlockSet);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -1068,29 +1350,13 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
// walk the blocks, and use a set to prevent visiting a particular chain
// twice.
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- assert(LoopChain.LoopPredecessors == 0);
+ assert(LoopChain.UnscheduledPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineBasicBlock *LoopBB : LoopBlockSet) {
- BlockChain &Chain = *BlockToChain[LoopBB];
- if (!UpdatedPreds.insert(&Chain).second)
- continue;
+ for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
- assert(Chain.LoopPredecessors == 0);
- for (MachineBasicBlock *ChainBB : Chain) {
- assert(BlockToChain[ChainBB] == &Chain);
- for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
- if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
- continue;
- ++Chain.LoopPredecessors;
- }
- }
-
- if (Chain.LoopPredecessors == 0)
- BlockWorkList.push_back(*Chain.begin());
- }
-
- buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
+ buildChain(LoopTop, LoopChain, &LoopBlockSet);
if (RotateLoopWithProfile)
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
@@ -1100,7 +1366,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
DEBUG({
// Crash at the end so we get all of the debugging output first.
bool BadLoop = false;
- if (LoopChain.LoopPredecessors) {
+ if (LoopChain.UnscheduledPredecessors) {
BadLoop = true;
dbgs() << "Loop chain contains a block without its preds placed!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
@@ -1129,13 +1395,42 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
}
assert(!BadLoop && "Detected problems with the placement of this loop.");
});
+
+ BlockWorkList.clear();
+ EHPadWorkList.clear();
+}
+
+/// When OutlineOpitonalBranches is on, this method collects BBs that
+/// dominates all terminator blocks of the function \p F.
+void MachineBlockPlacement::collectMustExecuteBBs() {
+ if (OutlineOptionalBranches) {
+ // Find the nearest common dominator of all of F's terminators.
+ MachineBasicBlock *Terminator = nullptr;
+ for (MachineBasicBlock &MBB : *F) {
+ if (MBB.succ_size() == 0) {
+ if (Terminator == nullptr)
+ Terminator = &MBB;
+ else
+ Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+ }
+ }
+
+ // MBBs dominating this common dominator are unavoidable.
+ UnavoidableBlocks.clear();
+ for (MachineBasicBlock &MBB : *F) {
+ if (MDT->dominates(&MBB, Terminator)) {
+ UnavoidableBlocks.insert(&MBB);
+ }
+ }
+ }
}
-void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
+void MachineBlockPlacement::buildCFGChains() {
// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
+ ++FI) {
MachineBasicBlock *BB = &*FI;
BlockChain *Chain =
new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
@@ -1144,7 +1439,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
for (;;) {
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
break;
MachineFunction::iterator NextFI = std::next(FI);
@@ -1161,55 +1456,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
- if (OutlineOptionalBranches) {
- // Find the nearest common dominator of all of F's terminators.
- MachineBasicBlock *Terminator = nullptr;
- for (MachineBasicBlock &MBB : F) {
- if (MBB.succ_size() == 0) {
- if (Terminator == nullptr)
- Terminator = &MBB;
- else
- Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
- }
- }
-
- // MBBs dominating this common dominator are unavoidable.
- UnavoidableBlocks.clear();
- for (MachineBasicBlock &MBB : F) {
- if (MDT->dominates(&MBB, Terminator)) {
- UnavoidableBlocks.insert(&MBB);
- }
- }
- }
+ // Turned on with OutlineOptionalBranches option
+ collectMustExecuteBBs();
// Build any loop-based chains.
for (MachineLoop *L : *MLI)
- buildLoopChains(F, *L);
+ buildLoopChains(*L);
- SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ assert(BlockWorkList.empty());
+ assert(EHPadWorkList.empty());
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- for (MachineBasicBlock &MBB : F) {
- BlockChain &Chain = *BlockToChain[&MBB];
- if (!UpdatedPreds.insert(&Chain).second)
- continue;
-
- assert(Chain.LoopPredecessors == 0);
- for (MachineBasicBlock *ChainBB : Chain) {
- assert(BlockToChain[ChainBB] == &Chain);
- for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
- if (BlockToChain[Pred] == &Chain)
- continue;
- ++Chain.LoopPredecessors;
- }
- }
-
- if (Chain.LoopPredecessors == 0)
- BlockWorkList.push_back(*Chain.begin());
- }
+ for (MachineBasicBlock &MBB : *F)
+ fillWorkLists(&MBB, UpdatedPreds);
- BlockChain &FunctionChain = *BlockToChain[&F.front()];
- buildChain(&F.front(), FunctionChain, BlockWorkList);
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ buildChain(&F->front(), FunctionChain);
#ifndef NDEBUG
typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
@@ -1218,7 +1480,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Crash at the end so we get all of the debugging output first.
bool BadFunc = false;
FunctionBlockSetType FunctionBlockSet;
- for (MachineBasicBlock &MBB : F)
+ for (MachineBasicBlock &MBB : *F)
FunctionBlockSet.insert(&MBB);
for (MachineBasicBlock *ChainBB : FunctionChain)
@@ -1238,13 +1500,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
});
// Splice the blocks into place.
- MachineFunction::iterator InsertPos = F.begin();
+ MachineFunction::iterator InsertPos = F->begin();
+ DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n");
for (MachineBasicBlock *ChainBB : FunctionChain) {
DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
: " ... ")
<< getBlockName(ChainBB) << "\n");
if (InsertPos != MachineFunction::iterator(ChainBB))
- F.splice(InsertPos, ChainBB);
+ F->splice(InsertPos, ChainBB);
else
++InsertPos;
@@ -1258,69 +1521,90 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// boiler plate.
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
- // The "PrevBB" is not yet updated to reflect current code layout, so,
- // o. it may fall-through to a block without explict "goto" instruction
- // before layout, and no longer fall-through it after layout; or
- // o. just opposite.
- //
- // AnalyzeBranch() may return erroneous value for FBB when these two
- // situations take place. For the first scenario FBB is mistakenly set
- // NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
- // is mistakenly pointing to "*BI".
- //
- bool needUpdateBr = true;
- if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
- PrevBB->updateTerminator();
- needUpdateBr = false;
- Cond.clear();
- TBB = FBB = nullptr;
- if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
- // FIXME: This should never take place.
- TBB = FBB = nullptr;
- }
- }
+ // The "PrevBB" is not yet updated to reflect current code layout, so,
+ // o. it may fall-through to a block without explicit "goto" instruction
+ // before layout, and no longer fall-through it after layout; or
+ // o. just opposite.
+ //
+ // analyzeBranch() may return erroneous value for FBB when these two
+ // situations take place. For the first scenario FBB is mistakenly set NULL;
+ // for the 2nd scenario, the FBB, which is expected to be NULL, is
+ // mistakenly pointing to "*BI".
+ // Thus, if the future change needs to use FBB before the layout is set, it
+ // has to correct FBB first by using the code similar to the following:
+ //
+ // if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
+ // PrevBB->updateTerminator();
+ // Cond.clear();
+ // TBB = FBB = nullptr;
+ // if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // // FIXME: This should never take place.
+ // TBB = FBB = nullptr;
+ // }
+ // }
+ if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond))
+ PrevBB->updateTerminator();
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond))
+ F->back().updateTerminator();
+
+ BlockWorkList.clear();
+ EHPadWorkList.clear();
+}
+
+void MachineBlockPlacement::optimizeBranches() {
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+
+ // Now that all the basic blocks in the chain have the proper layout,
+ // make a final call to AnalyzeBranch with AllowModify set.
+ // Indeed, the target may be able to optimize the branches in a way we
+ // cannot because all branches may not be analyzable.
+ // E.g., the target may be able to remove an unconditional branch to
+ // a fallthrough when it occurs after predicated terminators.
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
// If PrevBB has a two-way branch, try to re-order the branches
// such that we branch to the successor with higher probability first.
if (TBB && !Cond.empty() && FBB &&
- MBPI->getEdgeProbability(PrevBB, FBB) >
- MBPI->getEdgeProbability(PrevBB, TBB) &&
+ MBPI->getEdgeProbability(ChainBB, FBB) >
+ MBPI->getEdgeProbability(ChainBB, TBB) &&
!TII->ReverseBranchCondition(Cond)) {
DEBUG(dbgs() << "Reverse order of the two branches: "
- << getBlockName(PrevBB) << "\n");
+ << getBlockName(ChainBB) << "\n");
DEBUG(dbgs() << " Edge probability: "
- << MBPI->getEdgeProbability(PrevBB, FBB) << " vs "
- << MBPI->getEdgeProbability(PrevBB, TBB) << "\n");
+ << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
+ << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
DebugLoc dl; // FIXME: this is nowhere
- TII->RemoveBranch(*PrevBB);
- TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
- needUpdateBr = true;
+ TII->RemoveBranch(*ChainBB);
+ TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl);
+ ChainBB->updateTerminator();
}
- if (needUpdateBr)
- PrevBB->updateTerminator();
}
}
+}
- // Fixup the last block.
- Cond.clear();
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
- F.back().updateTerminator();
-
+void MachineBlockPlacement::alignBlocks() {
// Walk through the backedges of the function now that we have fully laid out
// the basic blocks and align the destination of each backedge. We don't rely
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- // FIXME: Use Function::optForSize().
- if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
+ if (F->getFunction()->optForSize())
return;
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
return; // Empty chain.
const BranchProbability ColdProb(1, 5); // 20%
- BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front());
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
for (MachineBasicBlock *ChainBB : FunctionChain) {
if (ChainBB == *FunctionChain.begin())
@@ -1334,11 +1618,6 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!L)
continue;
- if (AlignAllLoops) {
- ChainBB->setAlignment(AlignAllLoops);
- continue;
- }
-
unsigned Align = TLI->getPrefLoopAlignment(L);
if (!Align)
continue; // Don't care about loop alignment.
@@ -1380,31 +1659,67 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
-bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
- // Check for single-block functions and skip them.
- if (std::next(F.begin()) == F.end())
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
return false;
- if (skipOptnoneFunction(*F.getFunction()))
+ // Check for single-block functions and skip them.
+ if (std::next(MF.begin()) == MF.end())
return false;
+ F = &MF;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
+ getAnalysis<MachineBlockFrequencyInfo>());
MLI = &getAnalysis<MachineLoopInfo>();
- TII = F.getSubtarget().getInstrInfo();
- TLI = F.getSubtarget().getTargetLowering();
+ TII = MF.getSubtarget().getInstrInfo();
+ TLI = MF.getSubtarget().getTargetLowering();
MDT = &getAnalysis<MachineDominatorTree>();
assert(BlockToChain.empty());
- buildCFGChains(F);
+ buildCFGChains();
+
+ // Changing the layout can create new tail merging opportunities.
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ // TailMerge can create jump into if branches that make CFG irreducible for
+ // HW that requires structured CFG.
+ bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+ PassConfig->getEnableTailMerge() &&
+ BranchFoldPlacement;
+ // No tail merging opportunities if the block number is less than four.
+ if (MF.size() > 3 && EnableTailMerge) {
+ BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
+ *MBPI);
+
+ if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
+ /*AfterBlockPlacement=*/true)) {
+ // Redo the layout if tail merging creates/removes/moves blocks.
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+ buildCFGChains();
+ }
+ }
+
+ optimizeBranches();
+ alignBlocks();
BlockToChain.clear();
ChainAllocator.DestroyAll();
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
- for (MachineBasicBlock &MBB : F)
+ for (MachineBasicBlock &MBB : MF)
MBB.setAlignment(AlignAllBlock);
+ else if (AlignAllNonFallThruBlocks) {
+ // Align all of the blocks that have no fall-through predecessors to a
+ // specific alignment.
+ for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
+ auto LayoutPred = std::prev(MBI);
+ if (!LayoutPred->isSuccessor(&*MBI))
+ MBI->setAlignment(AlignAllNonFallThruBlocks);
+ }
+ }
// We always return true as we have no way to track whether the final order
// differs from the original order.
OpenPOWER on IntegriCloud