diff options
Diffstat (limited to 'contrib/llvm/lib/Target/WebAssembly/Relooper.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/WebAssembly/Relooper.cpp | 984 |
1 files changed, 984 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp b/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp new file mode 100644 index 0000000..9b718ef --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp @@ -0,0 +1,984 @@ +//===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// \brief This implements the Relooper algorithm. This implementation includes +/// optimizations added since the original academic paper [1] was published. +/// +/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In +/// Proceedings of the ACM international conference companion on Object +/// oriented programming systems languages and applications companion +/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 +/// http://doi.acm.org/10.1145/2048147.2048224 +/// +//===-------------------------------------------------------------------===// + +#include "Relooper.h" +#include "WebAssembly.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <cstring> +#include <cstdlib> +#include <functional> +#include <list> +#include <stack> +#include <string> + +#define DEBUG_TYPE "relooper" + +using namespace llvm; +using namespace Relooper; + +static cl::opt<int> RelooperSplittingFactor( + "relooper-splitting-factor", + cl::desc( + "How much to discount code size when deciding whether to split a node"), + cl::init(5)); + +static cl::opt<unsigned> RelooperMultipleSwitchThreshold( + "relooper-multiple-switch-threshold", + cl::desc( + "How many entries to allow in a multiple before we use a switch"), + cl::init(10)); + +static cl::opt<unsigned> RelooperNestingLimit( + "relooper-nesting-limit", + cl::desc( + "How much nesting is acceptable"), + cl::init(20)); + + +namespace { +/// +/// Implements the relooper algorithm for a function's blocks. +/// +/// Implementation details: The Relooper instance has +/// ownership of the blocks and shapes, and frees them when done. +/// +struct RelooperAlgorithm { + std::deque<Block *> Blocks; + std::deque<Shape *> Shapes; + Shape *Root; + bool MinSize; + int BlockIdCounter; + int ShapeIdCounter; + + RelooperAlgorithm(); + ~RelooperAlgorithm(); + + void AddBlock(Block *New, int Id = -1); + + // Calculates the shapes + void Calculate(Block *Entry); + + // Sets us to try to minimize size + void SetMinSize(bool MinSize_) { MinSize = MinSize_; } +}; + +struct RelooperAnalysis final : public FunctionPass { + static char ID; + RelooperAnalysis() : FunctionPass(ID) {} + const char *getPassName() const override { return "relooper"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnFunction(Function &F) override; +}; +} + +// RelooperAnalysis + +char RelooperAnalysis::ID = 0; +FunctionPass *llvm::createWebAssemblyRelooper() { + return new RelooperAnalysis(); +} + +bool RelooperAnalysis::runOnFunction(Function &F) { + DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n"); + RelooperAlgorithm R; + // FIXME: remove duplication between relooper's and LLVM's BBs. + std::map<const BasicBlock *, Block *> BB2B; + std::map<const Block *, const BasicBlock *> B2BB; + for (const BasicBlock &BB : F) { + // FIXME: getName is wrong here, Code is meant to represent amount of code. + // FIXME: use BranchVarInit for switch. + Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr); + R.AddBlock(B); + assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice"); + assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice"); + BB2B[&BB] = B; + B2BB[B] = &BB; + } + for (Block *B : R.Blocks) { + const BasicBlock *BB = B2BB[B]; + for (const BasicBlock *Successor : successors(BB)) + // FIXME: add branch's Condition and Code below. + B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr); + } + R.Calculate(BB2B[&F.getEntryBlock()]); + return false; // Analysis passes don't modify anything. +} + +// Helpers + +typedef MapVector<Block *, BlockSet> BlockBlockSetMap; +typedef std::list<Block *> BlockList; + +template <class T, class U> +static bool contains(const T &container, const U &contained) { + return container.count(contained); +} + + +// Branch + +Branch::Branch(const char *ConditionInit, const char *CodeInit) + : Ancestor(nullptr), Labeled(true) { + // FIXME: move from char* to LLVM data structures + Condition = ConditionInit ? strdup(ConditionInit) : nullptr; + Code = CodeInit ? strdup(CodeInit) : nullptr; +} + +Branch::~Branch() { + // FIXME: move from char* to LLVM data structures + free(static_cast<void *>(const_cast<char *>(Condition))); + free(static_cast<void *>(const_cast<char *>(Code))); +} + +// Block + +Block::Block(const char *CodeInit, const char *BranchVarInit) + : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) { + // FIXME: move from char* to LLVM data structures + Code = strdup(CodeInit); + BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr; +} + +Block::~Block() { + // FIXME: move from char* to LLVM data structures + free(static_cast<void *>(const_cast<char *>(Code))); + free(static_cast<void *>(const_cast<char *>(BranchVar))); +} + +void Block::AddBranchTo(Block *Target, const char *Condition, + const char *Code) { + assert(!contains(BranchesOut, Target) && + "cannot add more than one branch to the same target"); + BranchesOut[Target] = make_unique<Branch>(Condition, Code); +} + +// Relooper + +RelooperAlgorithm::RelooperAlgorithm() + : Root(nullptr), MinSize(false), BlockIdCounter(1), + ShapeIdCounter(0) { // block ID 0 is reserved for clearings +} + +RelooperAlgorithm::~RelooperAlgorithm() { + for (auto Curr : Blocks) + delete Curr; + for (auto Curr : Shapes) + delete Curr; +} + +void RelooperAlgorithm::AddBlock(Block *New, int Id) { + New->Id = Id == -1 ? BlockIdCounter++ : Id; + Blocks.push_back(New); +} + +struct RelooperRecursor { + RelooperAlgorithm *Parent; + RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} +}; + +void RelooperAlgorithm::Calculate(Block *Entry) { + // Scan and optimize the input + struct PreOptimizer : public RelooperRecursor { + PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} + BlockSet Live; + + void FindLive(Block *Root) { + BlockList ToInvestigate; + ToInvestigate.push_back(Root); + while (!ToInvestigate.empty()) { + Block *Curr = ToInvestigate.front(); + ToInvestigate.pop_front(); + if (contains(Live, Curr)) + continue; + Live.insert(Curr); + for (const auto &iter : Curr->BranchesOut) + ToInvestigate.push_back(iter.first); + } + } + + // If a block has multiple entries but no exits, and it is small enough, it + // is useful to split it. A common example is a C++ function where + // everything ends up at a final exit block and does some RAII cleanup. + // Without splitting, we will be forced to introduce labelled loops to + // allow reaching the final block + void SplitDeadEnds() { + unsigned TotalCodeSize = 0; + for (const auto &Curr : Live) { + TotalCodeSize += strlen(Curr->Code); + } + BlockSet Splits; + BlockSet Removed; + for (const auto &Original : Live) { + if (Original->BranchesIn.size() <= 1 || + !Original->BranchesOut.empty()) + continue; // only dead ends, for now + if (contains(Original->BranchesOut, Original)) + continue; // cannot split a looping node + if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) > + TotalCodeSize / RelooperSplittingFactor) + continue; // if splitting increases raw code size by a significant + // amount, abort + // Split the node (for simplicity, we replace all the blocks, even + // though we could have reused the original) + DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n"); + for (const auto &Prior : Original->BranchesIn) { + Block *Split = new Block(Original->Code, Original->BranchVar); + Parent->AddBlock(Split, Original->Id); + Split->BranchesIn.insert(Prior); + std::unique_ptr<Branch> Details; + Details.swap(Prior->BranchesOut[Original]); + Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition, + Details->Code); + for (const auto &iter : Original->BranchesOut) { + Block *Post = iter.first; + Branch *Details = iter.second.get(); + Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition, + Details->Code); + Post->BranchesIn.insert(Split); + } + Splits.insert(Split); + Removed.insert(Original); + } + for (const auto &iter : Original->BranchesOut) { + Block *Post = iter.first; + Post->BranchesIn.remove(Original); + } + } + for (const auto &iter : Splits) + Live.insert(iter); + for (const auto &iter : Removed) + Live.remove(iter); + } + }; + PreOptimizer Pre(this); + Pre.FindLive(Entry); + + // Add incoming branches from live blocks, ignoring dead code + for (unsigned i = 0; i < Blocks.size(); i++) { + Block *Curr = Blocks[i]; + if (!contains(Pre.Live, Curr)) + continue; + for (const auto &iter : Curr->BranchesOut) + iter.first->BranchesIn.insert(Curr); + } + + if (!MinSize) + Pre.SplitDeadEnds(); + + // Recursively process the graph + + struct Analyzer : public RelooperRecursor { + Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} + + // Add a shape to the list of shapes in this Relooper calculation + void Notice(Shape *New) { + New->Id = Parent->ShapeIdCounter++; + Parent->Shapes.push_back(New); + } + + // Create a list of entries from a block. If LimitTo is provided, only + // results in that set will appear + void GetBlocksOut(Block *Source, BlockSet &Entries, + BlockSet *LimitTo = nullptr) { + for (const auto &iter : Source->BranchesOut) + if (!LimitTo || contains(*LimitTo, iter.first)) + Entries.insert(iter.first); + } + + // Converts/processes all branchings to a specific target + void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, + BlockSet &From) { + DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type + << "\n"); + for (auto iter = Target->BranchesIn.begin(); + iter != Target->BranchesIn.end();) { + Block *Prior = *iter; + if (!contains(From, Prior)) { + iter++; + continue; + } + std::unique_ptr<Branch> PriorOut; + PriorOut.swap(Prior->BranchesOut[Target]); + PriorOut->Ancestor = Ancestor; + PriorOut->Type = Type; + if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor)) + Multiple->Breaks++; // We are breaking out of this Multiple, so need a + // loop + iter++; // carefully increment iter before erasing + Target->BranchesIn.remove(Prior); + Target->ProcessedBranchesIn.insert(Prior); + Prior->ProcessedBranchesOut[Target].swap(PriorOut); + } + } + + Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { + DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n"); + SimpleShape *Simple = new SimpleShape; + Notice(Simple); + Simple->Inner = Inner; + Inner->Parent = Simple; + if (Blocks.size() > 1) { + Blocks.remove(Inner); + GetBlocksOut(Inner, NextEntries, &Blocks); + BlockSet JustInner; + JustInner.insert(Inner); + for (const auto &iter : NextEntries) + Solipsize(iter, Branch::Direct, Simple, JustInner); + } + return Simple; + } + + Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries, + BlockSet &NextEntries) { + // Find the inner blocks in this loop. Proceed backwards from the entries + // until + // you reach a seen block, collecting as you go. + BlockSet InnerBlocks; + BlockSet Queue = Entries; + while (!Queue.empty()) { + Block *Curr = *(Queue.begin()); + Queue.remove(*Queue.begin()); + if (!contains(InnerBlocks, Curr)) { + // This element is new, mark it as inner and remove from outer + InnerBlocks.insert(Curr); + Blocks.remove(Curr); + // Add the elements prior to it + for (const auto &iter : Curr->BranchesIn) + Queue.insert(iter); + } + } + assert(!InnerBlocks.empty()); + + for (const auto &Curr : InnerBlocks) { + for (const auto &iter : Curr->BranchesOut) { + Block *Possible = iter.first; + if (!contains(InnerBlocks, Possible)) + NextEntries.insert(Possible); + } + } + + LoopShape *Loop = new LoopShape(); + Notice(Loop); + + // Solipsize the loop, replacing with break/continue and marking branches + // as Processed (will not affect later calculations) + // A. Branches to the loop entries become a continue to this shape + for (const auto &iter : Entries) + Solipsize(iter, Branch::Continue, Loop, InnerBlocks); + // B. Branches to outside the loop (a next entry) become breaks on this + // shape + for (const auto &iter : NextEntries) + Solipsize(iter, Branch::Break, Loop, InnerBlocks); + // Finish up + Shape *Inner = Process(InnerBlocks, Entries, nullptr); + Loop->Inner = Inner; + return Loop; + } + + // For each entry, find the independent group reachable by it. The + // independent group is the entry itself, plus all the blocks it can + // reach that cannot be directly reached by another entry. Note that we + // ignore directly reaching the entry itself by another entry. + // @param Ignore - previous blocks that are irrelevant + void FindIndependentGroups(BlockSet &Entries, + BlockBlockSetMap &IndependentGroups, + BlockSet *Ignore = nullptr) { + typedef std::map<Block *, Block *> BlockBlockMap; + + struct HelperClass { + BlockBlockSetMap &IndependentGroups; + BlockBlockMap Ownership; // For each block, which entry it belongs to. + // We have reached it from there. + + HelperClass(BlockBlockSetMap &IndependentGroupsInit) + : IndependentGroups(IndependentGroupsInit) {} + void InvalidateWithChildren(Block *New) { + // Being in the list means you need to be invalidated + BlockList ToInvalidate; + ToInvalidate.push_back(New); + while (!ToInvalidate.empty()) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Block *Owner = Ownership[Invalidatee]; + // Owner may have been invalidated, do not add to + // IndependentGroups! + if (contains(IndependentGroups, Owner)) + IndependentGroups[Owner].remove(Invalidatee); + if (Ownership[Invalidatee]) { // may have been seen before and + // invalidated already + Ownership[Invalidatee] = nullptr; + for (const auto &iter : Invalidatee->BranchesOut) { + Block *Target = iter.first; + BlockBlockMap::iterator Known = Ownership.find(Target); + if (Known != Ownership.end()) { + Block *TargetOwner = Known->second; + if (TargetOwner) + ToInvalidate.push_back(Target); + } + } + } + } + } + }; + HelperClass Helper(IndependentGroups); + + // We flow out from each of the entries, simultaneously. + // When we reach a new block, we add it as belonging to the one we got to + // it from. + // If we reach a new block that is already marked as belonging to someone, + // it is reachable by two entries and is not valid for any of them. + // Remove it and all it can reach that have been visited. + + // Being in the queue means we just added this item, and + // we need to add its children + BlockList Queue; + for (const auto &Entry : Entries) { + Helper.Ownership[Entry] = Entry; + IndependentGroups[Entry].insert(Entry); + Queue.push_back(Entry); + } + while (!Queue.empty()) { + Block *Curr = Queue.front(); + Queue.pop_front(); + Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership + // map if we are in the queue + if (!Owner) + continue; // we have been invalidated meanwhile after being reached + // from two entries + // Add all children + for (const auto &iter : Curr->BranchesOut) { + Block *New = iter.first; + BlockBlockMap::iterator Known = Helper.Ownership.find(New); + if (Known == Helper.Ownership.end()) { + // New node. Add it, and put it in the queue + Helper.Ownership[New] = Owner; + IndependentGroups[Owner].insert(New); + Queue.push_back(New); + continue; + } + Block *NewOwner = Known->second; + if (!NewOwner) + continue; // We reached an invalidated node + if (NewOwner != Owner) + // Invalidate this and all reachable that we have seen - we reached + // this from two locations + Helper.InvalidateWithChildren(New); + // otherwise, we have the same owner, so do nothing + } + } + + // Having processed all the interesting blocks, we remain with just one + // potential issue: + // If a->b, and a was invalidated, but then b was later reached by + // someone else, we must invalidate b. To check for this, we go over all + // elements in the independent groups, if an element has a parent which + // does *not* have the same owner, we/ must remove it and all its + // children. + + for (const auto &iter : Entries) { + BlockSet &CurrGroup = IndependentGroups[iter]; + BlockList ToInvalidate; + for (const auto &iter : CurrGroup) { + Block *Child = iter; + for (const auto &iter : Child->BranchesIn) { + Block *Parent = iter; + if (Ignore && contains(*Ignore, Parent)) + continue; + if (Helper.Ownership[Parent] != Helper.Ownership[Child]) + ToInvalidate.push_back(Child); + } + } + while (!ToInvalidate.empty()) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Helper.InvalidateWithChildren(Invalidatee); + } + } + + // Remove empty groups + for (const auto &iter : Entries) + if (IndependentGroups[iter].empty()) + IndependentGroups.erase(iter); + } + + Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries, + BlockBlockSetMap &IndependentGroups, Shape *Prev, + BlockSet &NextEntries) { + bool Fused = isa<SimpleShape>(Prev); + MultipleShape *Multiple = new MultipleShape(); + Notice(Multiple); + BlockSet CurrEntries; + for (auto &iter : IndependentGroups) { + Block *CurrEntry = iter.first; + BlockSet &CurrBlocks = iter.second; + // Create inner block + CurrEntries.clear(); + CurrEntries.insert(CurrEntry); + for (const auto &CurrInner : CurrBlocks) { + // Remove the block from the remaining blocks + Blocks.remove(CurrInner); + // Find new next entries and fix branches to them + for (auto iter = CurrInner->BranchesOut.begin(); + iter != CurrInner->BranchesOut.end();) { + Block *CurrTarget = iter->first; + auto Next = iter; + Next++; + if (!contains(CurrBlocks, CurrTarget)) { + NextEntries.insert(CurrTarget); + Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); + } + iter = Next; // increment carefully because Solipsize can remove us + } + } + Multiple->InnerMap[CurrEntry->Id] = + Process(CurrBlocks, CurrEntries, nullptr); + // If we are not fused, then our entries will actually be checked + if (!Fused) + CurrEntry->IsCheckedMultipleEntry = true; + } + // Add entries not handled as next entries, they are deferred + for (const auto &Entry : Entries) + if (!contains(IndependentGroups, Entry)) + NextEntries.insert(Entry); + // The multiple has been created, we can decide how to implement it + if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) { + Multiple->UseSwitch = true; + Multiple->Breaks++; // switch captures breaks + } + return Multiple; + } + + // Main function. + // Process a set of blocks with specified entries, returns a shape + // The Make* functions receive a NextEntries. If they fill it with data, + // those are the entries for the ->Next block on them, and the blocks + // are what remains in Blocks (which Make* modify). In this way + // we avoid recursing on Next (imagine a long chain of Simples, if we + // recursed we could blow the stack). + Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) { + BlockSet *Entries = &InitialEntries; + BlockSet TempEntries[2]; + int CurrTempIndex = 0; + BlockSet *NextEntries; + Shape *Ret = nullptr; + + auto Make = [&](Shape *Temp) { + if (Prev) + Prev->Next = Temp; + if (!Ret) + Ret = Temp; + Prev = Temp; + Entries = NextEntries; + }; + + while (1) { + CurrTempIndex = 1 - CurrTempIndex; + NextEntries = &TempEntries[CurrTempIndex]; + NextEntries->clear(); + + if (Entries->empty()) + return Ret; + if (Entries->size() == 1) { + Block *Curr = *(Entries->begin()); + if (Curr->BranchesIn.empty()) { + // One entry, no looping ==> Simple + Make(MakeSimple(Blocks, Curr, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + // One entry, looping ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + + // More than one entry, try to eliminate through a Multiple groups of + // independent blocks from an entry/ies. It is important to remove + // through multiples as opposed to looping since the former is more + // performant. + BlockBlockSetMap IndependentGroups; + FindIndependentGroups(*Entries, IndependentGroups); + + if (!IndependentGroups.empty()) { + // We can handle a group in a multiple if its entry cannot be reached + // by another group. + // Note that it might be reachable by itself - a loop. But that is + // fine, we will create a loop inside the multiple block (which + // is the performant order to do it). + for (auto iter = IndependentGroups.begin(); + iter != IndependentGroups.end();) { + Block *Entry = iter->first; + BlockSet &Group = iter->second; + auto curr = iter++; // iterate carefully, we may delete + for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); + iterBranch != Entry->BranchesIn.end(); iterBranch++) { + Block *Origin = *iterBranch; + if (!contains(Group, Origin)) { + // Reached from outside the group, so we cannot handle this + IndependentGroups.erase(curr); + break; + } + } + } + + // As an optimization, if we have 2 independent groups, and one is a + // small dead end, we can handle only that dead end. + // The other then becomes a Next - without nesting in the code and + // recursion in the analysis. + // TODO: if the larger is the only dead end, handle that too + // TODO: handle >2 groups + // TODO: handle not just dead ends, but also that do not branch to the + // NextEntries. However, must be careful there since we create a + // Next, and that Next can prevent eliminating a break (since we no + // longer naturally reach the same place), which may necessitate a + // one-time loop, which makes the unnesting pointless. + if (IndependentGroups.size() == 2) { + // Find the smaller one + auto iter = IndependentGroups.begin(); + Block *SmallEntry = iter->first; + auto SmallSize = iter->second.size(); + iter++; + Block *LargeEntry = iter->first; + auto LargeSize = iter->second.size(); + if (SmallSize != LargeSize) { // ignore the case where they are + // identical - keep things symmetrical + // there + if (SmallSize > LargeSize) { + Block *Temp = SmallEntry; + SmallEntry = LargeEntry; + LargeEntry = Temp; // Note: we did not flip the Sizes too, they + // are now invalid. TODO: use the smaller + // size as a limit? + } + // Check if dead end + bool DeadEnd = true; + BlockSet &SmallGroup = IndependentGroups[SmallEntry]; + for (const auto &Curr : SmallGroup) { + for (const auto &iter : Curr->BranchesOut) { + Block *Target = iter.first; + if (!contains(SmallGroup, Target)) { + DeadEnd = false; + break; + } + } + if (!DeadEnd) + break; + } + if (DeadEnd) + IndependentGroups.erase(LargeEntry); + } + } + + if (!IndependentGroups.empty()) + // Some groups removable ==> Multiple + Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, + *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + // No independent groups, must be loopable ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + } + }; + + // Main + + BlockSet AllBlocks; + for (const auto &Curr : Pre.Live) { + AllBlocks.insert(Curr); + } + + BlockSet Entries; + Entries.insert(Entry); + Root = Analyzer(this).Process(AllBlocks, Entries, nullptr); + assert(Root); + + /// + /// Relooper post-optimizer + /// + struct PostOptimizer { + RelooperAlgorithm *Parent; + std::stack<Shape *> LoopStack; + + PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} + + void ShapeSwitch(Shape* var, + std::function<void (SimpleShape*)> simple, + std::function<void (MultipleShape*)> multiple, + std::function<void (LoopShape*)> loop) { + switch (var->getKind()) { + case Shape::SK_Simple: { + simple(cast<SimpleShape>(var)); + break; + } + case Shape::SK_Multiple: { + multiple(cast<MultipleShape>(var)); + break; + } + case Shape::SK_Loop: { + loop(cast<LoopShape>(var)); + break; + } + } + } + + // Find the blocks that natural control flow can get us directly to, or + // through a multiple that we ignore + void FollowNaturalFlow(Shape *S, BlockSet &Out) { + ShapeSwitch(S, [&](SimpleShape* Simple) { + Out.insert(Simple->Inner); + }, [&](MultipleShape* Multiple) { + for (const auto &iter : Multiple->InnerMap) { + FollowNaturalFlow(iter.second, Out); + } + FollowNaturalFlow(Multiple->Next, Out); + }, [&](LoopShape* Loop) { + FollowNaturalFlow(Loop->Inner, Out); + }); + } + + void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) { + if (Root->Next) { + Root->Natural = Root->Next; + FindNaturals(Root->Next, Otherwise); + } else { + Root->Natural = Otherwise; + } + + ShapeSwitch(Root, [](SimpleShape* Simple) { + }, [&](MultipleShape* Multiple) { + for (const auto &iter : Multiple->InnerMap) { + FindNaturals(iter.second, Root->Natural); + } + }, [&](LoopShape* Loop){ + FindNaturals(Loop->Inner, Loop->Inner); + }); + } + + // Remove unneeded breaks and continues. + // A flow operation is trivially unneeded if the shape we naturally get to + // by normal code execution is the same as the flow forces us to. + void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr, + LoopShape *LastLoop = nullptr, + unsigned Depth = 0) { + BlockSet NaturalBlocks; + FollowNaturalFlow(Natural, NaturalBlocks); + Shape *Next = Root; + while (Next) { + Root = Next; + Next = nullptr; + ShapeSwitch( + Root, + [&](SimpleShape* Simple) { + if (Simple->Inner->BranchVar) + LastLoop = + nullptr; // a switch clears out the loop (TODO: only for + // breaks, not continue) + + if (Simple->Next) { + if (!Simple->Inner->BranchVar && + Simple->Inner->ProcessedBranchesOut.size() == 2 && + Depth < RelooperNestingLimit) { + // If there is a next block, we already know at Simple + // creation time to make direct branches, and we can do + // nothing more in general. But, we try to optimize the + // case of a break and a direct: This would normally be + // if (break?) { break; } .. + // but if we make sure to nest the else, we can save the + // break, + // if (!break?) { .. } + // This is also better because the more canonical nested + // form is easier to further optimize later. The + // downside is more nesting, which adds to size in builds with + // whitespace. + // Note that we avoid switches, as it complicates control flow + // and is not relevant for the common case we optimize here. + bool Found = false; + bool Abort = false; + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Block *Target = iter.first; + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break) { + Found = true; + if (!contains(NaturalBlocks, Target)) + Abort = true; + } else if (Details->Type != Branch::Direct) + Abort = true; + } + if (Found && !Abort) { + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break) { + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } else { + assert(Details->Type == Branch::Direct); + Details->Type = Branch::Nested; + } + } + } + Depth++; // this optimization increases depth, for us and all + // our next chain (i.e., until this call returns) + } + Next = Simple->Next; + } else { + // If there is no next then Natural is where we will + // go to by doing nothing, so we can potentially optimize some + // branches to direct. + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Block *Target = iter.first; + Branch *Details = iter.second.get(); + if (Details->Type != Branch::Direct && + contains(NaturalBlocks, + Target)) { // note: cannot handle split blocks + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } else if (Details->Type == Branch::Break && LastLoop && + LastLoop->Natural == Details->Ancestor->Natural) { + // it is important to simplify breaks, as simpler breaks + // enable other optimizations + Details->Labeled = false; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } + } + } + }, [&](MultipleShape* Multiple) + { + for (const auto &iter : Multiple->InnerMap) { + RemoveUnneededFlows(iter.second, Multiple->Next, + Multiple->Breaks ? nullptr : LastLoop, + Depth + 1); + } + Next = Multiple->Next; + }, [&](LoopShape* Loop) + { + RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1); + Next = Loop->Next; + }); + } + } + + // After we know which loops exist, we can calculate which need to be + // labeled + void FindLabeledLoops(Shape *Root) { + Shape *Next = Root; + while (Next) { + Root = Next; + Next = nullptr; + + ShapeSwitch( + Root, + [&](SimpleShape *Simple) { + MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next); + // If we are fusing a Multiple with a loop into this Simple, then + // visit it now + if (Fused && Fused->Breaks) + LoopStack.push(Fused); + if (Simple->Inner->BranchVar) + LoopStack.push(nullptr); // a switch means breaks are now useless, + // push a dummy + if (Fused) { + if (Fused->UseSwitch) + LoopStack.push(nullptr); // a switch means breaks are now + // useless, push a dummy + for (const auto &iter : Fused->InnerMap) { + FindLabeledLoops(iter.second); + } + } + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break || + Details->Type == Branch::Continue) { + assert(!LoopStack.empty()); + if (Details->Ancestor != LoopStack.top() && Details->Labeled) { + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) { + Multiple->Labeled = true; + } else { + LoopShape *Loop = cast<LoopShape>(Details->Ancestor); + Loop->Labeled = true; + } + } else { + Details->Labeled = false; + } + } + if (Fused && Fused->UseSwitch) + LoopStack.pop(); + if (Simple->Inner->BranchVar) + LoopStack.pop(); + if (Fused && Fused->Breaks) + LoopStack.pop(); + if (Fused) + Next = Fused->Next; + else + Next = Root->Next; + } + } + , [&](MultipleShape* Multiple) { + if (Multiple->Breaks) + LoopStack.push(Multiple); + for (const auto &iter : Multiple->InnerMap) + FindLabeledLoops(iter.second); + if (Multiple->Breaks) + LoopStack.pop(); + Next = Root->Next; + } + , [&](LoopShape* Loop) { + LoopStack.push(Loop); + FindLabeledLoops(Loop->Inner); + LoopStack.pop(); + Next = Root->Next; + }); + } + } + + void Process(Shape * Root) { + FindNaturals(Root); + RemoveUnneededFlows(Root); + FindLabeledLoops(Root); + } + }; + + PostOptimizer(this).Process(Root); +} |