summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
diff options
context:
space:
mode:
authordim <dim@FreeBSD.org>2014-03-21 17:53:59 +0000
committerdim <dim@FreeBSD.org>2014-03-21 17:53:59 +0000
commit9cedb8bb69b89b0f0c529937247a6a80cabdbaec (patch)
treec978f0e9ec1ab92dc8123783f30b08a7fd1e2a39 /contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
parent03fdc2934eb61c44c049a02b02aa974cfdd8a0eb (diff)
downloadFreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.zip
FreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.tar.gz
MFC 261991:
Upgrade our copy of llvm/clang to 3.4 release. This version supports all of the features in the current working draft of the upcoming C++ standard, provisionally named C++1y. The code generator's performance is greatly increased, and the loop auto-vectorizer is now enabled at -Os and -O2 in addition to -O3. The PowerPC backend has made several major improvements to code generation quality and compile time, and the X86, SPARC, ARM32, Aarch64 and SystemZ backends have all seen major feature work. Release notes for llvm and clang can be found here: <http://llvm.org/releases/3.4/docs/ReleaseNotes.html> <http://llvm.org/releases/3.4/tools/clang/docs/ReleaseNotes.html> MFC 262121 (by emaste): Update lldb for clang/llvm 3.4 import This commit largely restores the lldb source to the upstream r196259 snapshot with the addition of threaded inferior support and a few bug fixes. Specific upstream lldb revisions restored include: SVN git 181387 779e6ac 181703 7bef4e2 182099 b31044e 182650 f2dcf35 182683 0d91b80 183862 15c1774 183929 99447a6 184177 0b2934b 184948 4dc3761 184954 007e7bc 186990 eebd175 Sponsored by: DARPA, AFRL MFC 262186 (by emaste): Fix mismerge in r262121 A break statement was lost in the merge. The error had no functional impact, but restore it to reduce the diff against upstream. MFC 262303: Pull in r197521 from upstream clang trunk (by rdivacky): Use the integrated assembler by default on FreeBSD/ppc and ppc64. Requested by: jhibbits MFC 262611: Pull in r196874 from upstream llvm trunk: Fix a crash that occurs when PWD is invalid. MCJIT needs to be able to run in hostile environments, even when PWD is invalid. There's no need to crash MCJIT in this case. The obvious fix is to simply leave MCContext's CompilationDir empty when PWD can't be determined. This way, MCJIT clients, and other clients that link with LLVM don't need a valid working directory. If we do want to guarantee valid CompilationDir, that should be done only for clients of getCompilationDir(). This is as simple as checking for an empty string. The only current use of getCompilationDir is EmitGenDwarfInfo, which won't conceivably run with an invalid working dir. However, in the purely hypothetically and untestable case that this happens, the AT_comp_dir will be omitted from the compilation_unit DIE. This should help fix assertions occurring with ports-mgmt/tinderbox, when it is using jails, and sometimes invalidates clang's current working directory. Reported by: decke MFC 262809: Pull in r203007 from upstream clang trunk: Don't produce an alias between destructors with different calling conventions. Fixes pr19007. (Please note that is an LLVM PR identifier, not a FreeBSD one.) This should fix Firefox and/or libxul crashes (due to problems with regparm/stdcall calling conventions) on i386. Reported by: multiple users on freebsd-current PR: bin/187103 MFC 263048: Repair recognition of "CC" as an alias for the C++ compiler, since it was silently broken by upstream for a Windows-specific use-case. Apparently some versions of CMake still rely on this archaic feature... Reported by: rakuco MFC 263049: Garbage collect the old way of adding the libstdc++ include directories in clang's InitHeaderSearch.cpp. This has been superseded by David Chisnall's commit in r255321. Moreover, if libc++ is used, the libstdc++ include directories should not be in the search path at all. These directories are now only used if you pass -stdlib=libstdc++.
Diffstat (limited to 'contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp')
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp4133
1 files changed, 1493 insertions, 2640 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
index b0cd0f9..507570f 100644
--- a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -8,14 +8,17 @@
/// \file
//==-----------------------------------------------------------------------===//
-#define DEBUGME 0
#define DEBUG_TYPE "structcfg"
+#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
-#include "AMDIL.h"
+#include "R600InstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -28,9 +31,12 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEFAULT_VEC_SLOTS 8
+
// TODO: move-begin.
//===----------------------------------------------------------------------===//
@@ -43,12 +49,8 @@ STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
"matched");
STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
"matched");
-STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
- "pattern matched");
STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
"pattern matched");
-STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
- "matched");
STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
@@ -57,39 +59,29 @@ STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
// Miscellaneous utility for CFGStructurizer.
//
//===----------------------------------------------------------------------===//
-namespace llvmCFGStruct {
+namespace {
#define SHOWNEWINSTR(i) \
- if (DEBUGME) errs() << "New instr: " << *i << "\n"
+ DEBUG(dbgs() << "New instr: " << *i << "\n");
#define SHOWNEWBLK(b, msg) \
-if (DEBUGME) { \
- errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- errs() << "\n"; \
-}
+DEBUG( \
+ dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ dbgs() << "\n"; \
+);
#define SHOWBLK_DETAIL(b, msg) \
-if (DEBUGME) { \
+DEBUG( \
if (b) { \
- errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- b->print(errs()); \
- errs() << "\n"; \
+ dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(dbgs()); \
+ dbgs() << "\n"; \
} \
-}
+);
#define INVALIDSCCNUM -1
-#define INVALIDREGNUM 0
-
-template<class LoopinfoT>
-void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
- for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
- iterEnd = LoopInfo.end();
- iter != iterEnd; ++iter) {
- (*iter)->print(OS, 0);
- }
-}
template<class NodeT>
-void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
size_t sz = Src.size();
for (size_t i = 0; i < sz/2; ++i) {
NodeT *t = Src[i];
@@ -98,7 +90,7 @@ void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
}
}
-} //end namespace llvmCFGStruct
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
//
@@ -106,43 +98,17 @@ void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
//
//===----------------------------------------------------------------------===//
-namespace llvmCFGStruct {
-template<class PassT>
-struct CFGStructTraits {
-};
-template <class InstrT>
-class BlockInformation {
-public:
- bool isRetired;
- int sccNum;
- //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
- //Instructions defining the corresponding successor.
- BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
-};
+namespace {
-template <class BlockT, class InstrT, class RegiT>
-class LandInformation {
+class BlockInformation {
public:
- BlockT *landBlk;
- std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
- //WHILELOOP(thisloop) init before entering
- //thisloop.
- std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
- //WHILELOOP(thisloop) init after entering
- //thisloop.
- std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
- //land block, branch cond on this reg.
- std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
- //endif" after ENDLOOP(thisloop) break
- //outerLoopOf(thisLoop).
- std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
- //endif" after ENDLOOP(thisloop) continue on
- //outerLoopOf(thisLoop).
- LandInformation() : landBlk(NULL) {}
+ bool IsRetired;
+ int SccNum;
+ BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
};
-} //end of namespace llvmCFGStruct
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
//
@@ -150,1026 +116,1213 @@ public:
//
//===----------------------------------------------------------------------===//
-namespace llvmCFGStruct {
-// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
-template<class PassT>
-class CFGStructurizer {
+namespace {
+class AMDGPUCFGStructurizer : public MachineFunctionPass {
public:
- typedef enum {
+ typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
+ typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
+ typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
+
+ enum PathToKind {
Not_SinglePath = 0,
SinglePath_InPath = 1,
SinglePath_NotInPath = 2
- } PathToKind;
+ };
-public:
- typedef typename PassT::InstructionType InstrT;
- typedef typename PassT::FunctionType FuncT;
- typedef typename PassT::DominatortreeType DomTreeT;
- typedef typename PassT::PostDominatortreeType PostDomTreeT;
- typedef typename PassT::DomTreeNodeType DomTreeNodeT;
- typedef typename PassT::LoopinfoType LoopInfoT;
-
- typedef GraphTraits<FuncT *> FuncGTraits;
- //typedef FuncGTraits::nodes_iterator BlockIterator;
- typedef typename FuncT::iterator BlockIterator;
-
- typedef typename FuncGTraits::NodeType BlockT;
- typedef GraphTraits<BlockT *> BlockGTraits;
- typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
- //typedef BlockGTraits::succ_iterator InstructionIterator;
- typedef typename BlockT::iterator InstrIterator;
-
- typedef CFGStructTraits<PassT> CFGTraits;
- typedef BlockInformation<InstrT> BlockInfo;
- typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
-
- typedef int RegiT;
- typedef typename PassT::LoopType LoopT;
- typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
- typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
- //landing info for loop break
- typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+ static char ID;
-public:
- CFGStructurizer();
- ~CFGStructurizer();
+ AMDGPUCFGStructurizer(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
+ TRI(&TII->getRegisterInfo()) { }
+
+ const char *getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ }
/// Perform the CFG structurization
- bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+ bool run();
/// Perform the CFG preparation
- bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+ /// This step will remove every unconditionnal/dead jump instructions and make
+ /// sure all loops have an exit block
+ bool prepare();
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(MF.dump(););
+ OrderedBlks.clear();
+ FuncRep = &MF;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
+ MDT = &getAnalysis<MachineDominatorTree>();
+ DEBUG(MDT->print(dbgs(), (const llvm::Module*)0););
+ PDT = &getAnalysis<MachinePostDominatorTree>();
+ DEBUG(PDT->print(dbgs()););
+ prepare();
+ run();
+ DEBUG(MF.dump(););
+ return true;
+ }
-private:
- void reversePredicateSetter(typename BlockT::iterator);
- void orderBlocks();
- void printOrderedBlocks(llvm::raw_ostream &OS);
- int patternMatch(BlockT *CurBlock);
- int patternMatchGroup(BlockT *CurBlock);
-
- int serialPatternMatch(BlockT *CurBlock);
- int ifPatternMatch(BlockT *CurBlock);
- int switchPatternMatch(BlockT *CurBlock);
- int loopendPatternMatch(BlockT *CurBlock);
- int loopPatternMatch(BlockT *CurBlock);
-
- int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
- int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
- //int loopWithoutBreak(BlockT *);
-
- void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
- BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
- void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
- BlockT *ContBlock, LoopT *contLoop);
- bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
- int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock);
- int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock);
- int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock, BlockT **LandBlockPtr);
- void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock, BlockT *LandBlock,
- bool Detail = false);
- PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
- bool AllowSideEntry = true);
- BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
- bool AllowSideEntry = true);
- int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
- void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
-
- void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
- BlockT *TrueBlock, BlockT *FalseBlock,
- BlockT *LandBlock);
- void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
- void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
- BlockT *ExitLandBlock, RegiT SetReg);
- void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
- RegiT SetReg);
- BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
- std::set<BlockT*> &ExitBlockSet,
- BlockT *ExitLandBlk);
- BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
- BlockTSmallerVector &ExitingBlocks,
- BlockTSmallerVector &ExitBlocks);
- BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
- void removeUnconditionalBranch(BlockT *SrcBlock);
- void removeRedundantConditionalBranch(BlockT *SrcBlock);
- void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
-
- void removeSuccessor(BlockT *SrcBlock);
- BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
- BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
-
- void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
- InstrIterator InsertPos);
-
- void recordSccnum(BlockT *SrcBlock, int SCCNum);
- int getSCCNum(BlockT *srcBlk);
-
- void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
- bool isRetiredBlock(BlockT *SrcBlock);
- bool isActiveLoophead(BlockT *CurBlock);
- bool needMigrateBlock(BlockT *Block);
-
- BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
- BlockTSmallerVector &exitBlocks,
- std::set<BlockT*> &ExitBlockSet);
- void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
- BlockT *getLoopLandBlock(LoopT *LoopRep);
- LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
-
- void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
-
- bool hasBackEdge(BlockT *curBlock);
- unsigned getLoopDepth (LoopT *LoopRep);
- int countActiveBlock(
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
- BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
- BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+protected:
+ TargetMachine &TM;
+ MachineDominatorTree *MDT;
+ MachinePostDominatorTree *PDT;
+ MachineLoopInfo *MLI;
+ const R600InstrInfo *TII;
+ const AMDGPURegisterInfo *TRI;
+
+ // PRINT FUNCTIONS
+ /// Print the ordered Blocks.
+ void printOrderedBlocks() const {
+ size_t i = 0;
+ for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
+ iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
+ dbgs() << "BB" << (*iterBlk)->getNumber();
+ dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ dbgs() << "\n";
+ } else {
+ dbgs() << " ";
+ }
+ }
+ }
+ static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
+ for (MachineLoop::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
+ (*iter)->print(dbgs(), 0);
+ }
+ }
+
+ // UTILITY FUNCTIONS
+ int getSCCNum(MachineBasicBlock *MBB) const;
+ MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
+ bool hasBackEdge(MachineBasicBlock *MBB) const;
+ static unsigned getLoopDepth(MachineLoop *LoopRep);
+ bool isRetiredBlock(MachineBasicBlock *MBB) const;
+ bool isActiveLoophead(MachineBasicBlock *MBB) const;
+ PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
+ bool AllowSideEntry = true) const;
+ int countActiveBlock(MBBVector::const_iterator It,
+ MBBVector::const_iterator E) const;
+ bool needMigrateBlock(MachineBasicBlock *MBB) const;
+
+ // Utility Functions
+ void reversePredicateSetter(MachineBasicBlock::iterator I);
+ /// Compute the reversed DFS post order of Blocks
+ void orderBlocks(MachineFunction *MF);
+
+ // Function originaly from CFGStructTraits
+ void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
+ DebugLoc DL = DebugLoc());
+ MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
+ DebugLoc DL = DebugLoc());
+ MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
+ void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
+ DebugLoc DL);
+ void insertCondBranchBefore(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
+ DebugLoc DL);
+ void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
+ static int getBranchNzeroOpcode(int OldOpcode);
+ static int getBranchZeroOpcode(int OldOpcode);
+ static int getContinueNzeroOpcode(int OldOpcode);
+ static int getContinueZeroOpcode(int OldOpcode);
+ static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
+ static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
+ static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
+ MachineInstr *MI);
+ static bool isCondBranch(MachineInstr *MI);
+ static bool isUncondBranch(MachineInstr *MI);
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
+ /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
+ ///
+ /// BB with backward-edge could have move instructions after the branch
+ /// instruction. Such move instruction "belong to" the loop backward-edge.
+ MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
+ static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
+ static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
+ static bool isReturnBlock(MachineBasicBlock *MBB);
+ static void cloneSuccessorList(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB) ;
+ static MachineBasicBlock *clone(MachineBasicBlock *MBB);
+ /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
+ /// because the AMDGPU instruction is not recognized as terminator fix this
+ /// and retire this routine
+ void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
+ MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
+ static void wrapup(MachineBasicBlock *MBB);
+
+
+ int patternMatch(MachineBasicBlock *MBB);
+ int patternMatchGroup(MachineBasicBlock *MBB);
+ int serialPatternMatch(MachineBasicBlock *MBB);
+ int ifPatternMatch(MachineBasicBlock *MBB);
+ int loopendPatternMatch();
+ int mergeLoop(MachineLoop *LoopRep);
+ int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
+
+ void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
+ MachineLoop *ContLoop);
+ /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
+ /// the same loop with LoopLandInfo without explicitly keeping track of
+ /// loopContBlks and loopBreakBlks, this is a method to get the information.
+ bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
+ MachineBasicBlock *Src2MBB);
+ int handleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
+ int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
+ int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
+ MachineBasicBlock **LandMBBPtr);
+ void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
+ MachineBasicBlock *LandMBB, bool Detail = false);
+ int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
+ void mergeSerialBlock(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB);
+
+ void mergeIfthenelseBlock(MachineInstr *BranchMI,
+ MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
+ void mergeLooplandBlock(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *LandMBB);
+ void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
+ MachineBasicBlock *LandMBB);
+ void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineBasicBlock *ContMBB);
+ /// normalizeInfiniteLoopExit change
+ /// B1:
+ /// uncond_br LoopHeader
+ ///
+ /// to
+ /// B1:
+ /// cond_br 1 LoopHeader dummyExit
+ /// and return the newly added dummy exit block
+ MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
+ void removeUnconditionalBranch(MachineBasicBlock *MBB);
+ /// Remove duplicate branches instructions in a block.
+ /// For instance
+ /// B0:
+ /// cond_br X B1 B2
+ /// cond_br X B1 B2
+ /// is transformed to
+ /// B0:
+ /// cond_br X B1 B2
+ void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
+ void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
+ void removeSuccessor(MachineBasicBlock *MBB);
+ MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
+ MachineBasicBlock *PredMBB);
+ void migrateInstruction(MachineBasicBlock *SrcMBB,
+ MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
+ void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
+ void retireBlock(MachineBasicBlock *MBB);
+ void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL);
+
+ MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
+ /// This is work around solution for findNearestCommonDominator not avaiable
+ /// to post dom a proper fix should go to Dominators.h.
+ MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2);
private:
- DomTreeT *domTree;
- PostDomTreeT *postDomTree;
- LoopInfoT *loopInfo;
- PassT *passRep;
- FuncT *funcRep;
-
- BlockInfoMap blockInfoMap;
- LoopLandInfoMap loopLandInfoMap;
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
- const AMDGPURegisterInfo *TRI;
+ MBBInfoMap BlockInfoMap;
+ LoopLandInfoMap LLInfoMap;
+ std::map<MachineLoop *, bool> Visited;
+ MachineFunction *FuncRep;
+ SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
+};
+
+int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
+ MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
+ if (It == BlockInfoMap.end())
+ return INVALIDSCCNUM;
+ return (*It).second->SccNum;
+}
-}; //template class CFGStructurizer
+MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
+ const {
+ LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
+ if (It == LLInfoMap.end())
+ return NULL;
+ return (*It).second;
+}
+
+bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
+ MachineLoop *LoopRep = MLI->getLoopFor(MBB);
+ if (!LoopRep)
+ return false;
+ MachineBasicBlock *LoopHeader = LoopRep->getHeader();
+ return MBB->isSuccessor(LoopHeader);
+}
+
+unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
+ return LoopRep ? LoopRep->getLoopDepth() : 0;
+}
-template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
- : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
+ MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
+ if (It == BlockInfoMap.end())
+ return false;
+ return (*It).second->IsRetired;
}
-template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
- for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
- E = blockInfoMap.end(); I != E; ++I) {
- delete I->second;
+bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
+ MachineLoop *LoopRep = MLI->getLoopFor(MBB);
+ while (LoopRep && LoopRep->getHeader() == MBB) {
+ MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
+ if(!LoopLand)
+ return true;
+ if (!isRetiredBlock(LoopLand))
+ return true;
+ LoopRep = LoopRep->getParentLoop();
}
+ return false;
+}
+AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
+ bool AllowSideEntry) const {
+ assert(DstMBB);
+ if (SrcMBB == DstMBB)
+ return SinglePath_InPath;
+ while (SrcMBB && SrcMBB->succ_size() == 1) {
+ SrcMBB = *SrcMBB->succ_begin();
+ if (SrcMBB == DstMBB)
+ return SinglePath_InPath;
+ if (!AllowSideEntry && SrcMBB->pred_size() > 1)
+ return Not_SinglePath;
+ }
+ if (SrcMBB && SrcMBB->succ_size()==0)
+ return SinglePath_NotInPath;
+ return Not_SinglePath;
}
-template<class PassT>
-bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
- const AMDGPURegisterInfo * tri) {
- passRep = &pass;
- funcRep = &func;
- TRI = tri;
+int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
+ MBBVector::const_iterator E) const {
+ int Count = 0;
+ while (It != E) {
+ if (!isRetiredBlock(*It))
+ ++Count;
+ ++It;
+ }
+ return Count;
+}
- bool changed = false;
+bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
+ unsigned BlockSizeThreshold = 30;
+ unsigned CloneInstrThreshold = 100;
+ bool MultiplePreds = MBB && (MBB->pred_size() > 1);
- //FIXME: if not reducible flow graph, make it so ???
+ if(!MultiplePreds)
+ return false;
+ unsigned BlkSize = MBB->size();
+ return ((BlkSize > BlockSizeThreshold) &&
+ (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
+}
- if (DEBUGME) {
- errs() << "AMDGPUCFGStructurizer::prepare\n";
+void AMDGPUCFGStructurizer::reversePredicateSetter(
+ MachineBasicBlock::iterator I) {
+ while (I--) {
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2)
+ .setImm(OPCODE_IS_NOT_ZERO_INT);
+ return;
+ case OPCODE_IS_NOT_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2)
+ .setImm(OPCODE_IS_ZERO_INT);
+ return;
+ case OPCODE_IS_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2)
+ .setImm(OPCODE_IS_NOT_ZERO);
+ return;
+ case OPCODE_IS_NOT_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2)
+ .setImm(OPCODE_IS_ZERO);
+ return;
+ default:
+ llvm_unreachable("PRED_X Opcode invalid!");
+ }
+ }
}
+}
+
+void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
+ int NewOpcode, DebugLoc DL) {
+ MachineInstr *MI = MBB->getParent()
+ ->CreateMachineInstr(TII->get(NewOpcode), DL);
+ MBB->push_back(MI);
+ //assume the instruction doesn't take any reg operand ...
+ SHOWNEWINSTR(MI);
+}
+
+MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
+ int NewOpcode, DebugLoc DL) {
+ MachineInstr *MI =
+ MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
+ if (MBB->begin() != MBB->end())
+ MBB->insert(MBB->begin(), MI);
+ else
+ MBB->push_back(MI);
+ SHOWNEWINSTR(MI);
+ return MI;
+}
+
+MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
+ MachineBasicBlock::iterator I, int NewOpcode) {
+ MachineInstr *OldMI = &(*I);
+ MachineBasicBlock *MBB = OldMI->getParent();
+ MachineInstr *NewMBB =
+ MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
+ MBB->insert(I, NewMBB);
+ //assume the instruction doesn't take any reg operand ...
+ SHOWNEWINSTR(NewMBB);
+ return NewMBB;
+}
+
+void AMDGPUCFGStructurizer::insertCondBranchBefore(
+ MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
+ MachineInstr *OldMI = &(*I);
+ MachineBasicBlock *MBB = OldMI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
+ MBB->insert(I, NewMI);
+ MachineInstrBuilder MIB(*MF, NewMI);
+ MIB.addReg(OldMI->getOperand(1).getReg(), false);
+ SHOWNEWINSTR(NewMI);
+ //erase later oldInstr->eraseFromParent();
+}
+
+void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
+ DebugLoc DL) {
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
+ //insert before
+ blk->insert(I, NewInstr);
+ MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
+ SHOWNEWINSTR(NewInstr);
+}
- loopInfo = CFGTraits::getLoopInfo(pass);
- if (DEBUGME) {
- errs() << "LoopInfo:\n";
- PrintLoopinfo(*loopInfo, errs());
+void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
+ int NewOpcode, int RegNum) {
+ MachineFunction *MF = MBB->getParent();
+ MachineInstr *NewInstr =
+ MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
+ MBB->push_back(NewInstr);
+ MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
+ SHOWNEWINSTR(NewInstr);
+}
+
+int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
+ switch(OldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
+ default: llvm_unreachable("internal error");
}
+ return -1;
+}
- orderBlocks();
- if (DEBUGME) {
- errs() << "Ordered blocks:\n";
- printOrderedBlocks(errs());
+int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
+ switch(OldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
+ default: llvm_unreachable("internal error");
}
+ return -1;
+}
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
-
- for (typename LoopInfoT::iterator iter = loopInfo->begin(),
- iterEnd = loopInfo->end();
- iter != iterEnd; ++iter) {
- LoopT* loopRep = (*iter);
- BlockTSmallerVector exitingBlks;
- loopRep->getExitingBlocks(exitingBlks);
-
- if (exitingBlks.size() == 0) {
- BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
- if (dummyExitBlk != NULL)
- retBlks.push_back(dummyExitBlk);
- }
+int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
+ switch(OldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
+ default: llvm_unreachable("internal error");
+ };
+ return -1;
+}
+
+int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
+ switch(OldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
+ default: llvm_unreachable("internal error");
}
+ return -1;
+}
- // Remove unconditional branch instr.
- // Add dummy exit block iff there are multiple returns.
+MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
+ return MI->getOperand(0).getMBB();
+}
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
- iterBlk != iterEndBlk;
- ++iterBlk) {
- BlockT *curBlk = *iterBlk;
- removeUnconditionalBranch(curBlk);
- removeRedundantConditionalBranch(curBlk);
- if (CFGTraits::isReturnBlock(curBlk)) {
- retBlks.push_back(curBlk);
+void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ MI->getOperand(0).setMBB(MBB);
+}
+
+MachineBasicBlock *
+AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(MBB->succ_size() == 2);
+ MachineBasicBlock *TrueBranch = getTrueBranch(MI);
+ MachineBasicBlock::succ_iterator It = MBB->succ_begin();
+ MachineBasicBlock::succ_iterator Next = It;
+ ++Next;
+ return (*It == TrueBranch) ? *Next : *It;
+}
+
+bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::JUMP:
+ case AMDGPU::BRANCH:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
+ //get DebugLoc from the first MachineBasicBlock instruction with debug info
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
+ ++It) {
+ MachineInstr *instr = &(*It);
+ if (instr->getDebugLoc().isUnknown() == false)
+ DL = instr->getDebugLoc();
+ }
+ return DL;
+}
+
+MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock::reverse_iterator It = MBB->rbegin();
+ MachineInstr *MI = &*It;
+ if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
+ return MI;
+ return NULL;
+}
+
+MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
+ MachineBasicBlock *MBB) {
+ for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
+ It != E; ++It) {
+ // FIXME: Simplify
+ MachineInstr *MI = &*It;
+ if (MI) {
+ if (isCondBranch(MI) || isUncondBranch(MI))
+ return MI;
+ else if (!TII->isMov(MI->getOpcode()))
+ break;
}
- assert(curBlk->succ_size() <= 2);
- } //for
+ }
+ return NULL;
+}
- if (retBlks.size() >= 2) {
- addDummyExitBlock(retBlks);
- changed = true;
+MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
+ MachineBasicBlock::reverse_iterator It = MBB->rbegin();
+ if (It != MBB->rend()) {
+ MachineInstr *instr = &(*It);
+ if (instr->getOpcode() == AMDGPU::RETURN)
+ return instr;
}
+ return NULL;
+}
- return changed;
-} //CFGStructurizer::prepare
+MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
+ MachineBasicBlock::reverse_iterator It = MBB->rbegin();
+ if (It != MBB->rend()) {
+ MachineInstr *MI = &(*It);
+ if (MI->getOpcode() == AMDGPU::CONTINUE)
+ return MI;
+ }
+ return NULL;
+}
-template<class PassT>
-bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
- const AMDGPURegisterInfo * tri) {
- passRep = &pass;
- funcRep = &func;
- TRI = tri;
+bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
+ MachineInstr *MI = getReturnInstr(MBB);
+ bool IsReturn = (MBB->succ_size() == 0);
+ if (MI)
+ assert(IsReturn);
+ else if (IsReturn)
+ DEBUG(
+ dbgs() << "BB" << MBB->getNumber()
+ <<" is return block without RETURN instr\n";);
+ return IsReturn;
+}
- //Assume reducible CFG...
- if (DEBUGME) {
- errs() << "AMDGPUCFGStructurizer::run\n";
- func.viewCFG();
+void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB) {
+ for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
+ iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
+ DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
+}
+
+MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
+ MachineFunction *Func = MBB->getParent();
+ MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
+ Func->push_back(NewMBB); //insert to function
+ for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
+ It != E; ++It) {
+ MachineInstr *MI = Func->CloneMachineInstr(It);
+ NewMBB->push_back(MI);
}
+ return NewMBB;
+}
+
+void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
+ MachineBasicBlock *NewBlk) {
+ MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
+ if (BranchMI && isCondBranch(BranchMI) &&
+ getTrueBranch(BranchMI) == OldMBB)
+ setTrueBranch(BranchMI, NewBlk);
+}
+
+void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
+ assert((!MBB->getParent()->getJumpTableInfo()
+ || MBB->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+ //collect continue right before endloop
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
+ MachineBasicBlock::iterator Pre = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ MachineBasicBlock::iterator It = Pre;
+ while (It != E) {
+ if (Pre->getOpcode() == AMDGPU::CONTINUE
+ && It->getOpcode() == AMDGPU::ENDLOOP)
+ ContInstr.push_back(Pre);
+ Pre = It;
+ ++It;
+ }
+
+ //delete continue right before endloop
+ for (unsigned i = 0; i < ContInstr.size(); ++i)
+ ContInstr[i]->eraseFromParent();
+
+ // TODO to fix up jump table so later phase won't be confused. if
+ // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
+ // there isn't such an interface yet. alternatively, replace all the other
+ // blocks in the jump table with the entryBlk //}
+
+}
+
+
+bool AMDGPUCFGStructurizer::prepare() {
+ bool Changed = false;
+
+ //FIXME: if not reducible flow graph, make it so ???
+
+ DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
- domTree = CFGTraits::getDominatorTree(pass);
- if (DEBUGME) {
- domTree->print(errs(), (const llvm::Module*)0);
+ orderBlocks(FuncRep);
+
+ SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
+
+ // Add an ExitBlk to loop that don't have one
+ for (MachineLoopInfo::iterator It = MLI->begin(),
+ E = MLI->end(); It != E; ++It) {
+ MachineLoop *LoopRep = (*It);
+ MBBVector ExitingMBBs;
+ LoopRep->getExitingBlocks(ExitingMBBs);
+
+ if (ExitingMBBs.size() == 0) {
+ MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
+ if (DummyExitBlk)
+ RetBlks.push_back(DummyExitBlk);
+ }
}
- postDomTree = CFGTraits::getPostDominatorTree(pass);
- if (DEBUGME) {
- postDomTree->print(errs());
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+ for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
+ It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
+ MachineBasicBlock *MBB = *It;
+ removeUnconditionalBranch(MBB);
+ removeRedundantConditionalBranch(MBB);
+ if (isReturnBlock(MBB)) {
+ RetBlks.push_back(MBB);
+ }
+ assert(MBB->succ_size() <= 2);
}
- loopInfo = CFGTraits::getLoopInfo(pass);
- if (DEBUGME) {
- errs() << "LoopInfo:\n";
- PrintLoopinfo(*loopInfo, errs());
+ if (RetBlks.size() >= 2) {
+ addDummyExitBlock(RetBlks);
+ Changed = true;
}
- orderBlocks();
+ return Changed;
+}
+
+bool AMDGPUCFGStructurizer::run() {
+
+ //Assume reducible CFG...
+ DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n";FuncRep->viewCFG(););
+
#ifdef STRESSTEST
//Use the worse block ordering to test the algorithm.
ReverseVector(orderedBlks);
#endif
- if (DEBUGME) {
- errs() << "Ordered blocks:\n";
- printOrderedBlocks(errs());
- }
- int numIter = 0;
- bool finish = false;
- BlockT *curBlk;
- bool makeProgress = false;
- int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
- orderedBlks.end());
+ DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
+ int NumIter = 0;
+ bool Finish = false;
+ MachineBasicBlock *MBB;
+ bool MakeProgress = false;
+ int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
+ OrderedBlks.end());
do {
- ++numIter;
- if (DEBUGME) {
- errs() << "numIter = " << numIter
- << ", numRemaintedBlk = " << numRemainedBlk << "\n";
- }
-
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlk = orderedBlks.begin();
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlkEnd = orderedBlks.end();
-
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- sccBeginIter = iterBlk;
- BlockT *sccBeginBlk = NULL;
- int sccNumBlk = 0; // The number of active blocks, init to a
+ ++NumIter;
+ DEBUG(
+ dbgs() << "numIter = " << NumIter
+ << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
+ );
+
+ SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
+ OrderedBlks.begin();
+ SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
+ OrderedBlks.end();
+
+ SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
+ It;
+ MachineBasicBlock *SccBeginMBB = NULL;
+ int SccNumBlk = 0; // The number of active blocks, init to a
// maximum possible number.
- int sccNumIter; // Number of iteration in this SCC.
-
- while (iterBlk != iterBlkEnd) {
- curBlk = *iterBlk;
-
- if (sccBeginBlk == NULL) {
- sccBeginIter = iterBlk;
- sccBeginBlk = curBlk;
- sccNumIter = 0;
- sccNumBlk = numRemainedBlk; // Init to maximum possible number.
- if (DEBUGME) {
- errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
- errs() << "\n";
- }
+ int SccNumIter; // Number of iteration in this SCC.
+
+ while (It != E) {
+ MBB = *It;
+
+ if (!SccBeginMBB) {
+ SccBeginIter = It;
+ SccBeginMBB = MBB;
+ SccNumIter = 0;
+ SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
+ DEBUG(
+ dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
+ dbgs() << "\n";
+ );
}
- if (!isRetiredBlock(curBlk)) {
- patternMatch(curBlk);
- }
+ if (!isRetiredBlock(MBB))
+ patternMatch(MBB);
- ++iterBlk;
+ ++It;
- bool contNextScc = true;
- if (iterBlk == iterBlkEnd
- || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ bool ContNextScc = true;
+ if (It == E
+ || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
// Just finish one scc.
- ++sccNumIter;
- int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
- if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
- if (DEBUGME) {
- errs() << "Can't reduce SCC " << getSCCNum(curBlk)
- << ", sccNumIter = " << sccNumIter;
- errs() << "doesn't make any progress\n";
- }
- contNextScc = true;
- } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
- sccNumBlk = sccRemainedNumBlk;
- iterBlk = sccBeginIter;
- contNextScc = false;
- if (DEBUGME) {
- errs() << "repeat processing SCC" << getSCCNum(curBlk)
- << "sccNumIter = " << sccNumIter << "\n";
- func.viewCFG();
- }
+ ++SccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
+ DEBUG(
+ dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
+ << ", sccNumIter = " << SccNumIter;
+ dbgs() << "doesn't make any progress\n";
+ );
+ ContNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
+ SccNumBlk = sccRemainedNumBlk;
+ It = SccBeginIter;
+ ContNextScc = false;
+ DEBUG(
+ dbgs() << "repeat processing SCC" << getSCCNum(MBB)
+ << "sccNumIter = " << SccNumIter << "\n";
+ FuncRep->viewCFG();
+ );
} else {
// Finish the current scc.
- contNextScc = true;
+ ContNextScc = true;
}
} else {
// Continue on next component in the current scc.
- contNextScc = false;
+ ContNextScc = false;
}
- if (contNextScc) {
- sccBeginBlk = NULL;
- }
+ if (ContNextScc)
+ SccBeginMBB = NULL;
} //while, "one iteration" over the function.
- BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
- if (entryBlk->succ_size() == 0) {
- finish = true;
- if (DEBUGME) {
- errs() << "Reduce to one block\n";
- }
+ MachineBasicBlock *EntryMBB =
+ GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
+ if (EntryMBB->succ_size() == 0) {
+ Finish = true;
+ DEBUG(
+ dbgs() << "Reduce to one block\n";
+ );
} else {
- int newnumRemainedBlk
- = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ int NewnumRemainedBlk
+ = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
// consider cloned blocks ??
- if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
- makeProgress = true;
- numRemainedBlk = newnumRemainedBlk;
+ if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
+ MakeProgress = true;
+ NumRemainedBlk = NewnumRemainedBlk;
} else {
- makeProgress = false;
- if (DEBUGME) {
- errs() << "No progress\n";
- }
+ MakeProgress = false;
+ DEBUG(
+ dbgs() << "No progress\n";
+ );
}
}
- } while (!finish && makeProgress);
+ } while (!Finish && MakeProgress);
// Misc wrap up to maintain the consistency of the Function representation.
- CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+ wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
// Detach retired Block, release memory.
- for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
- iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
- if ((*iterMap).second && (*iterMap).second->isRetired) {
- assert(((*iterMap).first)->getNumber() != -1);
- if (DEBUGME) {
- errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
- }
- (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
+ It != E; ++It) {
+ if ((*It).second && (*It).second->IsRetired) {
+ assert(((*It).first)->getNumber() != -1);
+ DEBUG(
+ dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
+ );
+ (*It).first->eraseFromParent(); //Remove from the parent Function.
}
- delete (*iterMap).second;
+ delete (*It).second;
}
- blockInfoMap.clear();
+ BlockInfoMap.clear();
+ LLInfoMap.clear();
- // clear loopLandInfoMap
- for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
- iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
- delete (*iterMap).second;
- }
- loopLandInfoMap.clear();
+ DEBUG(
+ FuncRep->viewCFG();
+ );
- if (DEBUGME) {
- func.viewCFG();
- }
-
- if (!finish) {
- assert(!"IRREDUCIBL_CF");
- }
+ if (!Finish)
+ llvm_unreachable("IRREDUCIBL_CF");
return true;
-} //CFGStructurizer::run
-
-/// Print the ordered Blocks.
-///
-template<class PassT>
-void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
- size_t i = 0;
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
- iterBlk != iterBlkEnd;
- ++iterBlk, ++i) {
- os << "BB" << (*iterBlk)->getNumber();
- os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
- if (i != 0 && i % 10 == 0) {
- os << "\n";
- } else {
- os << " ";
- }
- }
-} //printOrderedBlocks
-
-/// Compute the reversed DFS post order of Blocks
-///
-template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
- int sccNum = 0;
- BlockT *bb;
- for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
- sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
- std::vector<BlockT *> &sccNext = *sccIter;
- for (typename std::vector<BlockT *>::const_iterator
- blockIter = sccNext.begin(), blockEnd = sccNext.end();
+}
+
+
+
+void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
+ int SccNum = 0;
+ MachineBasicBlock *MBB;
+ for (scc_iterator<MachineFunction *> It = scc_begin(MF), E = scc_end(MF);
+ It != E; ++It, ++SccNum) {
+ std::vector<MachineBasicBlock *> &SccNext = *It;
+ for (std::vector<MachineBasicBlock *>::const_iterator
+ blockIter = SccNext.begin(), blockEnd = SccNext.end();
blockIter != blockEnd; ++blockIter) {
- bb = *blockIter;
- orderedBlks.push_back(bb);
- recordSccnum(bb, sccNum);
+ MBB = *blockIter;
+ OrderedBlks.push_back(MBB);
+ recordSccnum(MBB, SccNum);
}
}
//walk through all the block in func to check for unreachable
- for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
- blockEnd1 = FuncGTraits::nodes_end(funcRep);
- blockIter1 != blockEnd1; ++blockIter1) {
- BlockT *bb = &(*blockIter1);
- sccNum = getSCCNum(bb);
- if (sccNum == INVALIDSCCNUM) {
- errs() << "unreachable block BB" << bb->getNumber() << "\n";
- }
+ typedef GraphTraits<MachineFunction *> GTM;
+ MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
+ for (; It != E; ++It) {
+ MachineBasicBlock *MBB = &(*It);
+ SccNum = getSCCNum(MBB);
+ if (SccNum == INVALIDSCCNUM)
+ dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
}
-} //orderBlocks
+}
-template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
- int numMatch = 0;
- int curMatch;
+int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
+ int NumMatch = 0;
+ int CurMatch;
- if (DEBUGME) {
- errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
- }
+ DEBUG(
+ dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
+ );
- while ((curMatch = patternMatchGroup(curBlk)) > 0) {
- numMatch += curMatch;
- }
+ while ((CurMatch = patternMatchGroup(MBB)) > 0)
+ NumMatch += CurMatch;
- if (DEBUGME) {
- errs() << "End patternMatch BB" << curBlk->getNumber()
- << ", numMatch = " << numMatch << "\n";
- }
+ DEBUG(
+ dbgs() << "End patternMatch BB" << MBB->getNumber()
+ << ", numMatch = " << NumMatch << "\n";
+ );
+
+ return NumMatch;
+}
+
+int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
+ int NumMatch = 0;
+ NumMatch += loopendPatternMatch();
+ NumMatch += serialPatternMatch(MBB);
+ NumMatch += ifPatternMatch(MBB);
+ return NumMatch;
+}
- return numMatch;
-} //patternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
- int numMatch = 0;
- numMatch += serialPatternMatch(curBlk);
- numMatch += ifPatternMatch(curBlk);
- numMatch += loopendPatternMatch(curBlk);
- numMatch += loopPatternMatch(curBlk);
- return numMatch;
-}//patternMatchGroup
-
-template<class PassT>
-int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
- if (curBlk->succ_size() != 1) {
+
+int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
+ if (MBB->succ_size() != 1)
return 0;
- }
- BlockT *childBlk = *curBlk->succ_begin();
- if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ MachineBasicBlock *childBlk = *MBB->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
return 0;
- }
- mergeSerialBlock(curBlk, childBlk);
+ mergeSerialBlock(MBB, childBlk);
++numSerialPatternMatch;
return 1;
-} //serialPatternMatch
+}
-template<class PassT>
-int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
//two edges
- if (curBlk->succ_size() != 2) {
+ if (MBB->succ_size() != 2)
return 0;
- }
-
- if (hasBackEdge(curBlk)) {
+ if (hasBackEdge(MBB))
return 0;
- }
-
- InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
- if (branchInstr == NULL) {
+ MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
+ if (!BranchMI)
return 0;
- }
- assert(CFGTraits::isCondBranch(branchInstr));
+ assert(isCondBranch(BranchMI));
+ int NumMatch = 0;
- BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
- BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
- BlockT *landBlk;
- int cloned = 0;
+ MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
+ NumMatch += serialPatternMatch(TrueMBB);
+ NumMatch += ifPatternMatch(TrueMBB);
+ MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
+ NumMatch += serialPatternMatch(FalseMBB);
+ NumMatch += ifPatternMatch(FalseMBB);
+ MachineBasicBlock *LandBlk;
+ int Cloned = 0;
+ assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
// TODO: Simplify
- if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
- && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
- landBlk = *trueBlk->succ_begin();
- } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
- landBlk = NULL;
- } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
- landBlk = falseBlk;
- falseBlk = NULL;
- } else if (falseBlk->succ_size() == 1
- && *falseBlk->succ_begin() == trueBlk) {
- landBlk = trueBlk;
- trueBlk = NULL;
- } else if (falseBlk->succ_size() == 1
- && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
- landBlk = *falseBlk->succ_begin();
- } else if (trueBlk->succ_size() == 1
- && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
- landBlk = *trueBlk->succ_begin();
+ if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
+ && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
+ // Diamond pattern
+ LandBlk = *TrueMBB->succ_begin();
+ } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
+ // Triangle pattern, false is empty
+ LandBlk = FalseMBB;
+ FalseMBB = NULL;
+ } else if (FalseMBB->succ_size() == 1
+ && *FalseMBB->succ_begin() == TrueMBB) {
+ // Triangle pattern, true is empty
+ // We reverse the predicate to make a triangle, empty false pattern;
+ std::swap(TrueMBB, FalseMBB);
+ reversePredicateSetter(MBB->end());
+ LandBlk = FalseMBB;
+ FalseMBB = NULL;
+ } else if (FalseMBB->succ_size() == 1
+ && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
+ LandBlk = *FalseMBB->succ_begin();
+ } else if (TrueMBB->succ_size() == 1
+ && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
+ LandBlk = *TrueMBB->succ_begin();
} else {
- return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
}
// improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
// new BB created for landBlk==NULL may introduce new challenge to the
// reduction process.
- if (landBlk != NULL &&
- ((trueBlk && trueBlk->pred_size() > 1)
- || (falseBlk && falseBlk->pred_size() > 1))) {
- cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ if (LandBlk &&
+ ((TrueMBB && TrueMBB->pred_size() > 1)
+ || (FalseMBB && FalseMBB->pred_size() > 1))) {
+ Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
}
- if (trueBlk && trueBlk->pred_size() > 1) {
- trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
- ++cloned;
+ if (TrueMBB && TrueMBB->pred_size() > 1) {
+ TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
+ ++Cloned;
}
- if (falseBlk && falseBlk->pred_size() > 1) {
- falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
- ++cloned;
+ if (FalseMBB && FalseMBB->pred_size() > 1) {
+ FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
+ ++Cloned;
}
- mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+ mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
++numIfPatternMatch;
- numClonedBlock += cloned;
-
- return 1 + cloned;
-} //ifPatternMatch
+ numClonedBlock += Cloned;
-template<class PassT>
-int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
- return 0;
-} //switchPatternMatch
+ return 1 + Cloned + NumMatch;
+}
-template<class PassT>
-int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
- LoopT *loopRep = loopInfo->getLoopFor(curBlk);
- typename std::vector<LoopT *> nestedLoops;
- while (loopRep) {
- nestedLoops.push_back(loopRep);
- loopRep = loopRep->getParentLoop();
+int AMDGPUCFGStructurizer::loopendPatternMatch() {
+ std::vector<MachineLoop *> NestedLoops;
+ for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end();
+ It != E; ++It) {
+ df_iterator<MachineLoop *> LpIt = df_begin(*It),
+ LpE = df_end(*It);
+ for (; LpIt != LpE; ++LpIt)
+ NestedLoops.push_back(*LpIt);
}
-
- if (nestedLoops.size() == 0) {
+ if (NestedLoops.size() == 0)
return 0;
- }
// Process nested loop outside->inside, so "continue" to a outside loop won't
// be mistaken as "break" of the current loop.
- int num = 0;
- for (typename std::vector<LoopT *>::reverse_iterator
- iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
- iter != iterEnd; ++iter) {
- loopRep = *iter;
-
- if (getLoopLandBlock(loopRep) != NULL) {
+ int Num = 0;
+ for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
+ E = NestedLoops.rend(); It != E; ++It) {
+ MachineLoop *ExaminedLoop = *It;
+ if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
continue;
- }
-
- BlockT *loopHeader = loopRep->getHeader();
-
- int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
-
- if (numBreak == -1) {
+ DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
+ int NumBreak = mergeLoop(ExaminedLoop);
+ if (NumBreak == -1)
break;
- }
-
- int numCont = loopcontPatternMatch(loopRep, loopHeader);
- num += numBreak + numCont;
- }
-
- return num;
-} //loopendPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
- if (curBlk->succ_size() != 0) {
- return 0;
- }
-
- int numLoop = 0;
- LoopT *loopRep = loopInfo->getLoopFor(curBlk);
- while (loopRep && loopRep->getHeader() == curBlk) {
- LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
- if (loopLand) {
- BlockT *landBlk = loopLand->landBlk;
- assert(landBlk);
- if (!isRetiredBlock(landBlk)) {
- mergeLooplandBlock(curBlk, loopLand);
- ++numLoop;
- }
- }
- loopRep = loopRep->getParentLoop();
- }
-
- numLoopPatternMatch += numLoop;
-
- return numLoop;
-} //loopPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
- BlockT *loopHeader) {
- BlockTSmallerVector exitingBlks;
- loopRep->getExitingBlocks(exitingBlks);
-
- if (DEBUGME) {
- errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
- }
-
- if (exitingBlks.size() == 0) {
- setLoopLandBlock(loopRep);
- return 0;
- }
-
- // Compute the corresponding exitBlks and exit block set.
- BlockTSmallerVector exitBlks;
- std::set<BlockT *> exitBlkSet;
- for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
- iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
- BlockT *exitingBlk = *iter;
- BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
- exitBlks.push_back(exitBlk);
- exitBlkSet.insert(exitBlk); //non-duplicate insert
- }
-
- assert(exitBlkSet.size() > 0);
- assert(exitBlks.size() == exitingBlks.size());
-
- if (DEBUGME) {
- errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ Num += NumBreak;
}
+ return Num;
+}
- // Find exitLandBlk.
- BlockT *exitLandBlk = NULL;
- int numCloned = 0;
- int numSerial = 0;
-
- if (exitBlkSet.size() == 1) {
- exitLandBlk = *exitBlkSet.begin();
- } else {
- exitLandBlk = findNearestCommonPostDom(exitBlkSet);
-
- if (exitLandBlk == NULL) {
- return -1;
- }
-
- bool allInPath = true;
- bool allNotInPath = true;
- for (typename std::set<BlockT*>::const_iterator
- iter = exitBlkSet.begin(),
- iterEnd = exitBlkSet.end();
- iter != iterEnd; ++iter) {
- BlockT *exitBlk = *iter;
-
- PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
- if (DEBUGME) {
- errs() << "BB" << exitBlk->getNumber()
- << " to BB" << exitLandBlk->getNumber() << " PathToKind="
- << pathKind << "\n";
- }
-
- allInPath = allInPath && (pathKind == SinglePath_InPath);
- allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
-
- if (!allInPath && !allNotInPath) {
- if (DEBUGME) {
- errs() << "singlePath check fail\n";
- }
- return -1;
- }
- } // check all exit blocks
-
- if (allNotInPath) {
-
- // TODO: Simplify, maybe separate function?
- LoopT *parentLoopRep = loopRep->getParentLoop();
- BlockT *parentLoopHeader = NULL;
- if (parentLoopRep)
- parentLoopHeader = parentLoopRep->getHeader();
-
- if (exitLandBlk == parentLoopHeader &&
- (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
- loopRep,
- exitBlkSet,
- exitLandBlk)) != NULL) {
- if (DEBUGME) {
- errs() << "relocateLoopcontBlock success\n";
- }
- } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
- exitingBlks,
- exitBlks)) != NULL) {
- if (DEBUGME) {
- errs() << "insertEndbranchBlock success\n";
- }
- } else {
- if (DEBUGME) {
- errs() << "loop exit fail\n";
- }
- return -1;
- }
- }
-
- // Handle side entry to exit path.
- exitBlks.clear();
- exitBlkSet.clear();
- for (typename BlockTSmallerVector::iterator iterExiting =
- exitingBlks.begin(),
- iterExitingEnd = exitingBlks.end();
- iterExiting != iterExitingEnd; ++iterExiting) {
- BlockT *exitingBlk = *iterExiting;
- BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
- BlockT *newExitBlk = exitBlk;
-
- if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
- newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
- ++numCloned;
- }
-
- numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
-
- exitBlks.push_back(newExitBlk);
- exitBlkSet.insert(newExitBlk);
- }
-
- for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
- iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit) {
- BlockT *exitBlk = *iterExit;
- numSerial += serialPatternMatch(exitBlk);
- }
-
- for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
- iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit) {
- BlockT *exitBlk = *iterExit;
- if (exitBlk->pred_size() > 1) {
- if (exitBlk != exitLandBlk) {
- return -1;
- }
- } else {
- if (exitBlk != exitLandBlk &&
- (exitBlk->succ_size() != 1 ||
- *exitBlk->succ_begin() != exitLandBlk)) {
- return -1;
- }
- }
- }
- } // else
-
- exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
-
- // Fold break into the breaking block. Leverage across level breaks.
- assert(exitingBlks.size() == exitBlks.size());
- for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
- iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
- BlockT *exitBlk = *iterExit;
- BlockT *exitingBlk = *iterExiting;
- assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
- LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
- handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
- }
+int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
+ MachineBasicBlock *LoopHeader = LoopRep->getHeader();
+ MBBVector ExitingMBBs;
+ LoopRep->getExitingBlocks(ExitingMBBs);
+ assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
+ DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
+ // We assume a single ExitBlk
+ MBBVector ExitBlks;
+ LoopRep->getExitBlocks(ExitBlks);
+ SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
+ for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
+ ExitBlkSet.insert(ExitBlks[i]);
+ assert(ExitBlkSet.size() == 1);
+ MachineBasicBlock *ExitBlk = *ExitBlks.begin();
+ assert(ExitBlk && "Loop has several exit block");
+ MBBVector LatchBlks;
+ typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
+ InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
+ PE = InvMBBTraits::child_end(LoopHeader);
+ for (; PI != PE; PI++) {
+ if (LoopRep->contains(*PI))
+ LatchBlks.push_back(*PI);
+ }
+
+ for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
+ mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
+ for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
+ settleLoopcontBlock(LatchBlks[i], LoopHeader);
+ int Match = 0;
+ do {
+ Match = 0;
+ Match += serialPatternMatch(LoopHeader);
+ Match += ifPatternMatch(LoopHeader);
+ } while (Match > 0);
+ mergeLooplandBlock(LoopHeader, ExitBlk);
+ MachineLoop *ParentLoop = LoopRep->getParentLoop();
+ if (ParentLoop)
+ MLI->changeLoopFor(LoopHeader, ParentLoop);
+ else
+ MLI->removeBlock(LoopHeader);
+ Visited[LoopRep] = true;
+ return 1;
+}
- int numBreak = static_cast<int>(exitingBlks.size());
- numLoopbreakPatternMatch += numBreak;
- numClonedBlock += numCloned;
- return numBreak + numSerial + numCloned;
-} //loopbreakPatternMatch
-
-template<class PassT>
-int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
- BlockT *loopHeader) {
- int numCont = 0;
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
- for (typename InvBlockGTraits::ChildIteratorType iter =
- InvBlockGTraits::child_begin(loopHeader),
- iterEnd = InvBlockGTraits::child_end(loopHeader);
- iter != iterEnd; ++iter) {
- BlockT *curBlk = *iter;
- if (loopRep->contains(curBlk)) {
- handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
- loopHeader, loopRep);
- contBlk.push_back(curBlk);
- ++numCont;
+int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
+ MachineBasicBlock *LoopHeader) {
+ int NumCont = 0;
+ SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
+ typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
+ GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
+ E = GTIM::child_end(LoopHeader);
+ for (; It != E; ++It) {
+ MachineBasicBlock *MBB = *It;
+ if (LoopRep->contains(MBB)) {
+ handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
+ LoopHeader, LoopRep);
+ ContMBB.push_back(MBB);
+ ++NumCont;
}
}
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
- iter = contBlk.begin(), iterEnd = contBlk.end();
- iter != iterEnd; ++iter) {
- (*iter)->removeSuccessor(loopHeader);
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
+ E = ContMBB.end(); It != E; ++It) {
+ (*It)->removeSuccessor(LoopHeader);
}
- numLoopcontPatternMatch += numCont;
+ numLoopcontPatternMatch += NumCont;
- return numCont;
-} //loopcontPatternMatch
+ return NumCont;
+}
-template<class PassT>
-bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
- BlockT *src2Blk) {
- // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
- // same loop with LoopLandInfo without explicitly keeping track of
- // loopContBlks and loopBreakBlks, this is a method to get the information.
- //
- if (src1Blk->succ_size() == 0) {
- LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
- if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
- if (theEntry != NULL) {
- if (DEBUGME) {
- errs() << "isLoopContBreakBlock yes src1 = BB"
- << src1Blk->getNumber()
- << " src2 = BB" << src2Blk->getNumber() << "\n";
- }
+bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
+ MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
+ if (Src1MBB->succ_size() == 0) {
+ MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
+ if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
+ MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
+ if (TheEntry) {
+ DEBUG(
+ dbgs() << "isLoopContBreakBlock yes src1 = BB"
+ << Src1MBB->getNumber()
+ << " src2 = BB" << Src2MBB->getNumber() << "\n";
+ );
return true;
}
}
}
return false;
-} //isSameloopDetachedContbreak
-
-template<class PassT>
-int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk) {
- int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
- if (num == 0) {
- if (DEBUGME) {
- errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
- }
- num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
- }
- return num;
}
-template<class PassT>
-int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk) {
- int num = 0;
- BlockT *downBlk;
-
- //trueBlk could be the common post dominator
- downBlk = trueBlk;
-
- if (DEBUGME) {
- errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
- << " true = BB" << trueBlk->getNumber()
- << ", numSucc=" << trueBlk->succ_size()
- << " false = BB" << falseBlk->getNumber() << "\n";
+int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
+ int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
+ if (Num == 0) {
+ DEBUG(
+ dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ );
+ Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
}
+ return Num;
+}
- while (downBlk) {
- if (DEBUGME) {
- errs() << "check down = BB" << downBlk->getNumber();
- }
-
- if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
- if (DEBUGME) {
- errs() << " working\n";
- }
-
- num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
- num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
+ int Num = 0;
+ MachineBasicBlock *DownBlk;
- numClonedBlock += num;
- num += serialPatternMatch(*headBlk->succ_begin());
- num += serialPatternMatch(*(++headBlk->succ_begin()));
- num += ifPatternMatch(headBlk);
- assert(num > 0);
+ //trueBlk could be the common post dominator
+ DownBlk = TrueMBB;
+
+ DEBUG(
+ dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
+ << " true = BB" << TrueMBB->getNumber()
+ << ", numSucc=" << TrueMBB->succ_size()
+ << " false = BB" << FalseMBB->getNumber() << "\n";
+ );
+
+ while (DownBlk) {
+ DEBUG(
+ dbgs() << "check down = BB" << DownBlk->getNumber();
+ );
+
+ if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
+ DEBUG(
+ dbgs() << " working\n";
+ );
+
+ Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
+ Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
+
+ numClonedBlock += Num;
+ Num += serialPatternMatch(*HeadMBB->succ_begin());
+ Num += serialPatternMatch(*llvm::next(HeadMBB->succ_begin()));
+ Num += ifPatternMatch(HeadMBB);
+ assert(Num > 0);
break;
}
- if (DEBUGME) {
- errs() << " not working\n";
- }
- downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ DEBUG(
+ dbgs() << " not working\n";
+ );
+ DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
} // walk down the postDomTree
- return num;
-} //handleJumpintoIf
-
-template<class PassT>
-void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT *landBlk,
- bool detail) {
- errs() << "head = BB" << headBlk->getNumber()
- << " size = " << headBlk->size();
- if (detail) {
- errs() << "\n";
- headBlk->print(errs());
- errs() << "\n";
+ return Num;
+}
+
+void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
+ MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
+ dbgs() << "head = BB" << HeadMBB->getNumber()
+ << " size = " << HeadMBB->size();
+ if (Detail) {
+ dbgs() << "\n";
+ HeadMBB->print(dbgs());
+ dbgs() << "\n";
}
- if (trueBlk) {
- errs() << ", true = BB" << trueBlk->getNumber() << " size = "
- << trueBlk->size() << " numPred = " << trueBlk->pred_size();
- if (detail) {
- errs() << "\n";
- trueBlk->print(errs());
- errs() << "\n";
+ if (TrueMBB) {
+ dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
+ << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
+ if (Detail) {
+ dbgs() << "\n";
+ TrueMBB->print(dbgs());
+ dbgs() << "\n";
}
}
- if (falseBlk) {
- errs() << ", false = BB" << falseBlk->getNumber() << " size = "
- << falseBlk->size() << " numPred = " << falseBlk->pred_size();
- if (detail) {
- errs() << "\n";
- falseBlk->print(errs());
- errs() << "\n";
+ if (FalseMBB) {
+ dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
+ << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
+ if (Detail) {
+ dbgs() << "\n";
+ FalseMBB->print(dbgs());
+ dbgs() << "\n";
}
}
- if (landBlk) {
- errs() << ", land = BB" << landBlk->getNumber() << " size = "
- << landBlk->size() << " numPred = " << landBlk->pred_size();
- if (detail) {
- errs() << "\n";
- landBlk->print(errs());
- errs() << "\n";
+ if (LandMBB) {
+ dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
+ << LandMBB->size() << " numPred = " << LandMBB->pred_size();
+ if (Detail) {
+ dbgs() << "\n";
+ LandMBB->print(dbgs());
+ dbgs() << "\n";
}
}
- errs() << "\n";
-} //showImproveSimpleJumpintoIf
+ dbgs() << "\n";
+}
-template<class PassT>
-int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT **plandBlk) {
- bool migrateTrue = false;
- bool migrateFalse = false;
+int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+ MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
+ MachineBasicBlock **LandMBBPtr) {
+ bool MigrateTrue = false;
+ bool MigrateFalse = false;
- BlockT *landBlk = *plandBlk;
+ MachineBasicBlock *LandBlk = *LandMBBPtr;
- assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
- && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+ assert((!TrueMBB || TrueMBB->succ_size() <= 1)
+ && (!FalseMBB || FalseMBB->succ_size() <= 1));
- if (trueBlk == falseBlk) {
+ if (TrueMBB == FalseMBB)
return 0;
- }
- migrateTrue = needMigrateBlock(trueBlk);
- migrateFalse = needMigrateBlock(falseBlk);
+ MigrateTrue = needMigrateBlock(TrueMBB);
+ MigrateFalse = needMigrateBlock(FalseMBB);
- if (!migrateTrue && !migrateFalse) {
+ if (!MigrateTrue && !MigrateFalse)
return 0;
- }
// If we need to migrate either trueBlk and falseBlk, migrate the rest that
// have more than one predecessors. without doing this, its predecessor
// rather than headBlk will have undefined value in initReg.
- if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
- migrateTrue = true;
- }
- if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
- migrateFalse = true;
- }
+ if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
+ MigrateTrue = true;
+ if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
+ MigrateFalse = true;
- if (DEBUGME) {
- errs() << "before improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
- }
+ DEBUG(
+ dbgs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
+ );
// org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
//
@@ -1183,205 +1336,192 @@ int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
// add initReg = initVal to headBlk
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- unsigned initReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- if (!migrateTrue || !migrateFalse) {
- int initVal = migrateTrue ? 0 : 1;
- CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ if (!MigrateTrue || !MigrateFalse) {
+ // XXX: We have an opportunity here to optimize the "branch into if" case
+ // here. Branch into if looks like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // The diamond_head block begins the "if" and the diamond_true block
+ // is the block being "branched into".
+ //
+ // If MigrateTrue is true, then TrueBB is the block being "branched into"
+ // and if MigrateFalse is true, then FalseBB is the block being
+ // "branched into"
+ //
+ // Here is the pseudo code for how I think the optimization should work:
+ // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
+ // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
+ // 3. Move the branch instruction from diamond_head into its own basic
+ // block (new_block).
+ // 4. Add an unconditional branch from diamond_head to new_block
+ // 5. Replace the branch instruction in branch_from with an unconditional
+ // branch to new_block. If branch_from has multiple predecessors, then
+ // we need to replace the True/False block in the branch
+ // instruction instead of replacing it.
+ // 6. Change the condition of the branch instruction in new_block from
+ // COND to (COND || GPR0)
+ //
+ // In order insert these MOV instruction, we will need to use the
+ // RegisterScavenger. Usually liveness stops being tracked during
+ // the late machine optimization passes, however if we implement
+ // bool TargetRegisterInfo::requiresRegisterScavenging(
+ // const MachineFunction &MF)
+ // and have it return true, liveness will be tracked correctly
+ // by generic optimization passes. We will also need to make sure that
+ // all of our target-specific passes that run after regalloc and before
+ // the CFGStructurizer track liveness and we will need to modify this pass
+ // to correctly track liveness.
+ //
+ // After the above changes, the new CFG should look like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // \ /
+ // new_block
+ // / |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // Without this optimization, we are forced to duplicate the diamond_true
+ // block and we will end up with a CFG like this:
+ //
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true diamond_true (duplicate)
+ // \ / |
+ // done --------------------|
+ //
+ // Duplicating diamond_true can be very costly especially if it has a
+ // lot of instructions.
+ return 0;
}
- int numNewBlk = 0;
-
- if (landBlk == NULL) {
- landBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(landBlk); //insert to function
-
- if (trueBlk) {
- trueBlk->addSuccessor(landBlk);
- } else {
- headBlk->addSuccessor(landBlk);
- }
-
- if (falseBlk) {
- falseBlk->addSuccessor(landBlk);
- } else {
- headBlk->addSuccessor(landBlk);
- }
-
- numNewBlk ++;
- }
+ int NumNewBlk = 0;
- bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+ bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
//insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
- typename BlockT::iterator insertPos =
- CFGTraits::getInstrPos
- (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
-
- if (landBlkHasOtherPred) {
- unsigned immReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
- unsigned cmpResReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
-
- CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
- initReg, immReg);
- CFGTraits::insertCondBranchBefore(landBlk, insertPos,
- AMDGPU::IF_PREDICATE_SET, passRep,
- cmpResReg, DebugLoc());
- }
-
- CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
- passRep, initReg, DebugLoc());
-
- if (migrateTrue) {
- migrateInstruction(trueBlk, landBlk, insertPos);
+ MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
+
+ if (LandBlkHasOtherPred) {
+ llvm_unreachable("Extra register needed to handle CFG");
+ unsigned CmpResReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
+ llvm_unreachable("Extra compare instruction needed to handle CFG");
+ insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
+ CmpResReg, DebugLoc());
+ }
+
+ // XXX: We are running this after RA, so creating virtual registers will
+ // cause an assertion failure in the PostRA scheduling pass.
+ unsigned InitReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
+ insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
+ DebugLoc());
+
+ if (MigrateTrue) {
+ migrateInstruction(TrueMBB, LandBlk, I);
// need to uncondionally insert the assignment to ensure a path from its
// predecessor rather than headBlk has valid value in initReg if
// (initVal != 1).
- CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ llvm_unreachable("Extra register needed to handle CFG");
}
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
+ insertInstrBefore(I, AMDGPU::ELSE);
- if (migrateFalse) {
- migrateInstruction(falseBlk, landBlk, insertPos);
+ if (MigrateFalse) {
+ migrateInstruction(FalseMBB, LandBlk, I);
// need to uncondionally insert the assignment to ensure a path from its
// predecessor rather than headBlk has valid value in initReg if
// (initVal != 0)
- CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ llvm_unreachable("Extra register needed to handle CFG");
}
- if (landBlkHasOtherPred) {
+ if (LandBlkHasOtherPred) {
// add endif
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+ insertInstrBefore(I, AMDGPU::ENDIF);
// put initReg = 2 to other predecessors of landBlk
- for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
- predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
- ++predIter) {
- BlockT *curBlk = *predIter;
- if (curBlk != trueBlk && curBlk != falseBlk) {
- CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
- }
- } //for
- }
- if (DEBUGME) {
- errs() << "result from improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
- }
-
- // update landBlk
- *plandBlk = landBlk;
-
- return numNewBlk;
-} //improveSimpleJumpintoIf
-
-template<class PassT>
-void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
- LoopT *exitingLoop,
- BlockT *exitBlk,
- LoopT *exitLoop,
- BlockT *landBlk) {
- if (DEBUGME) {
- errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
- << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
- }
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-
- RegiT initReg = INVALIDREGNUM;
- if (exitingLoop != exitLoop) {
- initReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- assert(initReg != INVALIDREGNUM);
- addLoopBreakInitReg(exitLoop, initReg);
- while (exitingLoop != exitLoop && exitingLoop) {
- addLoopBreakOnReg(exitingLoop, initReg);
- exitingLoop = exitingLoop->getParentLoop();
+ for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
+ PE = LandBlk->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *MBB = *PI;
+ if (MBB != TrueMBB && MBB != FalseMBB)
+ llvm_unreachable("Extra register needed to handle CFG");
}
- assert(exitingLoop == exitLoop);
}
+ DEBUG(
+ dbgs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
+ );
- mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+ // update landBlk
+ *LandMBBPtr = LandBlk;
-} //handleLoopbreak
+ return NumNewBlk;
+}
-template<class PassT>
-void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
- LoopT *contingLoop,
- BlockT *contBlk,
- LoopT *contLoop) {
- if (DEBUGME) {
- errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
- << " header = BB" << contBlk->getNumber() << "\n";
+void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
+ MachineLoop *ContLoop) {
+ DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
+ << " header = BB" << ContMBB->getNumber() << "\n";
+ dbgs() << "Trying to continue loop-depth = "
+ << getLoopDepth(ContLoop)
+ << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
+ settleLoopcontBlock(ContingMBB, ContMBB);
+}
- errs() << "Trying to continue loop-depth = "
- << getLoopDepth(contLoop)
- << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
- }
+void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
+ MachineBasicBlock *SrcMBB) {
+ DEBUG(
+ dbgs() << "serialPattern BB" << DstMBB->getNumber()
+ << " <= BB" << SrcMBB->getNumber() << "\n";
+ );
+ DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
- RegiT initReg = INVALIDREGNUM;
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- if (contingLoop != contLoop) {
- initReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- assert(initReg != INVALIDREGNUM);
- addLoopContInitReg(contLoop, initReg);
- while (contingLoop && contingLoop->getParentLoop() != contLoop) {
- addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
- contingLoop = contingLoop->getParentLoop();
- }
- assert(contingLoop && contingLoop->getParentLoop() == contLoop);
- addLoopContOnReg(contingLoop, initReg);
- }
+ DstMBB->removeSuccessor(SrcMBB);
+ cloneSuccessorList(DstMBB, SrcMBB);
- settleLoopcontBlock(contingBlk, contBlk, initReg);
-} //handleLoopcontBlock
+ removeSuccessor(SrcMBB);
+ MLI->removeBlock(SrcMBB);
+ retireBlock(SrcMBB);
+}
-template<class PassT>
-void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
- if (DEBUGME) {
- errs() << "serialPattern BB" << dstBlk->getNumber()
- << " <= BB" << srcBlk->getNumber() << "\n";
- }
- dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
-
- dstBlk->removeSuccessor(srcBlk);
- CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
-
- removeSuccessor(srcBlk);
- retireBlock(dstBlk, srcBlk);
-} //mergeSerialBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
- BlockT *curBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT *landBlk) {
- if (DEBUGME) {
- errs() << "ifPattern BB" << curBlk->getNumber();
- errs() << "{ ";
- if (trueBlk) {
- errs() << "BB" << trueBlk->getNumber();
- }
- errs() << " } else ";
- errs() << "{ ";
- if (falseBlk) {
- errs() << "BB" << falseBlk->getNumber();
- }
- errs() << " }\n ";
- errs() << "landBlock: ";
- if (landBlk == NULL) {
- errs() << "NULL";
+void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
+ MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
+ assert (TrueMBB);
+ DEBUG(
+ dbgs() << "ifPattern BB" << MBB->getNumber();
+ dbgs() << "{ ";
+ if (TrueMBB) {
+ dbgs() << "BB" << TrueMBB->getNumber();
+ }
+ dbgs() << " } else ";
+ dbgs() << "{ ";
+ if (FalseMBB) {
+ dbgs() << "BB" << FalseMBB->getNumber();
+ }
+ dbgs() << " }\n ";
+ dbgs() << "landBlock: ";
+ if (!LandMBB) {
+ dbgs() << "NULL";
} else {
- errs() << "BB" << landBlk->getNumber();
+ dbgs() << "BB" << LandMBB->getNumber();
}
- errs() << "\n";
- }
+ dbgs() << "\n";
+ );
- int oldOpcode = branchInstr->getOpcode();
- DebugLoc branchDL = branchInstr->getDebugLoc();
+ int OldOpcode = BranchMI->getOpcode();
+ DebugLoc BranchDL = BranchMI->getDebugLoc();
// transform to
// if cond
@@ -1391,1661 +1531,374 @@ void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
// endif
// landBlk
- typename BlockT::iterator branchInstrPos =
- CFGTraits::getInstrPos(curBlk, branchInstr);
- CFGTraits::insertCondBranchBefore(branchInstrPos,
- CFGTraits::getBranchNzeroOpcode(oldOpcode),
- passRep,
- branchDL);
-
- if (trueBlk) {
- curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
- curBlk->removeSuccessor(trueBlk);
- if (landBlk && trueBlk->succ_size()!=0) {
- trueBlk->removeSuccessor(landBlk);
- }
- retireBlock(curBlk, trueBlk);
- }
- CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
-
- if (falseBlk) {
- curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
- falseBlk->end());
- curBlk->removeSuccessor(falseBlk);
- if (landBlk && falseBlk->succ_size() != 0) {
- falseBlk->removeSuccessor(landBlk);
- }
- retireBlock(curBlk, falseBlk);
- }
- CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
-
- branchInstr->eraseFromParent();
+ MachineBasicBlock::iterator I = BranchMI;
+ insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
+ BranchDL);
- if (landBlk && trueBlk && falseBlk) {
- curBlk->addSuccessor(landBlk);
+ if (TrueMBB) {
+ MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
+ MBB->removeSuccessor(TrueMBB);
+ if (LandMBB && TrueMBB->succ_size()!=0)
+ TrueMBB->removeSuccessor(LandMBB);
+ retireBlock(TrueMBB);
+ MLI->removeBlock(TrueMBB);
}
-} //mergeIfthenelseBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
- LoopLandInfo *loopLand) {
- BlockT *landBlk = loopLand->landBlk;
-
- if (DEBUGME) {
- errs() << "loopPattern header = BB" << dstBlk->getNumber()
- << " land = BB" << landBlk->getNumber() << "\n";
+ if (FalseMBB) {
+ insertInstrBefore(I, AMDGPU::ELSE);
+ MBB->splice(I, FalseMBB, FalseMBB->begin(),
+ FalseMBB->end());
+ MBB->removeSuccessor(FalseMBB);
+ if (LandMBB && FalseMBB->succ_size() != 0)
+ FalseMBB->removeSuccessor(LandMBB);
+ retireBlock(FalseMBB);
+ MLI->removeBlock(FalseMBB);
}
+ insertInstrBefore(I, AMDGPU::ENDIF);
- // Loop contInitRegs are init at the beginning of the loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->contInitRegs.begin(),
- iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
+ BranchMI->eraseFromParent();
- /* we last inserterd the DebugLoc in the
- * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
- * search for the DebugLoc in the that statement.
- * if not found, we have to insert the empty/default DebugLoc */
- InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
- DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
-
- CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
- // Loop breakInitRegs are init before entering the loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->breakInitRegs.begin(),
- iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
- // Loop endbranchInitRegs are init before entering the loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->endbranchInitRegs.begin(),
- iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
+ if (LandMBB && TrueMBB && FalseMBB)
+ MBB->addSuccessor(LandMBB);
- /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
- * search for the DebugLoc in the continue statement.
- * if not found, we have to insert the empty/default DebugLoc */
- InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
- DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
-
- CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
- // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
- // loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->breakOnRegs.begin(),
- iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
- *iter);
- }
-
- // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
- // loop.
- for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
- iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
- passRep, *iter);
- }
-
- dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
-
- for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
- iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
- dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
- }
-
- removeSuccessor(landBlk);
- retireBlock(dstBlk, landBlk);
-} //mergeLooplandBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
- while (I--) {
- if (I->getOpcode() == AMDGPU::PRED_X) {
- switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
- case OPCODE_IS_ZERO_INT:
- static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
- return;
- case OPCODE_IS_NOT_ZERO_INT:
- static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
- return;
- case OPCODE_IS_ZERO:
- static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
- return;
- case OPCODE_IS_NOT_ZERO:
- static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
- return;
- default:
- assert(0 && "PRED_X Opcode invalid!");
- }
- }
- }
}
-template<class PassT>
-void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
- BlockT *exitBlk,
- BlockT *exitLandBlk,
- RegiT setReg) {
- if (DEBUGME) {
- errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
- << " exit = BB" << exitBlk->getNumber()
- << " land = BB" << exitLandBlk->getNumber() << "\n";
- }
+void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
+ MachineBasicBlock *LandMBB) {
+ DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
+ << " land = BB" << LandMBB->getNumber() << "\n";);
- InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
- assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
-
- DebugLoc DL = branchInstr->getDebugLoc();
-
- BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
-
- // transform exitingBlk to
- // if ( ) {
- // exitBlk (if exitBlk != exitLandBlk)
- // setReg = 1
- // break
- // }endif
- // successor = {orgSuccessor(exitingBlk) - exitBlk}
-
- typename BlockT::iterator branchInstrPos =
- CFGTraits::getInstrPos(exitingBlk, branchInstr);
-
- if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
- //break_logical
+ insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
+ insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
+ DstBlk->addSuccessor(LandMBB);
+ DstBlk->removeSuccessor(DstBlk);
+}
- if (trueBranch != exitBlk) {
- reversePredicateSetter(branchInstrPos);
- }
- CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
- } else {
- if (trueBranch != exitBlk) {
- reversePredicateSetter(branchInstr);
- }
- CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
- if (exitBlk != exitLandBlk) {
- //splice is insert-before ...
- exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
- exitBlk->end());
- }
- if (setReg != INVALIDREGNUM) {
- CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
- }
- CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
- } //if_logical
+void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
+ MachineBasicBlock *LandMBB) {
+ DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
+ << " land = BB" << LandMBB->getNumber() << "\n";);
+ MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
+ assert(BranchMI && isCondBranch(BranchMI));
+ DebugLoc DL = BranchMI->getDebugLoc();
+ MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
+ MachineBasicBlock::iterator I = BranchMI;
+ if (TrueBranch != LandMBB)
+ reversePredicateSetter(I);
+ insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
+ insertInstrBefore(I, AMDGPU::BREAK);
+ insertInstrBefore(I, AMDGPU::ENDIF);
//now branchInst can be erase safely
- branchInstr->eraseFromParent();
-
+ BranchMI->eraseFromParent();
//now take care of successors, retire blocks
- exitingBlk->removeSuccessor(exitBlk);
- if (exitBlk != exitLandBlk) {
- //splice is insert-before ...
- exitBlk->removeSuccessor(exitLandBlk);
- retireBlock(exitingBlk, exitBlk);
- }
-
-} //mergeLoopbreakBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
- BlockT *contBlk,
- RegiT setReg) {
- if (DEBUGME) {
- errs() << "settleLoopcontBlock conting = BB"
- << contingBlk->getNumber()
- << ", cont = BB" << contBlk->getNumber() << "\n";
- }
-
- InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
- if (branchInstr) {
- assert(CFGTraits::isCondBranch(branchInstr));
- typename BlockT::iterator branchInstrPos =
- CFGTraits::getInstrPos(contingBlk, branchInstr);
- BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
- int oldOpcode = branchInstr->getOpcode();
- DebugLoc DL = branchInstr->getDebugLoc();
-
- // transform contingBlk to
- // if () {
- // move instr after branchInstr
- // continue
- // or
- // setReg = 1
- // break
- // }endif
- // successor = {orgSuccessor(contingBlk) - loopHeader}
-
- bool useContinueLogical =
- (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
-
- if (useContinueLogical == false) {
- int branchOpcode =
- trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
- : CFGTraits::getBranchZeroOpcode(oldOpcode);
-
- CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
-
- if (setReg != INVALIDREGNUM) {
- CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
- // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
- } else {
- // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
- }
+ ExitingMBB->removeSuccessor(LandMBB);
+}
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
+void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
+ MachineBasicBlock *ContMBB) {
+ DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
+ << ContingMBB->getNumber()
+ << ", cont = BB" << ContMBB->getNumber() << "\n";);
+
+ MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
+ if (MI) {
+ assert(isCondBranch(MI));
+ MachineBasicBlock::iterator I = MI;
+ MachineBasicBlock *TrueBranch = getTrueBranch(MI);
+ int OldOpcode = MI->getOpcode();
+ DebugLoc DL = MI->getDebugLoc();
+
+ bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
+
+ if (UseContinueLogical == false) {
+ int BranchOpcode =
+ TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
+ getBranchZeroOpcode(OldOpcode);
+ insertCondBranchBefore(I, BranchOpcode, DL);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
+ insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
} else {
- int branchOpcode =
- trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
- : CFGTraits::getContinueZeroOpcode(oldOpcode);
-
- CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ int BranchOpcode =
+ TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
+ getContinueZeroOpcode(OldOpcode);
+ insertCondBranchBefore(I, BranchOpcode, DL);
}
- branchInstr->eraseFromParent();
+ MI->eraseFromParent();
} else {
// if we've arrived here then we've already erased the branch instruction
- // travel back up the basic block to see the last reference of our debug location
- // we've just inserted that reference here so it should be representative
- if (setReg != INVALIDREGNUM) {
- CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
- // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
- } else {
- // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
- }
- } //else
-
-} //settleLoopcontBlock
-
-// BBs in exitBlkSet are determined as in break-path for loopRep,
-// before we can put code for BBs as inside loop-body for loopRep
-// check whether those BBs are determined as cont-BB for parentLoopRep
-// earlier.
-// If so, generate a new BB newBlk
-// (1) set newBlk common successor of BBs in exitBlkSet
-// (2) change the continue-instr in BBs in exitBlkSet to break-instr
-// (3) generate continue-instr in newBlk
-//
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
- LoopT *loopRep,
- std::set<BlockT *> &exitBlkSet,
- BlockT *exitLandBlk) {
- std::set<BlockT *> endBlkSet;
-
-
-
- for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
- iterEnd = exitBlkSet.end();
- iter != iterEnd; ++iter) {
- BlockT *exitBlk = *iter;
- BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
-
- if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
- return NULL;
-
- endBlkSet.insert(endBlk);
- }
-
- BlockT *newBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(newBlk); //insert to function
- CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
- SHOWNEWBLK(newBlk, "New continue block: ");
-
- for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
- iterEnd = endBlkSet.end();
- iter != iterEnd; ++iter) {
- BlockT *endBlk = *iter;
- InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
- if (contInstr) {
- contInstr->eraseFromParent();
- }
- endBlk->addSuccessor(newBlk);
- if (DEBUGME) {
- errs() << "Add new continue Block to BB"
- << endBlk->getNumber() << " successors\n";
- }
- }
-
- return newBlk;
-} //relocateLoopcontBlock
-
-
-// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
-// LoopLandBlock. This BB branch on the loop endBranchInit register to the
-// pathes corresponding to the loop exiting branches.
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
- BlockTSmallerVector &exitingBlks,
- BlockTSmallerVector &exitBlks) {
- const AMDGPUInstrInfo *tii =
- static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-
- RegiT endBranchReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- assert(endBranchReg >= 0);
-
- // reg = 0 before entering the loop
- addLoopEndbranchInitReg(loopRep, endBranchReg);
-
- uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
- assert(numBlks >=2 && numBlks == exitBlks.size());
-
- BlockT *preExitingBlk = exitingBlks[0];
- BlockT *preExitBlk = exitBlks[0];
- BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(preBranchBlk); //insert to function
- SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
-
- BlockT *newLandBlk = preBranchBlk;
-
- CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
- newLandBlk);
- preExitingBlk->removeSuccessor(preExitBlk);
- preExitingBlk->addSuccessor(newLandBlk);
-
- //it is redundant to add reg = 0 to exitingBlks[0]
-
- // For 1..n th exiting path (the last iteration handles two pathes) create the
- // branch to the previous path and the current path.
- for (uint32_t i = 1; i < numBlks; ++i) {
- BlockT *curExitingBlk = exitingBlks[i];
- BlockT *curExitBlk = exitBlks[i];
- BlockT *curBranchBlk;
-
- if (i == numBlks - 1) {
- curBranchBlk = curExitBlk;
- } else {
- curBranchBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(curBranchBlk); //insert to function
- SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
- }
-
- // Add reg = i to exitingBlks[i].
- CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
- endBranchReg, i);
-
- // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
- // (exitingBlks[i], newLandBlk).
- CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
- newLandBlk);
- curExitingBlk->removeSuccessor(curExitBlk);
- curExitingBlk->addSuccessor(newLandBlk);
-
- // add to preBranchBlk the branch instruction:
- // if (endBranchReg == preVal)
- // preExitBlk
- // else
- // curBranchBlk
- //
- // preValReg = i - 1
-
- DebugLoc DL;
- RegiT preValReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
-
- preBranchBlk->insert(preBranchBlk->begin(),
- tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
- i - 1));
-
- // condResReg = (endBranchReg == preValReg)
- RegiT condResReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
- .addReg(endBranchReg).addReg(preValReg);
-
- BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
- .addMBB(preExitBlk).addReg(condResReg);
-
- preBranchBlk->addSuccessor(preExitBlk);
- preBranchBlk->addSuccessor(curBranchBlk);
-
- // Update preExitingBlk, preExitBlk, preBranchBlk.
- preExitingBlk = curExitingBlk;
- preExitBlk = curExitBlk;
- preBranchBlk = curBranchBlk;
-
- } //end for 1 .. n blocks
-
- return newLandBlk;
-} //addLoopEndbranchBlock
-
-template<class PassT>
-typename CFGStructurizer<PassT>::PathToKind
-CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
- bool allowSideEntry) {
- assert(dstBlk);
-
- if (srcBlk == dstBlk) {
- return SinglePath_InPath;
+ // travel back up the basic block to see the last reference of our debug
+ // location we've just inserted that reference here so it should be
+ // representative insertEnd to ensure phi-moves, if exist, go before the
+ // continue-instr.
+ insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
+ getLastDebugLocInBB(ContingMBB));
}
+}
- while (srcBlk && srcBlk->succ_size() == 1) {
- srcBlk = *srcBlk->succ_begin();
- if (srcBlk == dstBlk) {
- return SinglePath_InPath;
- }
-
- if (!allowSideEntry && srcBlk->pred_size() > 1) {
- return Not_SinglePath;
- }
- }
-
- if (srcBlk && srcBlk->succ_size()==0) {
- return SinglePath_NotInPath;
- }
-
- return Not_SinglePath;
-} //singlePathTo
-
-// If there is a single path from srcBlk to dstBlk, return the last block before
-// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
-// last block in the path Otherwise, return NULL
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
- bool allowSideEntry) {
- assert(dstBlk);
-
- if (srcBlk == dstBlk) {
- return srcBlk;
- }
-
- if (srcBlk->succ_size() == 0) {
- return srcBlk;
- }
-
- while (srcBlk && srcBlk->succ_size() == 1) {
- BlockT *preBlk = srcBlk;
-
- srcBlk = *srcBlk->succ_begin();
- if (srcBlk == NULL) {
- return preBlk;
- }
-
- if (!allowSideEntry && srcBlk->pred_size() > 1) {
- return NULL;
- }
- }
-
- if (srcBlk && srcBlk->succ_size()==0) {
- return srcBlk;
- }
-
- return NULL;
-
-} //singlePathEnd
-
-template<class PassT>
-int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
- BlockT *dstBlk) {
- int cloned = 0;
- assert(preBlk->isSuccessor(srcBlk));
- while (srcBlk && srcBlk != dstBlk) {
- assert(srcBlk->succ_size() == 1);
- if (srcBlk->pred_size() > 1) {
- srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
- ++cloned;
+int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
+ MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
+ int Cloned = 0;
+ assert(PreMBB->isSuccessor(SrcMBB));
+ while (SrcMBB && SrcMBB != DstMBB) {
+ assert(SrcMBB->succ_size() == 1);
+ if (SrcMBB->pred_size() > 1) {
+ SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
+ ++Cloned;
}
- preBlk = srcBlk;
- srcBlk = *srcBlk->succ_begin();
+ PreMBB = SrcMBB;
+ SrcMBB = *SrcMBB->succ_begin();
}
- return cloned;
-} //cloneOnSideEntryTo
+ return Cloned;
+}
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
- BlockT *predBlk) {
- assert(predBlk->isSuccessor(curBlk) &&
+MachineBasicBlock *
+AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
+ MachineBasicBlock *PredMBB) {
+ assert(PredMBB->isSuccessor(MBB) &&
"succBlk is not a prececessor of curBlk");
- BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
- CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
+ replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
//srcBlk, oldBlk, newBlk
- predBlk->removeSuccessor(curBlk);
- predBlk->addSuccessor(cloneBlk);
+ PredMBB->removeSuccessor(MBB);
+ PredMBB->addSuccessor(CloneMBB);
// add all successor to cloneBlk
- CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+ cloneSuccessorList(CloneMBB, MBB);
- numClonedInstr += curBlk->size();
+ numClonedInstr += MBB->size();
- if (DEBUGME) {
- errs() << "Cloned block: " << "BB"
- << curBlk->getNumber() << "size " << curBlk->size() << "\n";
- }
+ DEBUG(
+ dbgs() << "Cloned block: " << "BB"
+ << MBB->getNumber() << "size " << MBB->size() << "\n";
+ );
- SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+ SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
- return cloneBlk;
-} //cloneBlockForPredecessor
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
- BlockT *exitingBlk) {
- BlockT *exitBlk = NULL;
-
- for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
- iterSuccEnd = exitingBlk->succ_end();
- iterSucc != iterSuccEnd; ++iterSucc) {
- BlockT *curBlk = *iterSucc;
- if (!loopRep->contains(curBlk)) {
- assert(exitBlk == NULL);
- exitBlk = curBlk;
- }
- }
-
- assert(exitBlk != NULL);
-
- return exitBlk;
-} //exitingBlock2ExitBlock
+ return CloneMBB;
+}
-template<class PassT>
-void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
- BlockT *dstBlk,
- InstrIterator insertPos) {
- InstrIterator spliceEnd;
+void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
+ MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator SpliceEnd;
//look for the input branchinstr, not the AMDGPU branchinstr
- InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
- if (branchInstr == NULL) {
- if (DEBUGME) {
- errs() << "migrateInstruction don't see branch instr\n" ;
- }
- spliceEnd = srcBlk->end();
+ MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
+ if (!BranchMI) {
+ DEBUG(
+ dbgs() << "migrateInstruction don't see branch instr\n" ;
+ );
+ SpliceEnd = SrcMBB->end();
} else {
- if (DEBUGME) {
- errs() << "migrateInstruction see branch instr\n" ;
- branchInstr->dump();
- }
- spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
- }
- if (DEBUGME) {
- errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
- << "srcSize = " << srcBlk->size() << "\n";
+ DEBUG(
+ dbgs() << "migrateInstruction see branch instr\n" ;
+ BranchMI->dump();
+ );
+ SpliceEnd = BranchMI;
}
+ DEBUG(
+ dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
+ << "srcSize = " << SrcMBB->size() << "\n";
+ );
//splice insert before insertPos
- dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+ DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
- if (DEBUGME) {
- errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
- << "srcSize = " << srcBlk->size() << "\n";
- }
-} //migrateInstruction
+ DEBUG(
+ dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
+ << "srcSize = " << SrcMBB->size() << "\n";
+ );
+}
-// normalizeInfiniteLoopExit change
-// B1:
-// uncond_br LoopHeader
-//
-// to
-// B1:
-// cond_br 1 LoopHeader dummyExit
-// and return the newly added dummy exit block
-//
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
- BlockT *loopHeader;
- BlockT *loopLatch;
- loopHeader = LoopRep->getHeader();
- loopLatch = LoopRep->getLoopLatch();
- BlockT *dummyExitBlk = NULL;
+MachineBasicBlock *
+AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
+ MachineBasicBlock *LoopHeader = LoopRep->getHeader();
+ MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- if (loopHeader!=NULL && loopLatch!=NULL) {
- InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
- if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
- dummyExitBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(dummyExitBlk); //insert to function
- SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
-
- if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
-
- typename BlockT::iterator insertPos =
- CFGTraits::getInstrPos(loopLatch, branchInstr);
- unsigned immReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
- InstrT *newInstr =
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
- MachineInstrBuilder MIB(*funcRep, newInstr);
- MIB.addMBB(loopHeader);
- MIB.addReg(immReg, false);
-
- SHOWNEWINSTR(newInstr);
-
- branchInstr->eraseFromParent();
- loopLatch->addSuccessor(dummyExitBlk);
- }
- }
- return dummyExitBlk;
-} //normalizeInfiniteLoopExit
+ if (!LoopHeader || !LoopLatch)
+ return NULL;
+ MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
+ // Is LoopRep an infinite loop ?
+ if (!BranchMI || !isUncondBranch(BranchMI))
+ return NULL;
-template<class PassT>
-void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
- InstrT *branchInstr;
+ MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
+ FuncRep->push_back(DummyExitBlk); //insert to function
+ SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+ DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
+ MachineBasicBlock::iterator I = BranchMI;
+ unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
+ llvm_unreachable("Extra register needed to handle CFG");
+ MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
+ MachineInstrBuilder MIB(*FuncRep, NewMI);
+ MIB.addMBB(LoopHeader);
+ MIB.addReg(ImmReg, false);
+ SHOWNEWINSTR(NewMI);
+ BranchMI->eraseFromParent();
+ LoopLatch->addSuccessor(DummyExitBlk);
+
+ return DummyExitBlk;
+}
+
+void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
+ MachineInstr *BranchMI;
// I saw two unconditional branch in one basic block in example
// test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
- while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
- && CFGTraits::isUncondBranch(branchInstr)) {
- if (DEBUGME) {
- errs() << "Removing unconditional branch instruction" ;
- branchInstr->dump();
- }
- branchInstr->eraseFromParent();
- }
-} //removeUnconditionalBranch
-
-template<class PassT>
-void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
- if (srcBlk->succ_size() == 2) {
- BlockT *blk1 = *srcBlk->succ_begin();
- BlockT *blk2 = *(++srcBlk->succ_begin());
-
- if (blk1 == blk2) {
- InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
- assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
- if (DEBUGME) {
- errs() << "Removing unneeded conditional branch instruction" ;
- branchInstr->dump();
- }
- branchInstr->eraseFromParent();
- SHOWNEWBLK(blk1, "Removing redundant successor");
- srcBlk->removeSuccessor(blk1);
- }
- }
-} //removeRedundantConditionalBranch
-
-template<class PassT>
-void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
- DEFAULT_VEC_SLOTS> &retBlks) {
- BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(dummyExitBlk); //insert to function
- CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
-
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
- retBlks.begin(),
- iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
- BlockT *curBlk = *iter;
- InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
- if (curInstr) {
- curInstr->eraseFromParent();
- }
- curBlk->addSuccessor(dummyExitBlk);
- if (DEBUGME) {
- errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
- << " successors\n";
- }
- } //for
-
- SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
-} //addDummyExitBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
- while (srcBlk->succ_size()) {
- srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ while ((BranchMI = getLoopendBlockBranchInstr(MBB))
+ && isUncondBranch(BranchMI)) {
+ DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
+ BranchMI->eraseFromParent();
}
}
-template<class PassT>
-void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
- BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
+ MachineBasicBlock *MBB) {
+ if (MBB->succ_size() != 2)
+ return;
+ MachineBasicBlock *MBB1 = *MBB->succ_begin();
+ MachineBasicBlock *MBB2 = *llvm::next(MBB->succ_begin());
+ if (MBB1 != MBB2)
+ return;
+
+ MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
+ assert(BranchMI && isCondBranch(BranchMI));
+ DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
+ BranchMI->eraseFromParent();
+ SHOWNEWBLK(MBB1, "Removing redundant successor");
+ MBB->removeSuccessor(MBB1);
+}
- if (srcBlkInfo == NULL) {
- srcBlkInfo = new BlockInfo();
+void AMDGPUCFGStructurizer::addDummyExitBlock(
+ SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
+ MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
+ FuncRep->push_back(DummyExitBlk); //insert to function
+ insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
+
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
+ E = RetMBB.end(); It != E; ++It) {
+ MachineBasicBlock *MBB = *It;
+ MachineInstr *MI = getReturnInstr(MBB);
+ if (MI)
+ MI->eraseFromParent();
+ MBB->addSuccessor(DummyExitBlk);
+ DEBUG(
+ dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
+ << " successors\n";
+ );
}
+ SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
+}
- srcBlkInfo->sccNum = sccNum;
+void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
+ while (MBB->succ_size())
+ MBB->removeSuccessor(*MBB->succ_begin());
}
-template<class PassT>
-int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
- BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
- return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
+ int SccNum) {
+ BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
+ if (!srcBlkInfo)
+ srcBlkInfo = new BlockInformation();
+ srcBlkInfo->SccNum = SccNum;
}
-template<class PassT>
-void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
- if (DEBUGME) {
- errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
- }
+void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
+ DEBUG(
+ dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
+ );
- BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+ BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
- if (srcBlkInfo == NULL) {
- srcBlkInfo = new BlockInfo();
- }
+ if (!SrcBlkInfo)
+ SrcBlkInfo = new BlockInformation();
- srcBlkInfo->isRetired = true;
- assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ SrcBlkInfo->IsRetired = true;
+ assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
&& "can't retire block yet");
}
-template<class PassT>
-bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
- BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
- return (srcBlkInfo && srcBlkInfo->isRetired);
-}
-
-template<class PassT>
-bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
- LoopT *loopRep = loopInfo->getLoopFor(curBlk);
- while (loopRep && loopRep->getHeader() == curBlk) {
- LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
-
- if(loopLand == NULL)
- return true;
-
- BlockT *landBlk = loopLand->landBlk;
- assert(landBlk);
- if (!isRetiredBlock(landBlk)) {
- return true;
- }
-
- loopRep = loopRep->getParentLoop();
- }
-
- return false;
-} //isActiveLoophead
-
-template<class PassT>
-bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
- const unsigned blockSizeThreshold = 30;
- const unsigned cloneInstrThreshold = 100;
-
- bool multiplePreds = blk && (blk->pred_size() > 1);
-
- if(!multiplePreds)
- return false;
-
- unsigned blkSize = blk->size();
- return ((blkSize > blockSizeThreshold)
- && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
-} //needMigrateBlock
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
- BlockTSmallerVector &exitBlks,
- std::set<BlockT *> &exitBlkSet) {
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
-
- for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
- predIterEnd = landBlk->pred_end();
- predIter != predIterEnd; ++predIter) {
- BlockT *curBlk = *predIter;
- if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
- inpathBlks.push_back(curBlk);
- }
- } //for
-
- //if landBlk has predecessors that are not in the given loop,
- //create a new block
- BlockT *newLandBlk = landBlk;
- if (inpathBlks.size() != landBlk->pred_size()) {
- newLandBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(newLandBlk); //insert to function
- newLandBlk->addSuccessor(landBlk);
- for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
- inpathBlks.begin(),
- iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
- BlockT *curBlk = *iter;
- CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
- //srcBlk, oldBlk, newBlk
- curBlk->removeSuccessor(landBlk);
- curBlk->addSuccessor(newLandBlk);
- }
- for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
- if (exitBlks[i] == landBlk) {
- exitBlks[i] = newLandBlk;
- }
- }
- SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
- }
-
- setLoopLandBlock(loopRep, newLandBlk);
-
- return newLandBlk;
-} // recordLoopbreakLand
-
-template<class PassT>
-void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- assert(theEntry->landBlk == NULL);
-
- if (blk == NULL) {
- blk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(blk); //insert to function
- SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
- }
-
- theEntry->landBlk = blk;
-
- if (DEBUGME) {
- errs() << "setLoopLandBlock loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " landing-block = BB" << blk->getNumber() << "\n";
- }
-} // setLoopLandBlock
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
-
- theEntry->breakOnRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopBreakOnReg loop-header = BB"
+void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
+ if (!MBB) {
+ MBB = FuncRep->CreateMachineBasicBlock();
+ FuncRep->push_back(MBB); //insert to function
+ SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
+ }
+ TheEntry = MBB;
+ DEBUG(
+ dbgs() << "setLoopLandBlock loop-header = BB"
<< loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopBreakOnReg
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- theEntry->contOnRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopContOnReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopContOnReg
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- theEntry->breakInitRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopBreakInitReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopBreakInitReg
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- theEntry->contInitRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopContInitReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopContInitReg
-
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
- RegiT regNum) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- if (theEntry == NULL) {
- theEntry = new LoopLandInfo();
- }
- theEntry->endbranchInitRegs.insert(regNum);
-
- if (DEBUGME) {
- errs() << "addLoopEndbranchInitReg loop-header = BB"
- << loopRep->getHeader()->getNumber()
- << " regNum = " << regNum << "\n";
- }
-} // addLoopEndbranchInitReg
-
-template<class PassT>
-typename CFGStructurizer<PassT>::LoopLandInfo *
-CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- return theEntry;
-} // getLoopLandInfo
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
- LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-
- return theEntry ? theEntry->landBlk : NULL;
-} // getLoopLandBlock
-
-
-template<class PassT>
-bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
- LoopT *loopRep = loopInfo->getLoopFor(curBlk);
- if (loopRep == NULL)
- return false;
-
- BlockT *loopHeader = loopRep->getHeader();
-
- return curBlk->isSuccessor(loopHeader);
-
-} //hasBackEdge
-
-template<class PassT>
-unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
- return loopRep ? loopRep->getLoopDepth() : 0;
-} //getLoopDepth
-
-template<class PassT>
-int CFGStructurizer<PassT>::countActiveBlock
-(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
- typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
- int count = 0;
- while (iterStart != iterEnd) {
- if (!isRetiredBlock(*iterStart)) {
- ++count;
- }
- ++iterStart;
- }
-
- return count;
-} //countActiveBlock
+ << " landing-block = BB" << MBB->getNumber() << "\n";
+ );
+}
-// This is work around solution for findNearestCommonDominator not avaiable to
-// post dom a proper fix should go to Dominators.h.
+MachineBasicBlock *
+AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT*
-CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+ if (PDT->dominates(MBB1, MBB2))
+ return MBB1;
+ if (PDT->dominates(MBB2, MBB1))
+ return MBB2;
- if (postDomTree->dominates(blk1, blk2)) {
- return blk1;
- }
- if (postDomTree->dominates(blk2, blk1)) {
- return blk2;
- }
-
- DomTreeNodeT *node1 = postDomTree->getNode(blk1);
- DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+ MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
+ MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
// Handle newly cloned node.
- if (node1 == NULL && blk1->succ_size() == 1) {
- return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
- }
- if (node2 == NULL && blk2->succ_size() == 1) {
- return findNearestCommonPostDom(blk1, *blk2->succ_begin());
- }
+ if (!Node1 && MBB1->succ_size() == 1)
+ return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
+ if (!Node2 && MBB2->succ_size() == 1)
+ return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
- if (node1 == NULL || node2 == NULL) {
+ if (!Node1 || !Node2)
return NULL;
- }
- node1 = node1->getIDom();
- while (node1) {
- if (postDomTree->dominates(node1, node2)) {
- return node1->getBlock();
- }
- node1 = node1->getIDom();
+ Node1 = Node1->getIDom();
+ while (Node1) {
+ if (PDT->dominates(Node1, Node2))
+ return Node1->getBlock();
+ Node1 = Node1->getIDom();
}
return NULL;
}
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::findNearestCommonPostDom
-(typename std::set<BlockT *> &blks) {
- BlockT *commonDom;
- typename std::set<BlockT *>::const_iterator iter = blks.begin();
- typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
- for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
- BlockT *curBlk = *iter;
- if (curBlk != commonDom) {
- commonDom = findNearestCommonPostDom(curBlk, commonDom);
- }
- }
-
- if (DEBUGME) {
- errs() << "Common post dominator for exit blocks is ";
- if (commonDom) {
- errs() << "BB" << commonDom->getNumber() << "\n";
- } else {
- errs() << "NULL\n";
- }
- }
-
- return commonDom;
-} //findNearestCommonPostDom
-
-} //end namespace llvm
-
-//todo: move-end
-
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructurizer for AMDGPU
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm {
-class AMDGPUCFGStructurizer : public MachineFunctionPass {
-public:
- typedef MachineInstr InstructionType;
- typedef MachineFunction FunctionType;
- typedef MachineBasicBlock BlockType;
- typedef MachineLoopInfo LoopinfoType;
- typedef MachineDominatorTree DominatortreeType;
- typedef MachinePostDominatorTree PostDominatortreeType;
- typedef MachineDomTreeNode DomTreeNodeType;
- typedef MachineLoop LoopType;
-
-protected:
- TargetMachine &TM;
- const TargetInstrInfo *TII;
- const AMDGPURegisterInfo *TRI;
-
-public:
- AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
- const TargetInstrInfo *getTargetInstrInfo() const;
-
-private:
-
-};
-
-} //end of namespace llvm
-AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
-: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
- TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
-}
-
-const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
- return TII;
+MachineBasicBlock *
+AMDGPUCFGStructurizer::findNearestCommonPostDom(
+ std::set<MachineBasicBlock *> &MBBs) {
+ MachineBasicBlock *CommonDom;
+ std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
+ std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
+ for (CommonDom = *It; It != E && CommonDom; ++It) {
+ MachineBasicBlock *MBB = *It;
+ if (MBB != CommonDom)
+ CommonDom = findNearestCommonPostDom(MBB, CommonDom);
+ }
+
+ DEBUG(
+ dbgs() << "Common post dominator for exit blocks is ";
+ if (CommonDom)
+ dbgs() << "BB" << CommonDom->getNumber() << "\n";
+ else
+ dbgs() << "NULL\n";
+ );
+
+ return CommonDom;
}
-//===----------------------------------------------------------------------===//
-//
-// CFGPrepare
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm {
-class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
-public:
- static char ID;
-public:
- AMDGPUCFGPrepare(TargetMachine &tm);
-
- virtual const char *getPassName() const;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
- bool runOnMachineFunction(MachineFunction &F);
+char AMDGPUCFGStructurizer::ID = 0;
-private:
-
-};
+} // end anonymous namespace
-char AMDGPUCFGPrepare::ID = 0;
-} //end of namespace llvm
-
-AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
- : AMDGPUCFGStructurizer(ID, tm ) {
-}
-const char *AMDGPUCFGPrepare::getPassName() const {
- return "AMD IL Control Flow Graph Preparation Pass";
-}
-
-void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
-}
-
-//===----------------------------------------------------------------------===//
-//
-// CFGPerform
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm {
-class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
-public:
- static char ID;
-
-public:
- AMDGPUCFGPerform(TargetMachine &tm);
- virtual const char *getPassName() const;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- bool runOnMachineFunction(MachineFunction &F);
-
-private:
-
-};
-
-char AMDGPUCFGPerform::ID = 0;
-} //end of namespace llvm
-
- AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
-: AMDGPUCFGStructurizer(ID, tm) {
-}
-
-const char *AMDGPUCFGPerform::getPassName() const {
- return "AMD IL Control Flow Graph structurizer Pass";
-}
-
-void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
-}
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructTraits<AMDGPUCFGStructurizer>
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvmCFGStruct {
-// this class is tailor to the AMDGPU backend
-template<>
-struct CFGStructTraits<AMDGPUCFGStructurizer> {
- typedef int RegiT;
-
- static int getBranchNzeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
- default:
- assert(0 && "internal error");
- }
- return -1;
- }
-
- static int getBranchZeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
- default:
- assert(0 && "internal error");
- }
- return -1;
- }
-
- static int getContinueNzeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
- default:
- assert(0 && "internal error");
- };
- return -1;
- }
-
- static int getContinueZeroOpcode(int oldOpcode) {
- switch(oldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
- default:
- assert(0 && "internal error");
- }
- return -1;
- }
-
- static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
- return instr->getOperand(0).getMBB();
- }
-
- static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
- instr->getOperand(0).setMBB(blk);
- }
-
- static MachineBasicBlock *
- getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
- assert(blk->succ_size() == 2);
- MachineBasicBlock *trueBranch = getTrueBranch(instr);
- MachineBasicBlock::succ_iterator iter = blk->succ_begin();
- MachineBasicBlock::succ_iterator iterNext = iter;
- ++iterNext;
-
- return (*iter == trueBranch) ? *iterNext : *iter;
- }
-
- static bool isCondBranch(MachineInstr *instr) {
- switch (instr->getOpcode()) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32:
- break;
- default:
- return false;
- }
- return true;
- }
-
- static bool isUncondBranch(MachineInstr *instr) {
- switch (instr->getOpcode()) {
- case AMDGPU::JUMP:
- case AMDGPU::BRANCH:
- return true;
- default:
- return false;
- }
- return true;
- }
-
- static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
- //get DebugLoc from the first MachineBasicBlock instruction with debug info
- DebugLoc DL;
- for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
- MachineInstr *instr = &(*iter);
- if (instr->getDebugLoc().isUnknown() == false) {
- DL = instr->getDebugLoc();
- }
- }
- return DL;
- }
-
- static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
- MachineBasicBlock::reverse_iterator iter = blk->rbegin();
- MachineInstr *instr = &*iter;
- if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
- return instr;
- }
- return NULL;
- }
-
- // The correct naming for this is getPossibleLoopendBlockBranchInstr.
- //
- // BB with backward-edge could have move instructions after the branch
- // instruction. Such move instruction "belong to" the loop backward-edge.
- //
- static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
- const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
- blk->getParent()->getTarget().getInstrInfo());
-
- for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
- iterEnd = blk->rend(); iter != iterEnd; ++iter) {
- // FIXME: Simplify
- MachineInstr *instr = &*iter;
- if (instr) {
- if (isCondBranch(instr) || isUncondBranch(instr)) {
- return instr;
- } else if (!TII->isMov(instr->getOpcode())) {
- break;
- }
- }
- }
- return NULL;
- }
-
- static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
- MachineBasicBlock::reverse_iterator iter = blk->rbegin();
- if (iter != blk->rend()) {
- MachineInstr *instr = &(*iter);
- if (instr->getOpcode() == AMDGPU::RETURN) {
- return instr;
- }
- }
- return NULL;
- }
-
- static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
- MachineBasicBlock::reverse_iterator iter = blk->rbegin();
- if (iter != blk->rend()) {
- MachineInstr *instr = &(*iter);
- if (instr->getOpcode() == AMDGPU::CONTINUE) {
- return instr;
- }
- }
- return NULL;
- }
-
- static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
- for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
- MachineInstr *instr = &(*iter);
- if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
- return instr;
- }
- }
- return NULL;
- }
-
- static bool isReturnBlock(MachineBasicBlock *blk) {
- MachineInstr *instr = getReturnInstr(blk);
- bool isReturn = (blk->succ_size() == 0);
- if (instr) {
- assert(isReturn);
- } else if (isReturn) {
- if (DEBUGME) {
- errs() << "BB" << blk->getNumber()
- <<" is return block without RETURN instr\n";
- }
- }
-
- return isReturn;
- }
-
- static MachineBasicBlock::iterator
- getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
- assert(instr->getParent() == blk && "instruction doesn't belong to block");
- MachineBasicBlock::iterator iter = blk->begin();
- MachineBasicBlock::iterator iterEnd = blk->end();
- while (&(*iter) != instr && iter != iterEnd) {
- ++iter;
- }
-
- assert(iter != iterEnd);
- return iter;
- }//getInstrPos
-
- static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
- AMDGPUCFGStructurizer *passRep) {
- return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
- } //insertInstrBefore
-
- static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
- AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
-
- MachineBasicBlock::iterator res;
- if (blk->begin() != blk->end()) {
- blk->insert(blk->begin(), newInstr);
- } else {
- blk->push_back(newInstr);
- }
-
- SHOWNEWINSTR(newInstr);
-
- return newInstr;
- } //insertInstrBefore
-
- static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
- AMDGPUCFGStructurizer *passRep) {
- insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
- } //insertInstrEnd
-
- static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
- AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineInstr *newInstr = blk->getParent()
- ->CreateMachineInstr(tii->get(newOpcode), DL);
-
- blk->push_back(newInstr);
- //assume the instruction doesn't take any reg operand ...
-
- SHOWNEWINSTR(newInstr);
- } //insertInstrEnd
-
- static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
- int newOpcode,
- AMDGPUCFGStructurizer *passRep) {
- MachineInstr *oldInstr = &(*instrPos);
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineBasicBlock *blk = oldInstr->getParent();
- MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
- DebugLoc());
-
- blk->insert(instrPos, newInstr);
- //assume the instruction doesn't take any reg operand ...
-
- SHOWNEWINSTR(newInstr);
- return newInstr;
- } //insertInstrBefore
-
- static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
- int newOpcode,
- AMDGPUCFGStructurizer *passRep,
- DebugLoc DL) {
- MachineInstr *oldInstr = &(*instrPos);
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineBasicBlock *blk = oldInstr->getParent();
- MachineFunction *MF = blk->getParent();
- MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
-
- blk->insert(instrPos, newInstr);
- MachineInstrBuilder MIB(*MF, newInstr);
- MIB.addReg(oldInstr->getOperand(1).getReg(), false);
-
- SHOWNEWINSTR(newInstr);
- //erase later oldInstr->eraseFromParent();
- } //insertCondBranchBefore
-
- static void insertCondBranchBefore(MachineBasicBlock *blk,
- MachineBasicBlock::iterator insertPos,
- int newOpcode,
- AMDGPUCFGStructurizer *passRep,
- RegiT regNum,
- DebugLoc DL) {
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineFunction *MF = blk->getParent();
-
- MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
-
- //insert before
- blk->insert(insertPos, newInstr);
- MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
-
- SHOWNEWINSTR(newInstr);
- } //insertCondBranchBefore
-
- static void insertCondBranchEnd(MachineBasicBlock *blk,
- int newOpcode,
- AMDGPUCFGStructurizer *passRep,
- RegiT regNum) {
- const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
- MachineFunction *MF = blk->getParent();
- MachineInstr *newInstr =
- MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
-
- blk->push_back(newInstr);
- MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
-
- SHOWNEWINSTR(newInstr);
- } //insertCondBranchEnd
-
-
- static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
- AMDGPUCFGStructurizer *passRep,
- RegiT regNum, int regVal) {
- MachineInstr *oldInstr = &(*instrPos);
- const AMDGPUInstrInfo *tii =
- static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
- MachineBasicBlock *blk = oldInstr->getParent();
- MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
- regVal);
- blk->insert(instrPos, newInstr);
-
- SHOWNEWINSTR(newInstr);
- } //insertAssignInstrBefore
-
- static void insertAssignInstrBefore(MachineBasicBlock *blk,
- AMDGPUCFGStructurizer *passRep,
- RegiT regNum, int regVal) {
- const AMDGPUInstrInfo *tii =
- static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
-
- MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
- regVal);
- if (blk->begin() != blk->end()) {
- blk->insert(blk->begin(), newInstr);
- } else {
- blk->push_back(newInstr);
- }
-
- SHOWNEWINSTR(newInstr);
-
- } //insertInstrBefore
-
- static void insertCompareInstrBefore(MachineBasicBlock *blk,
- MachineBasicBlock::iterator instrPos,
- AMDGPUCFGStructurizer *passRep,
- RegiT dstReg, RegiT src1Reg,
- RegiT src2Reg) {
- const AMDGPUInstrInfo *tii =
- static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
- MachineFunction *MF = blk->getParent();
- MachineInstr *newInstr =
- MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
-
- MachineInstrBuilder MIB(*MF, newInstr);
- MIB.addReg(dstReg, RegState::Define); //set target
- MIB.addReg(src1Reg); //set src value
- MIB.addReg(src2Reg); //set src value
-
- blk->insert(instrPos, newInstr);
- SHOWNEWINSTR(newInstr);
-
- } //insertCompareInstrBefore
-
- static void cloneSuccessorList(MachineBasicBlock *dstBlk,
- MachineBasicBlock *srcBlk) {
- for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
- iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
- dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
- }
- } //cloneSuccessorList
-
- static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
- MachineFunction *func = srcBlk->getParent();
- MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
- func->push_back(newBlk); //insert to function
- for (MachineBasicBlock::iterator iter = srcBlk->begin(),
- iterEnd = srcBlk->end();
- iter != iterEnd; ++iter) {
- MachineInstr *instr = func->CloneMachineInstr(iter);
- newBlk->push_back(instr);
- }
- return newBlk;
- }
-
- //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
- //the AMDGPU instruction is not recognized as terminator fix this and retire
- //this routine
- static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
- MachineBasicBlock *oldBlk,
- MachineBasicBlock *newBlk) {
- MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
- if (branchInstr && isCondBranch(branchInstr) &&
- getTrueBranch(branchInstr) == oldBlk) {
- setTrueBranch(branchInstr, newBlk);
- }
- }
-
- static void wrapup(MachineBasicBlock *entryBlk) {
- assert((!entryBlk->getParent()->getJumpTableInfo()
- || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
- && "found a jump table");
-
- //collect continue right before endloop
- SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
- MachineBasicBlock::iterator pre = entryBlk->begin();
- MachineBasicBlock::iterator iterEnd = entryBlk->end();
- MachineBasicBlock::iterator iter = pre;
- while (iter != iterEnd) {
- if (pre->getOpcode() == AMDGPU::CONTINUE
- && iter->getOpcode() == AMDGPU::ENDLOOP) {
- contInstr.push_back(pre);
- }
- pre = iter;
- ++iter;
- } //end while
-
- //delete continue right before endloop
- for (unsigned i = 0; i < contInstr.size(); ++i) {
- contInstr[i]->eraseFromParent();
- }
-
- // TODO to fix up jump table so later phase won't be confused. if
- // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
- // there isn't such an interface yet. alternatively, replace all the other
- // blocks in the jump table with the entryBlk //}
-
- } //wrapup
-
- static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
- return &pass.getAnalysis<MachineDominatorTree>();
- }
-
- static MachinePostDominatorTree*
- getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
- return &pass.getAnalysis<MachinePostDominatorTree>();
- }
-
- static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
- return &pass.getAnalysis<MachineLoopInfo>();
- }
-}; // template class CFGStructTraits
-} //end of namespace llvm
-
-// createAMDGPUCFGPreparationPass- Returns a pass
-FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
- ) {
- return new AMDGPUCFGPrepare(tm );
-}
-
-bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
- return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
- *this,
- TRI);
-}
-
-// createAMDGPUCFGStructurizerPass- Returns a pass
-FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
- ) {
- return new AMDGPUCFGPerform(tm );
-}
-bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
- return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
- *this,
- TRI);
+FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
+ return new AMDGPUCFGStructurizer(tm);
}
OpenPOWER on IntegriCloud