Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
36 files changed, 2786 insertions, 1102 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp index 03c3a80..409326e 100644 --- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> namespace llvm { diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp index e9f6239..0262358f 100644 --- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -52,32 +52,34 @@ // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; #define DEBUG_TYPE "add-discriminators" namespace { - struct AddDiscriminators : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - AddDiscriminators() : FunctionPass(ID) { - initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry()); - } +struct AddDiscriminators : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + AddDiscriminators() : FunctionPass(ID) { + initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override; - }; + bool runOnFunction(Function &F) override; +}; } char AddDiscriminators::ID = 0; @@ -89,17 +91,17 @@ INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators", // Command line option to disable discriminator generation even in the // presence of debug information. This is only needed when debugging // debug info generation issues. -static cl::opt<bool> -NoDiscriminators("no-discriminators", cl::init(false), - cl::desc("Disable generation of discriminator information.")); +static cl::opt<bool> NoDiscriminators( + "no-discriminators", cl::init(false), + cl::desc("Disable generation of discriminator information.")); FunctionPass *llvm::createAddDiscriminatorsPass() { return new AddDiscriminators(); } static bool hasDebugInfo(const Function &F) { - NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu"); - return CUNodes != nullptr; + DISubprogram *S = getDISubprogram(&F); + return S != nullptr; } /// \brief Assign DWARF discriminators. @@ -159,8 +161,7 @@ bool AddDiscriminators::runOnFunction(Function &F) { // Simlarly, if the function has no debug info, do nothing. // Finally, if this module is built with dwarf versions earlier than 4, // do nothing (discriminator support is a DWARF 4 feature). 
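[Annotation] The AddDiscriminators rewrite that begins here replaces the old successor-walking scheme with a map keyed on (file, line): the first basic block seen at a location keeps discriminator 0, and every further block at the same location gets a fresh value. A minimal self-contained sketch of that bookkeeping, with all names invented for illustration (this is not the patch's code, which uses DenseMap, Metadata and DILocation):

#include <map>
#include <string>
#include <utility>

// Mirrors the patch's typedef: a source location is (filename, line).
using Location = std::pair<std::string, unsigned>;

struct DiscriminatorAllocator {
  // Per location: which block was assigned which discriminator.
  std::map<Location, std::map<const void *, unsigned>> Assigned;

  // First block at a location keeps discriminator 0; each additional block
  // at the same location receives a fresh value, so instructions from
  // different blocks on one source line become distinguishable.
  unsigned assign(const Location &L, const void *Block) {
    auto &PerLoc = Assigned[L];
    auto It = PerLoc.find(Block);
    if (It != PerLoc.end())
      return It->second; // same block seen again: reuse its discriminator
    unsigned D = static_cast<unsigned>(PerLoc.size()); // 0, 1, 2, ...
    PerLoc[Block] = D;
    return D;
  }
};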
- if (NoDiscriminators || - !hasDebugInfo(F) || + if (NoDiscriminators || !hasDebugInfo(F) || F.getParent()->getDwarfVersion() < 4) return false; @@ -169,59 +170,77 @@ bool AddDiscriminators::runOnFunction(Function &F) { LLVMContext &Ctx = M->getContext(); DIBuilder Builder(*M, /*AllowUnresolved*/ false); - // Traverse all the blocks looking for instructions in different - // blocks that are at the same file:line location. - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *B = I; - TerminatorInst *Last = B->getTerminator(); - const DILocation *LastDIL = Last->getDebugLoc(); - if (!LastDIL) - continue; - - for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { - BasicBlock *Succ = Last->getSuccessor(I); - Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); - const DILocation *FirstDIL = First->getDebugLoc(); - if (!FirstDIL) + typedef std::pair<StringRef, unsigned> Location; + typedef DenseMap<const BasicBlock *, Metadata *> BBScopeMap; + typedef DenseMap<Location, BBScopeMap> LocationBBMap; + + LocationBBMap LBM; + + // Traverse all instructions in the function. If the source line location + // of the instruction appears in other basic block, assign a new + // discriminator for this instruction. + for (BasicBlock &B : F) { + for (auto &I : B.getInstList()) { + if (isa<DbgInfoIntrinsic>(&I)) + continue; + const DILocation *DIL = I.getDebugLoc(); + if (!DIL) + continue; + Location L = std::make_pair(DIL->getFilename(), DIL->getLine()); + auto &BBMap = LBM[L]; + auto R = BBMap.insert(std::make_pair(&B, (Metadata *)nullptr)); + if (BBMap.size() == 1) + continue; + bool InsertSuccess = R.second; + Metadata *&NewScope = R.first->second; + // If we could insert a different block in the same location, a + // discriminator is needed to distinguish both instructions. + if (InsertSuccess) { + auto *Scope = DIL->getScope(); + auto *File = + Builder.createFile(DIL->getFilename(), Scope->getDirectory()); + NewScope = Builder.createLexicalBlockFile( + Scope, File, DIL->computeNewDiscriminator()); + } + I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(), + NewScope, DIL->getInlinedAt())); + DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" + << dyn_cast<DILexicalBlockFile>(NewScope)->getDiscriminator() + << I << "\n"); + Changed = true; + } + } + + // Traverse all instructions and assign new discriminators to call + // instructions with the same lineno that are in the same basic block. + // Sample base profile needs to distinguish different function calls within + // a same source line for correct profile annotation. + for (BasicBlock &B : F) { + const DILocation *FirstDIL = NULL; + for (auto &I : B.getInstList()) { + CallInst *Current = dyn_cast<CallInst>(&I); + if (!Current || isa<DbgInfoIntrinsic>(&I)) continue; - // If the first instruction (First) of Succ is at the same file - // location as B's last instruction (Last), add a new - // discriminator for First's location and all the instructions - // in Succ that share the same location with First. - if (!FirstDIL->canDiscriminate(*LastDIL)) { - // Create a new lexical scope and compute a new discriminator - // number for it. - StringRef Filename = FirstDIL->getFilename(); - auto *Scope = FirstDIL->getScope(); - auto *File = Builder.createFile(Filename, Scope->getDirectory()); - - // FIXME: Calculate the discriminator here, based on local information, - // and delete DILocation::computeNewDiscriminator(). 
The current - // solution gives different results depending on other modules in the - // same context. All we really need is to discriminate between - // FirstDIL and LastDIL -- a local map would suffice. - unsigned Discriminator = FirstDIL->computeNewDiscriminator(); - auto *NewScope = - Builder.createLexicalBlockFile(Scope, File, Discriminator); - auto *NewDIL = - DILocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(), - NewScope, FirstDIL->getInlinedAt()); - DebugLoc newDebugLoc = NewDIL; - - // Attach this new debug location to First and every - // instruction following First that shares the same location. - for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; - ++I1) { - if (I1->getDebugLoc().get() != FirstDIL) - break; - I1->setDebugLoc(newDebugLoc); - DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine() - << ":" << NewDIL->getColumn() << ":" - << NewDIL->getDiscriminator() << *I1 << "\n"); + DILocation *CurrentDIL = Current->getDebugLoc(); + if (FirstDIL) { + if (CurrentDIL && CurrentDIL->getLine() == FirstDIL->getLine() && + CurrentDIL->getFilename() == FirstDIL->getFilename()) { + auto *Scope = FirstDIL->getScope(); + auto *File = Builder.createFile(FirstDIL->getFilename(), + Scope->getDirectory()); + auto *NewScope = Builder.createLexicalBlockFile( + Scope, File, FirstDIL->computeNewDiscriminator()); + Current->setDebugLoc(DILocation::get( + Ctx, CurrentDIL->getLine(), CurrentDIL->getColumn(), NewScope, + CurrentDIL->getInlinedAt())); + Changed = true; + } else { + FirstDIL = CurrentDIL; } - DEBUG(dbgs() << "\n"); - Changed = true; + } else { + FirstDIL = CurrentDIL; } } } diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index ef7daca..a5137e9 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -41,8 +41,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { // Loop through all of our successors and make sure they know that one // of their predecessors is going away. - for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) - BBTerm->getSuccessor(i)->removePredecessor(BB); + for (BasicBlock *Succ : BBTerm->successors()) + Succ->removePredecessor(BB); // Zap all the instructions in the block. while (!BB->empty()) { @@ -65,7 +65,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { /// any single-entry PHI nodes in it, fold them away. This handles the case /// when all entries to the PHI nodes in a block are guaranteed equal, such as /// when the block has exactly one predecessor. -void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA, +void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceAnalysis *MemDep) { if (!isa<PHINode>(BB->begin())) return; @@ -77,8 +77,6 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA, if (MemDep) MemDep->removeInstruction(PN); // Memdep updates AA itself. - else if (AA && isa<PointerType>(PN->getType())) - AA->deleteValue(PN); PN->eraseFromParent(); } @@ -108,7 +106,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, /// if possible. The return value indicates success or failure. bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, - LoopInfo *LI, AliasAnalysis *AA, + LoopInfo *LI, MemoryDependenceAnalysis *MemDep) { // Don't merge away blocks who have their address taken. 
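[Annotation] To see why the second AddDiscriminators loop above treats call instructions specially, consider a hypothetical one-liner (not from the patch):

int f();
int g();
// Both calls sit on one source line. A sample profile keyed only on
// "file.c:N" could not attribute counts to f() versus g(); giving the
// second call its own discriminator splits the line into two buckets.
int h() { return f() + g(); }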
if (BB->hasAddressTaken()) return false; @@ -119,8 +117,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, // Don't break self-loops. if (PredBB == BB) return false; - // Don't break invokes. - if (isa<InvokeInst>(PredBB->getTerminator())) return false; + // Don't break unwinding instructions. + if (PredBB->getTerminator()->isExceptional()) + return false; succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); BasicBlock *OnlySucc = BB; @@ -145,7 +144,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, // Begin by getting rid of unneeded PHIs. if (isa<PHINode>(BB->front())) - FoldSingleEntryPHINodes(BB, AA, MemDep); + FoldSingleEntryPHINodes(BB, MemDep); // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); @@ -253,7 +252,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, // block. assert(SP == BB && "CFG broken"); SP = nullptr; - return SplitBlock(Succ, Succ->begin(), DT, LI); + return SplitBlock(Succ, &Succ->front(), DT, LI); } // Otherwise, if BB has a single successor, split it at the bottom of the @@ -284,8 +283,8 @@ llvm::SplitAllCriticalEdges(Function &F, /// BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI) { - BasicBlock::iterator SplitIt = SplitPt; - while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt)) + BasicBlock::iterator SplitIt = SplitPt->getIterator(); + while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) ++SplitIt; BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); @@ -393,7 +392,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock *> Preds, BranchInst *BI, - AliasAnalysis *AA, bool HasLoopExit) { + bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { @@ -474,17 +473,20 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, - const char *Suffix, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, - bool PreserveLCSSA) { + const char *Suffix, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + // Do not attempt to split that which cannot be split. + if (!BB->canSplitPredecessors()) + return nullptr; + // For the landingpads we need to act a bit differently. // Delegate this work to the SplitLandingPadPredecessors. if (BB->isLandingPad()) { SmallVector<BasicBlock*, 2> NewBBs; std::string NewName = std::string(Suffix) + ".split-lp"; - SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), - NewBBs, AA, DT, LI, PreserveLCSSA); + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT, + LI, PreserveLCSSA); return NewBBs[0]; } @@ -523,7 +525,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. 
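[Annotation] A mechanical change recurring through this file (and the rest of the patch) is the ilist-iterator migration: with the new intrusive list, Instruction* no longer converts implicitly to BasicBlock::iterator or back. A small sketch of the conversion idioms the patch inserts, using the same LLVM APIs:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

void iteratorIdioms(Instruction *SplitPt, BasicBlock *BB) {
  // Pointer -> iterator: explicit getIterator() instead of an implicit cast.
  BasicBlock::iterator It = SplitPt->getIterator();
  // Iterator -> pointer: explicit &* instead of implicit decay.
  Instruction *Back = &*It;
  // "First instruction as a pointer": &BB->front() instead of BB->begin().
  Instruction *First = &BB->front();
  (void)Back;
  (void)First;
}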
- UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); return NewBB; } @@ -544,8 +546,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix1, const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); // Create a new basic block for OrigBB's predecessors listed in Preds. Insert @@ -574,7 +576,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. - UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit); // Move the remaining edges from OrigBB to point to NewBB2. SmallVector<BasicBlock*, 8> NewBB2Preds; @@ -611,7 +613,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. - UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit); } LandingPadInst *LPad = OrigBB->getLandingPadInst(); @@ -661,7 +663,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // return instruction. V = BCI->getOperand(0); NewBC = BCI->clone(); - Pred->getInstList().insert(NewRet, NewBC); + Pred->getInstList().insert(NewRet->getIterator(), NewBC); *i = NewBC; } if (PHINode *PN = dyn_cast<PHINode>(V)) { @@ -707,7 +709,7 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond, MDNode *BranchWeights, DominatorTree *DT) { BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); TerminatorInst *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); @@ -757,7 +759,7 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, TerminatorInst **ElseTerm, MDNode *BranchWeights) { BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); TerminatorInst *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 7e83c9e..9582599 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -101,10 +101,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, continue; // Otherwise a new PHI is needed. Create one and populate it. - PHINode *NewPN = - PHINode::Create(PN->getType(), Preds.size(), "split", - SplitBB->isLandingPad() ? - SplitBB->begin() : SplitBB->getTerminator()); + PHINode *NewPN = PHINode::Create( + PN->getType(), Preds.size(), "split", + SplitBB->isLandingPad() ? 
&SplitBB->front() : SplitBB->getTerminator()); for (unsigned i = 0, e = Preds.size(); i != e; ++i) NewPN->addIncoming(V, Preds[i]); @@ -141,9 +140,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); - // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // Splitting the critical edge to a pad block is non-trivial. Don't do // it in this generic function. - if (DestBB->isLandingPad()) return nullptr; + if (DestBB->isEHPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), @@ -157,7 +156,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); - Function::iterator FBBI = TIBB; + Function::iterator FBBI = TIBB->getIterator(); F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they @@ -197,7 +196,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // If we have nothing to update, just return. - auto *AA = Options.AA; auto *DT = Options.DT; auto *LI = Options.LI; if (!DT && !LI) @@ -319,10 +317,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, LoopPreds.push_back(P); } if (!LoopPreds.empty()) { - assert(!DestBB->isLandingPad() && - "We don't split edges to landing pads!"); + assert(!DestBB->isEHPad() && "We don't split edges to EH pads!"); BasicBlock *NewExitBB = SplitBlockPredecessors( - DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA); + DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA); if (Options.PreserveLCSSA) createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 8aa7b2a..64b44a6 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -21,7 +22,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -55,32 +55,6 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, return CI; } -/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the -/// specified pointer. Ptr is required to be some pointer type, MaxLen must -/// be of size_t type, and the return value has 'intptr_t' type. 
-Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::strnlen)) - return nullptr; - - Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeSet AS[2]; - AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); - Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); - - LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = - M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS), - DL.getIntPtrType(Context), B.getInt8PtrTy(), - DL.getIntPtrType(Context), nullptr); - CallInst *CI = B.CreateCall(StrNLen, {CastToCStr(Ptr, B), MaxLen}, "strnlen"); - if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - /// EmitStrChr - Emit a call to the strchr function to the builder, for the /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index f2d5e07..0914699 100644 --- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -82,7 +82,7 @@ static bool insertFastDiv(Function &F, bool UseSignedOp, DivCacheTy &PerBBDivCache) { // Get instruction operands - Instruction *Instr = J; + Instruction *Instr = &*J; Value *Dividend = Instr->getOperand(0); Value *Divisor = Instr->getOperand(1); @@ -94,7 +94,7 @@ static bool insertFastDiv(Function &F, } // Basic Block is split before divide - BasicBlock *MainBB = I; + BasicBlock *MainBB = &*I; BasicBlock *SuccessorBB = I->splitBasicBlock(J); ++I; //advance iterator I to successorBB @@ -190,7 +190,7 @@ static bool reuseOrInsertFastDiv(Function &F, bool UseSignedOp, DivCacheTy &PerBBDivCache) { // Get instruction operands - Instruction *Instr = J; + Instruction *Instr = &*J; DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index cc4d6c6..854a3b8 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -52,8 +52,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - VMap[II] = NewInst; // Add instruction map to value. - + VMap[&*II] = NewInst; // Add instruction map to value. + hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (isa<ConstantInt>(AI->getArraySize())) @@ -85,9 +85,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG - for (Function::const_arg_iterator I = OldFunc->arg_begin(), - E = OldFunc->arg_end(); I != E; ++I) - assert(VMap.count(I) && "No mapping from source argument specified!"); + for (const Argument &I : OldFunc->args()) + assert(VMap.count(&I) && "No mapping from source argument specified!"); #endif // Copy all attributes other than those stored in the AttributeSet. 
We need @@ -96,6 +95,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, NewFunc->copyAttributesFrom(OldFunc); NewFunc->setAttributes(NewAttrs); + // Fix up the personality function that got copied over. + if (OldFunc->hasPersonalityFn()) + NewFunc->setPersonalityFn( + MapValue(OldFunc->getPersonalityFn(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + AttributeSet OldAttrs = OldFunc->getAttributes(); // Clone any argument attributes that are present in the VMap. for (const Argument &OldArg : OldFunc->args()) @@ -136,7 +142,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, if (BB.hasAddressTaken()) { Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), const_cast<BasicBlock*>(&BB)); - VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); } // Note return instructions for the caller. @@ -146,11 +152,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. - for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]), - BE = NewFunc->end(); BB != BE; ++BB) + for (Function::iterator BB = + cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(), + BE = NewFunc->end(); + BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... - for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) - RemapInstruction(II, VMap, + for (Instruction &II : *BB) + RemapInstruction(&II, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } @@ -187,11 +195,9 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder); if (!OldSubprogramMDNode) return; - // Ensure that OldFunc appears in the map. - // (if it's already there it must point to NewFunc anyway) - VMap[OldFunc] = NewFunc; auto *NewSubprogram = cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap)); + NewFunc->setSubprogram(NewSubprogram); for (auto *CU : Finder.compile_units()) { auto Subprograms = CU->getSubprograms(); @@ -222,10 +228,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, // The user might be deleting arguments to the function by specifying them in // the VMap. If so, we need to not add the arguments to the arg ty vector // - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? - ArgTypes.push_back(I->getType()); + for (const Argument &I : F->args()) + if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I.getType()); // Create a new function type... FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), @@ -236,11 +241,10 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, // Loop over the arguments, copying the names of the mapped arguments over... Function::arg_iterator DestI = NewF->arg_begin(); - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (VMap.count(I) == 0) { // Is this argument preserved? - DestI->setName(I->getName()); // Copy the name over... 
- VMap[I] = DestI++; // Add mapping to VMap + for (const Argument & I : F->args()) + if (VMap.count(&I) == 0) { // Is this argument preserved? + DestI->setName(I.getName()); // Copy the name over... + VMap[&I] = &*DestI++; // Add mapping to VMap } if (ModuleLevelChanges) @@ -330,8 +334,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, II != IE; ++II) { // If the "Director" remaps the instruction, don't clone it. if (Director) { - CloningDirector::CloningAction Action - = Director->handleInstruction(VMap, II, NewBB); + CloningDirector::CloningAction Action = + Director->handleInstruction(VMap, &*II, NewBB); // If the cloning director says stop, we want to stop everything, not // just break out of the loop (which would cause the terminator to be // cloned). The cloning director is responsible for inserting a proper @@ -365,7 +369,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (Value *MappedV = VMap.lookup(V)) V = MappedV; - VMap[II] = V; + VMap[&*II] = V; delete NewInst; continue; } @@ -373,9 +377,15 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); - VMap[II] = NewInst; // Add instruction map to value. + VMap[&*II] = NewInst; // Add instruction map to value. NewBB->getInstList().push_back(NewInst); hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); + + if (CodeInfo) + if (auto CS = ImmutableCallSite(&*II)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (isa<ConstantInt>(AI->getArraySize())) hasStaticAllocas = true; @@ -400,8 +410,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If the director says to skip with a terminate instruction, we still // need to clone this block's successors. const TerminatorInst *TI = NewBB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - ToClone.push_back(TI->getSuccessor(i)); + for (const BasicBlock *Succ : TI->successors()) + ToClone.push_back(Succ); return; } assert(Action != CloningDirector::SkipInstruction && @@ -447,11 +457,16 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, NewInst->setName(OldTI->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); VMap[OldTI] = NewInst; // Add instruction map to value. - + + if (CodeInfo) + if (auto CS = ImmutableCallSite(OldTI)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + // Recursively clone any reachable successor blocks. const TerminatorInst *TI = BB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - ToClone.push_back(TI->getSuccessor(i)); + for (const BasicBlock *Succ : TI->successors()) + ToClone.push_back(Succ); } if (CodeInfo) { @@ -484,12 +499,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } #ifndef NDEBUG - // If the cloning starts at the begining of the function, verify that + // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. 
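[Annotation] The two hunks above that push into CodeInfo->OperandBundleCallSites apply one pattern: while cloning, remember every new call site that carries operand bundles. A condensed sketch using the same CallSite query (the container type is simplified here; the real ClonedCodeInfo member may differ):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

void noteBundleSite(const Instruction *Old, Instruction *New,
                    SmallVectorImpl<Instruction *> &BundleSites) {
  // Only call/invoke instructions form a CallSite; everything else is null.
  if (auto CS = ImmutableCallSite(Old))
    if (CS.hasOperandBundles())
      BundleSites.push_back(New);
}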
if (!StartingInst) - for (Function::const_arg_iterator II = OldFunc->arg_begin(), - E = OldFunc->arg_end(); II != E; ++II) - assert(VMap.count(II) && "No mapping from source argument specified!"); + for (const Argument &II : OldFunc->args()) + assert(VMap.count(&II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, @@ -499,12 +513,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, StartingBB = StartingInst->getParent(); else { StartingBB = &OldFunc->getEntryBlock(); - StartingInst = StartingBB->begin(); + StartingInst = &StartingBB->front(); } // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; - PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist); + PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); @@ -517,9 +531,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; - for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); - BI != BE; ++BI) { - Value *V = VMap[BI]; + for (const BasicBlock &BI : *OldFunc) { + Value *V = VMap[&BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); if (!NewBB) continue; // Dead block. @@ -528,7 +541,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Handle PHI nodes specially, as we have to remove references to dead // blocks. - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process. if (const PHINode *PN = dyn_cast<PHINode>(I)) { @@ -621,8 +634,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); - assert(VMap[OldI] == PN && "VMap mismatch"); - VMap[OldI] = NV; + assert(VMap[&*OldI] == PN && "VMap mismatch"); + VMap[&*OldI] = NV; PN->eraseFromParent(); ++OldI; } @@ -644,15 +657,15 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]); + Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. - if (I != Begin && (pred_begin(I) == pred_end(I) || - I->getSinglePredecessor() == I)) { - BasicBlock *DeadBB = I++; + if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || + I->getSinglePredecessor() == &*I)) { + BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; } @@ -662,7 +675,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // simplification required looking through PHI nodes, those are only // available after forming the full basic block. 
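[Annotation] The pruning loop above leans on two utilities; a usage sketch with real LLVM APIs but simplified control flow (the patch's loop also merges unconditional fall-through branches, omitted here):

#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

void pruneAfterCloning(Function &F) {
  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
    BasicBlock *BB = &*I++; // advance first: BB may be deleted below
    // Cloning often turns "br i1 <constant>" into an unconditional branch.
    ConstantFoldTerminator(BB);
    // Blocks left without predecessors are unreachable and can go.
    if (BB != &F.getEntryBlock() && pred_begin(BB) == pred_end(BB))
      DeleteDeadBlock(BB);
  }
}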
That may leave some here, // and we still want to prune the dead code as early as possible. - ConstantFoldTerminator(I); + ConstantFoldTerminator(&*I); BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } @@ -681,7 +694,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. - Dest->replaceAllUsesWith(I); + Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); @@ -695,7 +708,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. - for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]), + for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) @@ -717,7 +730,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall) { - CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap, + CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, ModuleLevelChanges, Returns, NameSuffix, CodeInfo, nullptr); } @@ -780,9 +793,10 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, } // Move them physically from the end of the block list. - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH); - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), - NewLoop->getHeader(), F->end()); + F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), + NewPH); + F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), + NewLoop->getHeader()->getIterator(), F->end()); return NewLoop; } diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index 61f1811..ab08335 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -20,21 +20,28 @@ #include "llvm-c/Core.h" using namespace llvm; -/// CloneModule - Return an exact copy of the specified module. This is not as -/// easy as it might seem because we have to worry about making copies of global -/// variables and functions, and making their (initializers and references, -/// respectively) refer to the right globals. +/// This is not as easy as it might seem because we have to worry about making +/// copies of global variables and functions, and making their (initializers and +/// references, respectively) refer to the right globals. /// -Module *llvm::CloneModule(const Module *M) { +std::unique_ptr<Module> llvm::CloneModule(const Module *M) { // Create the value map that maps things from the old module over to the new // module. 
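[Annotation] The CloneModule change starting here moves the API to explicit ownership. A minimal caller under the new signature shown in this hunk:

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <memory>
using namespace llvm;

std::unique_ptr<Module> scratchCopy(const Module &M) {
  // The clone's lifetime is now tied to the unique_ptr instead of a raw
  // Module* the caller had to remember to delete.
  return CloneModule(&M);
}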
ValueToValueMapTy VMap; return CloneModule(M, VMap); } -Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { +std::unique_ptr<Module> llvm::CloneModule(const Module *M, + ValueToValueMapTy &VMap) { + return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); +} + +std::unique_ptr<Module> llvm::CloneModule( + const Module *M, ValueToValueMapTy &VMap, + std::function<bool(const GlobalValue *)> ShouldCloneDefinition) { // First off, we need to create the new module. - Module *New = new Module(M->getModuleIdentifier(), M->getContext()); + std::unique_ptr<Module> New = + llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); @@ -52,26 +59,48 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { (GlobalVariable*) nullptr, I->getThreadLocalMode(), I->getType()->getAddressSpace()); - GV->copyAttributesFrom(I); - VMap[I] = GV; + GV->copyAttributesFrom(&*I); + VMap[&*I] = GV; } // Loop over the functions in the module, making external functions as before for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { Function *NF = - Function::Create(cast<FunctionType>(I->getType()->getElementType()), - I->getLinkage(), I->getName(), New); - NF->copyAttributesFrom(I); - VMap[I] = NF; + Function::Create(cast<FunctionType>(I->getType()->getElementType()), + I->getLinkage(), I->getName(), New.get()); + NF->copyAttributesFrom(&*I); + VMap[&*I] = NF; } // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - auto *PTy = cast<PointerType>(I->getType()); - auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New); - GA->copyAttributesFrom(I); - VMap[I] = GA; + if (!ShouldCloneDefinition(&*I)) { + // An alias cannot act as an external reference, so we need to create + // either a function or a global variable depending on the value type. + // FIXME: Once pointee types are gone we can probably pick one or the + // other. + GlobalValue *GV; + if (I->getValueType()->isFunctionTy()) + GV = Function::Create(cast<FunctionType>(I->getValueType()), + GlobalValue::ExternalLinkage, I->getName(), + New.get()); + else + GV = new GlobalVariable( + *New, I->getValueType(), false, GlobalValue::ExternalLinkage, + (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr, + I->getThreadLocalMode(), I->getType()->getAddressSpace()); + VMap[&*I] = GV; + // We do not copy attributes (mainly because copying between different + // kinds of globals is forbidden), but this is generally not required for + // correctness. + continue; + } + auto *GA = GlobalAlias::create(I->getValueType(), + I->getType()->getPointerAddressSpace(), + I->getLinkage(), I->getName(), New.get()); + GA->copyAttributesFrom(&*I); + VMap[&*I] = GA; } // Now that all of the things that global variable initializer can refer to @@ -80,7 +109,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { - GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); + GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]); + if (!ShouldCloneDefinition(&*I)) { + // Skip after setting the correct linkage for an external reference. 
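[Annotation] The new ShouldCloneDefinition hook shown above lets callers clone a module's shape while keeping only selected bodies. A hypothetical use (the helper name is invented; the CloneModule signature is as in the patch):

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <memory>
using namespace llvm;

// Clone M, but keep only Keep's definition; every other global becomes an
// external declaration, per the linkage fix-ups in the hunks that follow.
std::unique_ptr<Module> cloneOnlyOne(const Module &M, const GlobalValue *Keep) {
  ValueToValueMapTy VMap;
  return CloneModule(&M, VMap,
                     [Keep](const GlobalValue *GV) { return GV == Keep; });
}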
+ GV->setLinkage(GlobalValue::ExternalLinkage); + continue; + } if (I->hasInitializer()) GV->setInitializer(MapValue(I->getInitializer(), VMap)); } @@ -88,18 +122,22 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // Similarly, copy over function bodies now... // for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { - Function *F = cast<Function>(VMap[I]); + Function *F = cast<Function>(VMap[&*I]); + if (!ShouldCloneDefinition(&*I)) { + // Skip after setting the correct linkage for an external reference. + F->setLinkage(GlobalValue::ExternalLinkage); + continue; + } if (!I->isDeclaration()) { Function::arg_iterator DestI = F->arg_begin(); for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); ++J) { DestI->setName(J->getName()); - VMap[J] = DestI++; + VMap[&*J] = &*DestI++; } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); - + CloneFunctionInto(F, &*I, VMap, /*ModuleLevelChanges=*/true, Returns); } if (I->hasPersonalityFn()) @@ -109,7 +147,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); + // We already dealt with undefined aliases above. + if (!ShouldCloneDefinition(&*I)) + continue; + GlobalAlias *GA = cast<GlobalAlias>(VMap[&*I]); if (const Constant *C = I->getAliasee()) GA->setAliasee(MapValue(C, VMap)); } @@ -129,7 +170,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { extern "C" { LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { - return wrap(CloneModule(unwrap(M))); + return wrap(CloneModule(unwrap(M)).release()); } } diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index ab89b41..823696d 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -51,7 +51,7 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, /// \brief Test whether a block is valid for extraction. static bool isBlockValidForExtraction(const BasicBlock &BB) { // Landing pads must be in the function where they were inserted for cleanup. - if (BB.isLandingPad()) + if (BB.isEHPad()) return false; // Don't hoist code containing allocas, invokes, or vastarts. @@ -175,7 +175,7 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, for (User *U : II->users()) if (!definedInRegion(Blocks, U)) { - Outputs.insert(II); + Outputs.insert(&*II); break; } } @@ -211,7 +211,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); + BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, Header->getName()+".ce"); @@ -246,7 +246,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. 
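[Annotation] The severSplitPHINodes hunk above is easier to read in isolation: split the header right after its PHIs so the extracted body starts PHI-free. The same two calls as a standalone helper (real LLVM APIs):

#include "llvm/IR/BasicBlock.h"
using namespace llvm;

BasicBlock *splitOffHeaderPHIs(BasicBlock *Header) {
  // getFirstNonPHI() returns an Instruction*; the new ilist requires an
  // explicit getIterator() before handing it to splitBasicBlock().
  BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator();
  return Header->splitBasicBlock(AfterPHIs, Header->getName() + ".ce");
}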
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, - PN->getName()+".ce", NewBB->begin()); + PN->getName() + ".ce", &NewBB->front()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they @@ -266,7 +266,8 @@ void CodeExtractor::splitReturnBlocks() { for (SetVector<BasicBlock *>::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) { - BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + BasicBlock *New = + (*I)->splitBasicBlock(RI->getIterator(), (*I)->getName() + ".ret"); if (DT) { // Old dominates New. New node dominates all other nodes dominated // by Old. @@ -365,10 +366,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI); + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else - RewriteVal = AI++; + RewriteVal = &*AI++; std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end(); @@ -440,8 +441,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructValues.push_back(*i); } else { AllocaInst *alloca = - new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc", - codeReplacer->getParent()->begin()->begin()); + new AllocaInst((*i)->getType(), nullptr, (*i)->getName() + ".loc", + &codeReplacer->getParent()->front().front()); ReloadOutputs.push_back(alloca); params.push_back(alloca); } @@ -457,9 +458,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Allocate a struct at the beginning of this function StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - Struct = - new AllocaInst(StructArgTy, nullptr, "structArg", - codeReplacer->getParent()->begin()->begin()); + Struct = new AllocaInst(StructArgTy, nullptr, "structArg", + &codeReplacer->getParent()->front().front()); params.push_back(Struct); for (unsigned i = 0, e = inputs.size(); i != e; ++i) { @@ -566,8 +566,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, bool DominatesDef = true; - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) { - DefBlock = Invoke->getNormalDest(); + BasicBlock *NormalDest = nullptr; + if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out])) + NormalDest = Invoke->getNormalDest(); + + if (NormalDest) { + DefBlock = NormalDest; // Make sure we are looking at the original successor block, not // at a newly inserted exit block, which won't be in the dominator @@ -606,11 +610,11 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut+out); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(), + StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(), NTRet); new StoreInst(outputs[out], GEP, NTRet); } else { - new StoreInst(outputs[out], OAI, NTRet); + new StoreInst(outputs[out], &*OAI, NTRet); } } // Advance output iterator even if we don't emit a store diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp 
index dc95089..b56ff68 100644 --- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -50,7 +50,7 @@ void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) { GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->getThreadLocalMode()); - GCL->getParent()->getGlobalList().insert(GCL, NGV); + GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV); NGV->takeName(GCL); // Nuke the old list, replacing any uses with the new one. diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 003da58..75a1dde 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -35,8 +35,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", - F->getEntryBlock().begin()); + Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem", + &F->getEntryBlock().front()); } // We cannot demote invoke instructions to the stack if their normal edge @@ -89,16 +89,15 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, // AFTER the terminator instruction. BasicBlock::iterator InsertPt; if (!isa<TerminatorInst>(I)) { - InsertPt = &I; - ++InsertPt; - for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + InsertPt = ++I.getIterator(); + for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) /* empty */; // Don't insert before PHI nodes or landingpad instrs. } else { InvokeInst &II = cast<InvokeInst>(I); InsertPt = II.getNormalDest()->getFirstInsertionPt(); } - new StoreInst(&I, Slot, InsertPt); + new StoreInst(&I, Slot, &*InsertPt); return Slot; } @@ -118,8 +117,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", - F->getEntryBlock().begin()); + Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem", + &F->getEntryBlock().front()); } // Iterate over each operand inserting a store in each predecessor. @@ -133,12 +132,12 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { } // Insert a load in place of the PHI and replace all uses. - BasicBlock::iterator InsertPt = P; + BasicBlock::iterator InsertPt = P->getIterator(); - for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt) /* empty */; // Don't insert before PHI nodes or landingpad instrs. - Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt); + Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt); P->replaceAllUsesWith(V); // Delete PHI. diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp index 4eb3e3d..492ae9f 100644 --- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -28,12 +28,11 @@ class FlattenCFGOpt { AliasAnalysis *AA; /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. 
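[Annotation] For context on the demotion hunks above: DemoteRegToStack spills an SSA value to a fresh alloca, storing after the definition and reloading before each use; the patch only changes where that alloca and the reload are inserted. A usage sketch (the signature matches the implementation shown; the declaration is assumed to live in llvm/Transforms/Utils/Local.h):

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

AllocaInst *spillToStack(Instruction &I) {
  // With no AllocaPoint given, the slot lands at the entry block's front,
  // exactly the &F->getEntryBlock().front() position seen in the hunk.
  return DemoteRegToStack(I, /*VolatileLoads=*/false);
}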
- bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P = nullptr); + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. - bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr); + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -122,8 +121,7 @@ public: /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches /// as its predecessors. /// -bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P) { +bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { PHINode *PHI = dyn_cast<PHINode>(BB->begin()); if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. @@ -177,8 +175,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, // Instructions in the internal condition blocks should be safe // to hoist up. - for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { - Instruction *CI = BI++; + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator(); + BI != BE;) { + Instruction *CI = &*BI++; if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) return false; } @@ -315,7 +314,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock *Block1, BasicBlock *Block2) { TerminatorInst *PTI2 = Head2->getTerminator(); - Instruction *PBI2 = Head2->begin(); + Instruction *PBI2 = &Head2->front(); bool eq1 = (Block1 == Head1); bool eq2 = (Block2 == Head2); @@ -327,9 +326,9 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, // Check whether instructions in Block1 and Block2 are identical // and do not alias with instructions in Head2. BasicBlock::iterator iter1 = Block1->begin(); - BasicBlock::iterator end1 = Block1->getTerminator(); + BasicBlock::iterator end1 = Block1->getTerminator()->getIterator(); BasicBlock::iterator iter2 = Block2->begin(); - BasicBlock::iterator end2 = Block2->getTerminator(); + BasicBlock::iterator end2 = Block2->getTerminator()->getIterator(); while (1) { if (iter1 == end1) { @@ -338,7 +337,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, break; } - if (!iter1->isIdenticalTo(iter2)) + if (!iter1->isIdenticalTo(&*iter2)) return false; // Illegal to remove instructions with side effects except @@ -356,10 +355,10 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, return false; if (iter1->mayWriteToMemory()) { - for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { // Check alias with Head2. 
- if (!AA || AA->alias(iter1, BI)) + if (!AA || AA->alias(&*iter1, &*BI)) return false; } } @@ -386,8 +385,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, /// if (a || b) /// statement; /// -bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P) { +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { BasicBlock *IfTrue2, *IfFalse2; Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); @@ -413,7 +411,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, return false; TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); - Instruction *PBI2 = SecondEntryBlock->begin(); + Instruction *PBI2 = &SecondEntryBlock->front(); if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, IfTrue2)) @@ -425,8 +423,8 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, // Check whether \param SecondEntryBlock has side-effect and is safe to // speculate. - for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { - Instruction *CI = BI; + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { + Instruction *CI = &*BI; if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || !isSafeToSpeculativelyExecute(CI)) return false; diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp index 44b7d25..3893a75 100644 --- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -49,6 +49,10 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) { static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, SmallPtrSetImpl<const PHINode *> &PhiUsers) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (GV->isExternallyInitialized()) + GS.StoredType = GlobalStatus::StoredOnce; + for (const Use &U : V->uses()) { const User *UR = U.getUser(); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) { diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index d2d60d7..1457411 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" @@ -41,6 +42,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" #include <algorithm> + using namespace llvm; static cl::opt<bool> @@ -54,17 +56,17 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", cl::desc("Convert align attributes to assumptions during inlining.")); bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, - bool InsertLifetime) { - return InlineFunction(CallSite(CI), IFI, InsertLifetime); + AAResults *CalleeAAR, bool InsertLifetime) { + return InlineFunction(CallSite(CI), IFI, CalleeAAR, 
InsertLifetime); } bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, - bool InsertLifetime) { - return InlineFunction(CallSite(II), IFI, InsertLifetime); + AAResults *CalleeAAR, bool InsertLifetime) { + return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); } namespace { - /// A class for recording information about inlining through an invoke. - class InvokeInliningInfo { + /// A class for recording information about inlining a landing pad. + class LandingPadInliningInfo { BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. @@ -72,7 +74,7 @@ namespace { SmallVector<Value*, 8> UnwindDestPHIValues; public: - InvokeInliningInfo(InvokeInst *II) + LandingPadInliningInfo(InvokeInst *II) : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { // If there are PHI nodes in the unwind destination block, we need to keep @@ -121,14 +123,14 @@ namespace { } } }; -} +} // anonymous namespace /// Get or create a target for the branch from ResumeInsts. -BasicBlock *InvokeInliningInfo::getInnerResumeDest() { +BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; // Split the landing pad. - BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint; + BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); InnerResumeDest = OuterResumeDest->splitBasicBlock(SplitPoint, OuterResumeDest->getName() + ".body"); @@ -137,7 +139,7 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { const unsigned PHICapacity = 2; // Create corresponding new PHIs for all the PHIs in the outer landing pad. - BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); + Instruction *InsertPoint = &InnerResumeDest->front(); BasicBlock::iterator I = OuterResumeDest->begin(); for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { PHINode *OuterPHI = cast<PHINode>(I); @@ -162,8 +164,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI, - SmallPtrSetImpl<LandingPadInst*> &InlinedLPads) { +void LandingPadInliningInfo::forwardResume( + ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); BasicBlock *Src = RI->getParent(); @@ -182,33 +184,39 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, /// This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. -static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, - InvokeInliningInfo &Invoke) { +static BasicBlock * +HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { - Instruction *I = BBI++; + Instruction *I = &*BBI++; // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast<CallInst>(I); - // If this call cannot unwind, don't convert it to an invoke. 
- // Inline asm calls cannot throw. if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) continue; // Convert this function call into an invoke instruction. First, split the // basic block. - BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + BasicBlock *Split = + BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc"); // Delete the unconditional branch inserted by splitBasicBlock BB->getInstList().pop_back(); // Create the new invoke instruction. - ImmutableCallSite CS(CI); - SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, - Invoke.getOuterResumeDest(), - InvokeArgs, CI->getName(), BB); + SmallVector<Value*, 8> InvokeArgs(CI->arg_begin(), CI->arg_end()); + SmallVector<OperandBundleDef, 1> OpBundles; + + CI->getOperandBundlesAsDefs(OpBundles); + + // Note: we're round tripping operand bundles through memory here, and that + // can potentially be avoided with a cleverer API design that we do not have + // as of this time. + + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs, + OpBundles, CI->getName(), BB); II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -219,12 +227,9 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Delete the original call Split->getInstList().pop_front(); - - // Update any PHI nodes in the exceptional block to indicate that there is - // now a new entry in them. - Invoke.addIncomingPHIValuesFor(BB); - return; + return BB; } + return nullptr; } /// If we inlined an invoke site, we need to convert calls @@ -233,8 +238,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, /// II is the invoke instruction being inlined. FirstNewBlock is the first /// block of the inlined code (the last block is the end of the function), /// and InlineCodeInfo is information about the code that got inlined. -static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, - ClonedCodeInfo &InlinedCodeInfo) { +static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *InvokeDest = II->getUnwindDest(); Function *Caller = FirstNewBlock->getParent(); @@ -242,11 +247,12 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to // rewrite. - InvokeInliningInfo Invoke(II); + LandingPadInliningInfo Invoke(II); // Get all of the inlined landing pad instructions. 
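// The rewrite done by HandleCallsInBlockInlinedThroughInvoke above, distilled
// into a free-standing helper: a sketch assuming CI may throw and UnwindEdge
// is a suitable EH destination (convertToInvoke is hypothetical):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static InvokeInst *convertToInvoke(CallInst *CI, BasicBlock *UnwindEdge) {
  BasicBlock *BB = CI->getParent();
  // Everything from the call onward moves to a new block, which becomes the
  // invoke's normal destination.
  BasicBlock *Split =
      BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc");
  BB->getInstList().pop_back(); // drop the branch splitBasicBlock inserted

  SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
  SmallVector<OperandBundleDef, 1> OpBundles;
  CI->getOperandBundlesAsDefs(OpBundles); // round-trip bundles through defs

  InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
                                      Args, OpBundles, CI->getName(), BB);
  II->setDebugLoc(CI->getDebugLoc());
  II->setCallingConv(CI->getCallingConv());
  II->setAttributes(CI->getAttributes());
  CI->replaceAllUsesWith(II);
  Split->getInstList().pop_front(); // the original call is now dead
  return II;
}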
SmallPtrSet<LandingPadInst*, 16> InlinedLPads; - for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); + I != E; ++I) if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) InlinedLPads.insert(II->getLandingPadInst()); @@ -262,9 +268,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InlinedLPad->setCleanup(true); } - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { if (InlinedCodeInfo.ContainsCalls) - HandleCallsInBlockInlinedThroughInvoke(BB, Invoke); + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + &*BB, Invoke.getOuterResumeDest())) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + Invoke.addIncomingPHIValuesFor(NewBB); // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) @@ -278,6 +289,99 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); } +/// If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Function *Caller = FirstNewBlock->getParent(); + + assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); + + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing the + // edge from this block. + SmallVector<Value *, 8> UnwindDestPHIValues; + llvm::BasicBlock *InvokeBB = II->getParent(); + for (Instruction &I : *UnwindDest) { + // Save the value to use for this edge. + PHINode *PHI = dyn_cast<PHINode>(&I); + if (!PHI) + break; + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // Add incoming-PHI values to the unwind destination block for the given basic + // block, using the values for the original invoke's source block. + auto UpdatePHINodes = [&](BasicBlock *Src) { + BasicBlock::iterator I = UnwindDest->begin(); + for (Value *V : UnwindDestPHIValues) { + PHINode *PHI = cast<PHINode>(I); + PHI->addIncoming(V, Src); + ++I; + } + }; + + // This connects all the instructions which 'unwind to caller' to the invoke + // destination. 
+ for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { + if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { + if (CRI->unwindsToCaller()) { + CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); + CRI->eraseFromParent(); + UpdatePHINodes(&*BB); + } + } + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + Instruction *Replacement = nullptr; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { + if (CatchSwitch->unwindsToCaller()) { + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), UnwindDest, + CatchSwitch->getNumHandlers(), CatchSwitch->getName(), + CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + Replacement = NewCatchSwitch; + } + } else if (!isa<FuncletPadInst>(I)) { + llvm_unreachable("unexpected EHPad!"); + } + + if (Replacement) { + Replacement->takeName(I); + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + UpdatePHINodes(&*BB); + } + } + + if (InlinedCodeInfo.ContainsCalls) + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) + if (BasicBlock *NewBB = + HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest)) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + UpdatePHINodes(NewBB); + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + UnwindDest->removePredecessor(InvokeBB); +} + /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks /// have different "unqiue scopes" at every call site. Were this not done, then @@ -395,17 +499,16 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. 
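// The scopes attached by this function are ordinary metadata built with
// MDBuilder: one anonymous domain per inlined call site, one scope per
// noalias argument. A minimal sketch of that machinery (tagWithFreshScope
// is a hypothetical helper):

#include "llvm/IR/Instruction.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

static void tagWithFreshScope(Instruction *MemOp) {
  LLVMContext &Ctx = MemOp->getContext();
  MDBuilder MDB(Ctx);
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("inline.domain");
  MDNode *Scope = MDB.createAnonymousAliasScope(Domain, "noalias.arg");
  // Scope lists are MDNodes of scopes; a single-element list suffices here.
  MemOp->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, Scope));
}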
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, - const DataLayout &DL, AliasAnalysis *AA) { + const DataLayout &DL, AAResults *CalleeAAR) { if (!EnableNoAliasConversion) return; const Function *CalledFunc = CS.getCalledFunction(); SmallVector<const Argument *, 4> NoAliasArgs; - for (Function::const_arg_iterator I = CalledFunc->arg_begin(), - E = CalledFunc->arg_end(); I != E; ++I) { - if (I->hasNoAliasAttr() && !I->hasNUses(0)) - NoAliasArgs.push_back(I); + for (const Argument &I : CalledFunc->args()) { + if (I.hasNoAliasAttr() && !I.hasNUses(0)) + NoAliasArgs.push_back(&I); } if (NoAliasArgs.empty()) @@ -480,10 +583,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, continue; IsFuncCall = true; - if (AA) { - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(ICS); - if (MRB == AliasAnalysis::OnlyAccessesArgumentPointees || - MRB == AliasAnalysis::OnlyReadsArgumentPointees) + if (CalleeAAR) { + FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS); + if (MRB == FMRB_OnlyAccessesArgumentPointees || + MRB == FMRB_OnlyReadsArgumentPointees) IsArgMemOnlyCall = true; } @@ -518,7 +621,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) { SmallVector<Value *, 4> Objects; GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]), - Objects, DL, /* MaxLookup = */ 0); + Objects, DL, /* LI = */ nullptr); for (Value *O : Objects) ObjSet.insert(O); @@ -646,7 +749,7 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { // caller, then don't bother inserting the assumption. Value *Arg = CS.getArgument(I->getArgNo()); if (getKnownAlignment(Arg, DL, CS.getInstruction(), - &IFI.ACT->getAssumptionCache(*CalledFunc), + &IFI.ACT->getAssumptionCache(*CS.getCaller()), &DT) >= Align) continue; @@ -731,7 +834,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI) { Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); - IRBuilder<> Builder(InsertBlock->begin()); + IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); @@ -851,9 +954,8 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, // Starting from the top, rebuild the nodes to point to the new inlined-at // location (then rebuilding the rest of the chain behind it) and update the // map of already-constructed inlined-at nodes. - for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend(); - I != E; ++I) { - const DILocation *MD = *I; + for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(), + InlinedAtLocations.rend())) { Last = IANodes[MD] = DILocation::getDistinct( Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); } @@ -917,7 +1019,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - bool InsertLifetime) { + AAResults *CalleeAAR, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); @@ -930,6 +1032,22 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CalledFunc->isDeclaration() || // call, or call to a vararg function! 
CalledFunc->getFunctionType()->isVarArg()) return false; + // The inliner does not know how to inline through calls with operand bundles + // in general ... + if (CS.hasOperandBundles()) { + for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { + uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); + // ... but it knows how to inline through "deopt" operand bundles ... + if (Tag == LLVMContext::OB_deopt) + continue; + // ... and "funclet" operand bundles. + if (Tag == LLVMContext::OB_funclet) + continue; + + return false; + } + } + // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); @@ -950,13 +1068,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Get the personality function from the callee if it contains a landing pad. Constant *CalledPersonality = - CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn() : nullptr; + CalledFunc->hasPersonalityFn() + ? CalledFunc->getPersonalityFn()->stripPointerCasts() + : nullptr; // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. Constant *CallerPersonality = - Caller->hasPersonalityFn() ? Caller->getPersonalityFn() : nullptr; + Caller->hasPersonalityFn() + ? Caller->getPersonalityFn()->stripPointerCasts() + : nullptr; if (CalledPersonality) { if (!CallerPersonality) Caller->setPersonalityFn(CalledPersonality); @@ -968,9 +1090,46 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, return false; } + // We need to figure out which funclet the callsite was in so that we may + // properly nest the callee. + Instruction *CallSiteEHPad = nullptr; + if (CallerPersonality) { + EHPersonality Personality = classifyEHPersonality(CallerPersonality); + if (isFuncletEHPersonality(Personality)) { + Optional<OperandBundleUse> ParentFunclet = + CS.getOperandBundle(LLVMContext::OB_funclet); + if (ParentFunclet) + CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front()); + + // OK, the inlining site is legal. What about the target function? + + if (CallSiteEHPad) { + if (Personality == EHPersonality::MSVC_CXX) { + // The MSVC personality cannot tolerate catches getting inlined into + // cleanup funclets. + if (isa<CleanupPadInst>(CallSiteEHPad)) { + // Ok, the call site is within a cleanuppad. Let's check the callee + // for catchpads. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI())) + return false; + } + } + } else if (isAsynchronousEHPersonality(Personality)) { + // SEH is even less tolerant, there may not be any sort of exceptional + // funclet in the callee. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (CalledBB.isEHPad()) + return false; + } + } + } + } + } + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. - Function::iterator LastBlock = &Caller->back(); + Function::iterator LastBlock = --Caller->end(); // Make sure to capture all of the return instructions from the cloned // function. @@ -1007,7 +1166,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } - VMap[I] = ActualArg; + VMap[&*I] = ActualArg; } // Add alignment assumptions if necessary. 
We do this before the inlined @@ -1029,7 +1188,61 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Inject byval arguments initialization. for (std::pair<Value*, Value*> &Init : ByValInit) HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), - FirstNewBlock, IFI); + &*FirstNewBlock, IFI); + + Optional<OperandBundleUse> ParentDeopt = + CS.getOperandBundle(LLVMContext::OB_deopt); + if (ParentDeopt) { + SmallVector<OperandBundleDef, 2> OpDefs; + + for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { + Instruction *I = dyn_cast_or_null<Instruction>(VH); + if (!I) continue; // instruction was DCE'd or RAUW'ed to undef + + OpDefs.clear(); + + CallSite ICS(I); + OpDefs.reserve(ICS.getNumOperandBundles()); + + for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { + auto ChildOB = ICS.getOperandBundleAt(i); + if (ChildOB.getTagID() != LLVMContext::OB_deopt) { + // If the inlined call has other operand bundles, let them be + OpDefs.emplace_back(ChildOB); + continue; + } + + // It may be useful to separate this logic (of handling operand + // bundles) out to a separate "policy" component if this gets crowded. + // Prepend the parent's deoptimization continuation to the newly + // inlined call's deoptimization continuation. + std::vector<Value *> MergedDeoptArgs; + MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + + ChildOB.Inputs.size()); + + MergedDeoptArgs.insert(MergedDeoptArgs.end(), + ParentDeopt->Inputs.begin(), + ParentDeopt->Inputs.end()); + MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), + ChildOB.Inputs.end()); + + OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); + } + + Instruction *NewI = nullptr; + if (isa<CallInst>(I)) + NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I); + else + NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I); + + // Note: the RAUW does the appropriate fixup in VMap, so we need to do + // this even if the call returns void. + I->replaceAllUsesWith(NewI); + + VH = nullptr; + I->eraseFromParent(); + } + } // Update the callgraph if requested. if (IFI.CG) @@ -1042,7 +1255,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAliasScopeMetadata(CS, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, DL, IFI.AA); + AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. @@ -1085,9 +1298,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. - Caller->getEntryBlock().getInstList().splice(InsertPoint, - FirstNewBlock->getInstList(), - AI, I); + Caller->getEntryBlock().getInstList().splice( + InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); } // Move any dbg.declares describing the allocas into the entry basic block. DIBuilder DIB(*Caller->getParent()); @@ -1137,7 +1349,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. 
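// The markers emitted just below, reduced to their essentials: a sketch
// assuming a statically known object size (emitScopedLifetime is a
// hypothetical helper):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void emitScopedLifetime(AllocaInst *AI, Instruction *ScopeEnd,
                               uint64_t SizeInBytes) {
  IRBuilder<> B(AI->getNextNode()); // start marker right after the alloca
  ConstantInt *Size = B.getInt64(SizeInBytes);
  B.CreateLifetimeStart(AI, Size);
  B.SetInsertPoint(ScopeEnd);       // end marker where the scope closes
  B.CreateLifetimeEnd(AI, Size);
}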
if (InsertLifetime && !IFI.StaticAllocas.empty()) { - IRBuilder<> builder(FirstNewBlock->begin()); + IRBuilder<> builder(&FirstNewBlock->front()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; @@ -1189,7 +1401,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. - CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) + CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, {}, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the @@ -1203,10 +1415,74 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + // Update the lexical scopes of the new funclets and callsites. + // Anything that had 'none' as its parent is now nested inside the callsite's + // EHPad. + + if (CallSiteEHPad) { + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) { + // Add bundle operands to any top-level call sites. + SmallVector<OperandBundleDef, 1> OpBundles; + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { + Instruction *I = &*BBI++; + CallSite CS(I); + if (!CS) + continue; + + // Skip call sites which are nounwind intrinsics. + auto *CalledFn = + dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) + continue; + + // Skip call sites which already have a "funclet" bundle. + if (CS.getOperandBundle(LLVMContext::OB_funclet)) + continue; + + CS.getOperandBundlesAsDefs(OpBundles); + OpBundles.emplace_back("funclet", CallSiteEHPad); + + Instruction *NewInst; + if (CS.isCall()) + NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I); + else + NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I); + NewInst->setDebugLoc(I->getDebugLoc()); + NewInst->takeName(I); + I->replaceAllUsesWith(NewInst); + I->eraseFromParent(); + + OpBundles.clear(); + } + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { + if (isa<ConstantTokenNone>(CatchSwitch->getParentPad())) + CatchSwitch->setParentPad(CallSiteEHPad); + } else { + auto *FPI = cast<FuncletPadInst>(I); + if (isa<ConstantTokenNone>(FPI->getParentPad())) + FPI->setParentPad(CallSiteEHPad); + } + } + } + // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. - if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) - HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + if (auto *II = dyn_cast<InvokeInst>(TheCall)) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); + if (isa<LandingPadInst>(FirstNonPHI)) { + HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } else { + HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } + } // Handle any inlined musttail call sites. In order for a new call site to be // musttail, the source of the clone and the inlined call site must have been @@ -1250,7 +1526,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. 
- OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), + OrigBB->getInstList().splice(TheCall->getIterator(), + FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); @@ -1297,15 +1574,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. - AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, - CalledFunc->getName()+".exit"); + AfterCallBB = + OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), + CalledFunc->getName() + ".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // - AfterCallBB = OrigBB->splitBasicBlock(TheCall, - CalledFunc->getName()+".exit"); + AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), + CalledFunc->getName() + ".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first @@ -1314,14 +1592,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); - Br->setOperand(0, FirstNewBlock); - + Br->setOperand(0, &*FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. - Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), - FirstNewBlock, Caller->end()); + Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), + Caller->getBasicBlockList(), FirstNewBlock, + Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. @@ -1333,7 +1611,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), - AfterCallBB->begin()); + &AfterCallBB->front()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); @@ -1350,7 +1628,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } - // Add a branch to the merge points and remove return instructions. DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { @@ -1413,7 +1690,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes - OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); + OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); // Remove the unconditional branch. 
OrigBB->getInstList().erase(Br); diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 30edf3b..5687afa 100644 --- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -380,14 +380,10 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { IRBuilder<> Builder(Rem); - Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); - - unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - - if (RemTyBitWidth != 32 && RemTyBitWidth != 64) - llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported"); + assert((Rem->getType()->getIntegerBitWidth() == 32 || + Rem->getType()->getIntegerBitWidth() == 64) && + "Div of bitwidth other than 32 or 64 not supported"); // First prepare the sign if it's a signed remainder if (Rem->getOpcode() == Instruction::SRem) { @@ -401,7 +397,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { // If we didn't actually generate an urem instruction, we're done // This happens for example if the input were constant. In this case the // Builder insertion point was unchanged - if (Rem == Builder.GetInsertPoint()) + if (Rem == Builder.GetInsertPoint().getNodePtrUnchecked()) return true; BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); @@ -440,14 +436,10 @@ bool llvm::expandDivision(BinaryOperator *Div) { IRBuilder<> Builder(Div); - Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); - - unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - - if (DivTyBitWidth != 32 && DivTyBitWidth != 64) - llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + assert(!Div->getType()->isVectorTy() && "Div over vectors not supported"); + assert((Div->getType()->getIntegerBitWidth() == 32 || + Div->getType()->getIntegerBitWidth() == 64) && + "Div of bitwidth other than 32 or 64 not supported"); // First prepare the sign if it's a signed division if (Div->getOpcode() == Instruction::SDiv) { @@ -461,7 +453,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { // If we didn't actually generate an udiv instruction, we're done // This happens for example if the input were constant. 
In this case the // Builder insertion point was unchanged - if (Div == Builder.GetInsertPoint()) + if (Div == Builder.GetInsertPoint().getNodePtrUnchecked()) return true; BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); @@ -492,15 +484,14 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { "Trying to expand remainder from a non-remainder function"); Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - if (RemTyBitWidth > 32) - llvm_unreachable("Div of bitwidth greater than 32 not supported"); + assert(RemTyBitWidth <= 32 && + "Div of bitwidth greater than 32 not supported"); - if (RemTyBitWidth == 32) + if (RemTyBitWidth == 32) return expandRemainder(Rem); // If bitwidth smaller than 32 extend inputs, extend output and proceed @@ -542,15 +533,13 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { "Trying to expand remainder from a non-remainder function"); Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - if (RemTyBitWidth > 64) - llvm_unreachable("Div of bitwidth greater than 64 not supported"); + assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported"); - if (RemTyBitWidth == 64) + if (RemTyBitWidth == 64) return expandRemainder(Rem); // If bitwidth smaller than 64 extend inputs, extend output and proceed @@ -593,13 +582,11 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { "Trying to expand division from a non-division function"); Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - if (DivTyBitWidth > 32) - llvm_unreachable("Div of bitwidth greater than 32 not supported"); + assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported"); if (DivTyBitWidth == 32) return expandDivision(Div); @@ -643,13 +630,12 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { "Trying to expand division from a non-division function"); Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - if (DivTyBitWidth > 64) - llvm_unreachable("Div of bitwidth greater than 64 not supported"); + assert(DivTyBitWidth <= 64 && + "Div of bitwidth greater than 64 not supported"); if (DivTyBitWidth == 64) return expandDivision(Div); diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 9d40b69..b4b2e14 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -31,8 +31,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -64,6 +66,13 @@ static bool processInstruction(Loop 
&L, Instruction &Inst, DominatorTree &DT, PredIteratorCache &PredCache, LoopInfo *LI) { SmallVector<Use *, 16> UsesToRewrite; + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (Inst.getType()->isTokenTy()) + return false; + BasicBlock *InstBB = Inst.getParent(); for (Use &U : Inst.uses()) { @@ -84,9 +93,8 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Invoke instructions are special in that their result value is not available // along their unwind edge. The code below tests to see whether DomBB - // dominates - // the value, so adjust DomBB to the normal destination block, which is - // effectively where the value is first usable. + // dominates the value, so adjust DomBB to the normal destination block, + // which is effectively where the value is first usable. BasicBlock *DomBB = Inst.getParent(); if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst)) DomBB = Inv->getNormalDest(); @@ -101,10 +109,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. - for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(), - BBE = ExitBlocks.end(); - BBI != BBE; ++BBI) { - BasicBlock *ExitBB = *BBI; + for (BasicBlock *ExitBB : ExitBlocks) { if (!DT.dominates(DomNode, DT.getNode(ExitBB))) continue; @@ -113,7 +118,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, continue; PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB), - Inst.getName() + ".lcssa", ExitBB->begin()); + Inst.getName() + ".lcssa", &ExitBB->front()); // Add inputs from inside the loop for this PHI. for (BasicBlock *Pred : PredCache.get(ExitBB)) { @@ -148,26 +153,26 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. - for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { + for (Use *UseToRewrite : UsesToRewrite) { // If this use is in an exit block, rewrite to use the newly inserted PHI. // This is required for correctness because SSAUpdate doesn't handle uses in // the same block. It assumes the PHI we inserted is at the end of the // block. - Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser()); + Instruction *User = cast<Instruction>(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); if (PHINode *PN = dyn_cast<PHINode>(User)) - UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); + UserBB = PN->getIncomingBlock(*UseToRewrite); if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { // Tell the VHs that the uses changed. This updates SCEV's caches. - if (UsesToRewrite[i]->get()->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); - UsesToRewrite[i]->set(UserBB->begin()); + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); + UseToRewrite->set(&UserBB->front()); continue; } // Otherwise, do full PHI insertion. 
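// The SSAUpdater protocol used for the full insertion, in isolation: a
// sketch assuming the .lcssa PHIs have already been created in the exit
// blocks (rewriteWithUpdater is hypothetical):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

static void rewriteWithUpdater(Instruction &Inst,
                               ArrayRef<PHINode *> ExitPHIs,
                               ArrayRef<Use *> Uses) {
  SSAUpdater Updater;
  Updater.Initialize(Inst.getType(), Inst.getName());
  Updater.AddAvailableValue(Inst.getParent(), &Inst);
  for (PHINode *PN : ExitPHIs)
    Updater.AddAvailableValue(PN->getParent(), PN);
  for (Use *U : Uses)
    Updater.RewriteUse(*U); // inserts any additional PHIs it needs
}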
- SSAUpdate.RewriteUse(*UsesToRewrite[i]); + SSAUpdate.RewriteUse(*UseToRewrite); } // Post process PHI instructions that were inserted into another disjoint loop @@ -190,10 +195,9 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, } // Remove PHI nodes that did not have any uses rewritten. - for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { - if (AddedPHIs[i]->use_empty()) - AddedPHIs[i]->eraseFromParent(); - } + for (PHINode *PN : AddedPHIs) + if (PN->use_empty()) + PN->eraseFromParent(); return true; } @@ -205,8 +209,8 @@ blockDominatesAnExit(BasicBlock *BB, DominatorTree &DT, const SmallVectorImpl<BasicBlock *> &ExitBlocks) { DomTreeNode *DomNode = DT.getNode(BB); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i]))) + for (BasicBlock *ExitBB : ExitBlocks) + if (DT.dominates(DomNode, DT.getNode(ExitBB))) return true; return false; @@ -227,25 +231,22 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, rewrite those uses. - for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end(); - BBI != BBE; ++BBI) { - BasicBlock *BB = *BBI; - + for (BasicBlock *BB : L.blocks()) { // For large loops, avoid use-scanning by using dominance information: In // particular, if a block does not dominate any of the loop exits, then none // of the values defined in the block could be used outside the loop. if (!blockDominatesAnExit(BB, DT, ExitBlocks)) continue; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (Instruction &I : *BB) { // Reject two common cases fast: instructions with no uses (like stores) // and instructions with one use that is in the same block as this. - if (I->use_empty() || - (I->hasOneUse() && I->user_back()->getParent() == BB && - !isa<PHINode>(I->user_back()))) + if (I.use_empty() || + (I.hasOneUse() && I.user_back()->getParent() == BB && + !isa<PHINode>(I.user_back()))) continue; - Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache, LI); + Changed |= processInstruction(L, I, DT, ExitBlocks, PredCache, LI); } } @@ -266,8 +267,8 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, bool Changed = false; // Recurse depth-first through inner loops. 
- for (Loop::iterator I = L.begin(), E = L.end(); I != E; ++I) - Changed |= formLCSSARecursively(**I, DT, LI, SE); + for (Loop *SubLoop : L.getSubLoops()) + Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE); Changed |= formLCSSA(L, DT, LI, SE); return Changed; @@ -296,8 +297,10 @@ struct LCSSA : public FunctionPass { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); } }; } @@ -306,6 +309,8 @@ char LCSSA::ID = 0; INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) Pass *llvm::createLCSSAPass() { return new LCSSA(); } @@ -317,7 +322,8 @@ bool LCSSA::runOnFunction(Function &F) { bool Changed = false; LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - SE = getAnalysisIfAvailable<ScalarEvolution>(); + auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + SE = SEWP ? &SEWP->getSE() : nullptr; // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index ba8af47..e75163f 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -17,10 +17,11 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -188,9 +189,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BasicBlock *BB = SI->getParent(); // Remove entries from PHI nodes which we no longer branch to... - for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + for (BasicBlock *Succ : SI->successors()) { // Found case matching a constant operand? - BasicBlock *Succ = SI->getSuccessor(i); if (Succ == TheOnlyDest) TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest else @@ -230,6 +230,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SIDef->getValue().getZExtValue())); } + // Update make.implicit metadata to the newly-created conditional branch. + MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit); + if (MakeImplicitMD) + NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD); + // Delete the old switch. SI->eraseFromParent(); return true; @@ -283,8 +288,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; - // We don't want the landingpad instruction removed by anything this general. 
- if (isa<LandingPadInst>(I)) + // We don't want the landingpad-like instructions removed by anything this + // general. + if (I->isEHPad()) return false; // We don't want debug info removed by anything this general, unless @@ -414,6 +420,49 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, return false; } +static bool +simplifyAndDCEInstruction(Instruction *I, + SmallSetVector<Instruction *, 16> &WorkList, + const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (isInstructionTriviallyDead(I, TLI)) { + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty() || I == OpV) + continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast<Instruction>(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + WorkList.insert(OpI); + } + + I->eraseFromParent(); + + return true; + } + + if (Value *SimpleV = SimplifyInstruction(I, DL)) { + // Add the users to the worklist. CAREFUL: an instruction can use itself, + // in the case of a phi node. + for (User *U : I->users()) + if (U != I) + WorkList.insert(cast<Instruction>(U)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + I->eraseFromParent(); + return true; + } + return false; +} + /// SimplifyInstructionsInBlock - Scan the specified basic block and try to /// simplify any instructions in it and recursively delete dead instructions. /// @@ -422,30 +471,34 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool MadeChange = false; + const DataLayout &DL = BB->getModule()->getDataLayout(); #ifndef NDEBUG // In debug builds, ensure that the terminator of the block is never replaced // or deleted by these simplifications. The idea of simplification is that it // cannot introduce new instructions, and there is no way to replace the // terminator of a block without introducing a new instruction. - AssertingVH<Instruction> TerminatorVH(--BB->end()); + AssertingVH<Instruction> TerminatorVH(&BB->back()); #endif - for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) { + SmallSetVector<Instruction *, 16> WorkList; + // Iterate over the original function, only adding insts to the worklist + // if they actually need to be revisited. This avoids having to pre-init + // the worklist with the entire function's worth of instructions. + for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end()); BI != E;) { assert(!BI->isTerminator()); - Instruction *Inst = BI++; + Instruction *I = &*BI; + ++BI; - WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TLI)) { - MadeChange = true; - if (BIHandle != BI) - BI = BB->begin(); - continue; - } + // We're visiting this instruction now, so make sure it's not in the + // worklist from an earlier visit. 
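// The worklist discipline behind simplifyAndDCEInstruction, in isolation:
// SmallSetVector deduplicates insertions, so an instruction is queued at
// most once until popped. A sketch (drainWorkList and the Process callback
// are hypothetical):

#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

template <typename CallbackT>
static void drainWorkList(SmallSetVector<Instruction *, 16> &WorkList,
                          CallbackT Process) {
  while (!WorkList.empty()) {
    Instruction *I = WorkList.pop_back_val(); // LIFO order
    Process(I, WorkList);                     // may insert new work items
  }
}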
+ if (!WorkList.count(I)) + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); + } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); - if (BIHandle != BI) - BI = BB->begin(); + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); } return MadeChange; } @@ -808,7 +861,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { // Copy over any phi, debug or lifetime instruction. BB->getTerminator()->eraseFromParent(); - Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList()); + Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), + BB->getInstList()); } else { while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. @@ -1017,8 +1071,13 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (LdStHasDebugValue(DIVar, LI)) return true; - Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, - DDI->getDebugLoc(), LI); + // We are now tracking the loaded value instead of the address. In the + // future if multi-location support is added to the IR, it might be + // preferable to keep tracking both the loaded value and the original + // address in case the alloca can not be elided. + Instruction *DbgValue = Builder.insertDbgValueIntrinsic( + LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr); + DbgValue->insertAfter(LI); return true; } @@ -1034,8 +1093,8 @@ bool llvm::LowerDbgDeclare(Function &F) { DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); SmallVector<DbgDeclareInst *, 4> Dbgs; for (auto &FI : F) - for (BasicBlock::iterator BI : FI) - if (auto DDI = dyn_cast<DbgDeclareInst>(BI)) + for (Instruction &BI : FI) + if (auto DDI = dyn_cast<DbgDeclareInst>(&BI)) Dbgs.push_back(DDI); if (Dbgs.empty()) @@ -1060,9 +1119,13 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. + SmallVector<uint64_t, 1> NewDIExpr; + auto *DIExpr = DDI->getExpression(); + NewDIExpr.push_back(dwarf::DW_OP_deref); + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), - DDI->getExpression(), DDI->getDebugLoc(), - CI); + DIB.createExpression(NewDIExpr), + DDI->getDebugLoc(), CI); } DDI->eraseFromParent(); } @@ -1082,9 +1145,10 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { return nullptr; } -bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref) { - DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); +bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, + Instruction *InsertBefore, DIBuilder &Builder, + bool Deref, int Offset) { + DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address); if (!DDI) return false; DebugLoc Loc = DDI->getDebugLoc(); @@ -1092,29 +1156,40 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - if (Deref) { + if (Deref || Offset) { // Create a copy of the original DIDescriptor for user variable, prepending // "deref" operation to a list of address elements, as new llvm.dbg.declare // will take a value storing address of the memory for variable, not // alloca itself. 
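// The deref/offset prefix assembled just below, as a free-standing builder:
// a sketch of the raw DIExpression element encoding (derefPlusOffset is a
// hypothetical helper):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;

static DIExpression *derefPlusOffset(DIBuilder &DIB, int Offset) {
  SmallVector<uint64_t, 4> Ops;
  Ops.push_back(dwarf::DW_OP_deref);
  if (Offset > 0) {
    Ops.push_back(dwarf::DW_OP_plus);
    Ops.push_back(static_cast<uint64_t>(Offset));
  } else if (Offset < 0) {
    Ops.push_back(dwarf::DW_OP_minus);
    Ops.push_back(static_cast<uint64_t>(-Offset));
  }
  return DIB.createExpression(Ops);
}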
SmallVector<uint64_t, 4> NewDIExpr; - NewDIExpr.push_back(dwarf::DW_OP_deref); + if (Deref) + NewDIExpr.push_back(dwarf::DW_OP_deref); + if (Offset > 0) { + NewDIExpr.push_back(dwarf::DW_OP_plus); + NewDIExpr.push_back(Offset); + } else if (Offset < 0) { + NewDIExpr.push_back(dwarf::DW_OP_minus); + NewDIExpr.push_back(-Offset); + } if (DIExpr) NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIExpr = Builder.createExpression(NewDIExpr); } - // Insert llvm.dbg.declare in the same basic block as the original alloca, - // and remove old llvm.dbg.declare. - BasicBlock *BB = AI->getParent(); - Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB); + // Insert llvm.dbg.declare immediately after the original alloca, and remove + // old llvm.dbg.declare. + Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); DDI->eraseFromParent(); return true; } -/// changeToUnreachable - Insert an unreachable instruction before the specified -/// instruction, making it and the rest of the code in the block dead. -static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { +bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder, bool Deref, int Offset) { + return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, + Deref, Offset); +} + +void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -1132,7 +1207,7 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { new UnreachableInst(I->getContext(), I); // All instructions after this are dead. - BasicBlock::iterator BBI = I, BBE = BB->end(); + BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); @@ -1142,8 +1217,11 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { /// changeToCall - Convert the specified invoke into a normal call. static void changeToCall(InvokeInst *II) { - SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end()); + SmallVector<OperandBundleDef, 1> OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); @@ -1162,7 +1240,7 @@ static bool markAliveBlocks(Function &F, SmallPtrSetImpl<BasicBlock*> &Reachable) { SmallVector<BasicBlock*, 128> Worklist; - BasicBlock *BB = F.begin(); + BasicBlock *BB = &F.front(); Worklist.push_back(BB); Reachable.insert(BB); bool Changed = false; @@ -1187,7 +1265,7 @@ static bool markAliveBlocks(Function &F, if (MakeUnreachable) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(BBI, false); + changeToUnreachable(&*BBI, false); Changed = true; break; } @@ -1201,7 +1279,7 @@ static bool markAliveBlocks(Function &F, ++BBI; if (!isa<UnreachableInst>(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. 
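// changeToUnreachable is exported from Local.h by this change; a minimal
// usage sketch for killing a known-dead tail of a block (killFrom is
// hypothetical):

#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static void killFrom(Instruction *DeadPoint) {
  // DeadPoint and everything after it are deleted; the block is terminated
  // with 'unreachable', optionally preceded by a call to llvm.trap.
  changeToUnreachable(DeadPoint, /*UseLLVMTrap=*/true);
}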
- changeToUnreachable(BBI, false); + changeToUnreachable(&*BBI, false); Changed = true; } break; @@ -1253,6 +1331,40 @@ static bool markAliveBlocks(Function &F, return Changed; } +void llvm::removeUnwindEdge(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + + if (auto *II = dyn_cast<InvokeInst>(TI)) { + changeToCall(II); + return; + } + + TerminatorInst *NewTI; + BasicBlock *UnwindDest; + + if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) { + NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI); + UnwindDest = CRI->getUnwindDest(); + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) { + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(), + CatchSwitch->getName(), CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + + NewTI = NewCatchSwitch; + UnwindDest = CatchSwitch->getUnwindDest(); + } else { + llvm_unreachable("Could not find unwind successor"); + } + + NewTI->takeName(TI); + NewTI->setDebugLoc(TI->getDebugLoc()); + UnwindDest->removePredecessor(BB); + TI->replaceAllUsesWith(NewTI); + TI->eraseFromParent(); +} + /// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. @@ -1270,17 +1382,18 @@ bool llvm::removeUnreachableBlocks(Function &F) { // Loop over all of the basic blocks that are not reachable, dropping all of // their internal references... for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(BB)) + if (Reachable.count(&*BB)) continue; - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + for (succ_iterator SI = succ_begin(&*BB), SE = succ_end(&*BB); SI != SE; + ++SI) if (Reachable.count(*SI)) - (*SI)->removePredecessor(BB); + (*SI)->removePredecessor(&*BB); BB->dropAllReferences(); } for (Function::iterator I = ++F.begin(); I != F.end();) - if (!Reachable.count(I)) + if (!Reachable.count(&*I)) I = F.getBasicBlockList().erase(I); else ++I; @@ -1288,9 +1401,10 @@ bool llvm::removeUnreachableBlocks(Function &F) { return true; } -void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs) { +void llvm::combineMetadata(Instruction *K, const Instruction *J, + ArrayRef<unsigned> KnownIDs) { SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; - K->dropUnknownMetadata(KnownIDs); + K->dropUnknownNonDebugMetadata(KnownIDs); K->getAllMetadataOtherThanDebugLoc(Metadata); for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { unsigned Kind = Metadata[i].first; @@ -1326,8 +1440,29 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign // Only set the !nonnull if it is present in both instructions. K->setMetadata(Kind, JMD); break; + case LLVMContext::MD_invariant_group: + // Preserve !invariant.group in K. + break; + case LLVMContext::MD_align: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; } } + // Set !invariant.group from J if J has it. If both instructions have it + // then we will just pick it from J - even when they are different. + // Also make sure that K is load or store - f.e. 
combining bitcast with load + // could produce bitcast with invariant.group metadata, which is invalid. + // FIXME: we should try to preserve both invariant.group md if they are + // different, but right now instruction can only have one invariant.group. + if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group)) + if (isa<LoadInst>(K) || isa<StoreInst>(K)) + K->setMetadata(LLVMContext::MD_invariant_group, JMD); } unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, @@ -1349,3 +1484,40 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, } return Count; } + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlock *BB) { + assert(From->getType() == To->getType()); + + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE;) { + Use &U = *UI++; + auto *I = cast<Instruction>(U.getUser()); + if (DT.dominates(BB, I->getParent())) { + U.set(To); + DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " + << *To << " in " << *U << "\n"); + ++Count; + } + } + return Count; +} + +bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { + if (isa<IntrinsicInst>(CS.getInstruction())) + // Most LLVM intrinsics are things which can never take a safepoint. + // As a result, we don't need to have the stack parsable at the + // callsite. This is a highly useful optimization since intrinsic + // calls are fairly prevalent, particularly in debug builds. + return true; + + // Check if the function is specifically marked as a gc leaf function. + // + // TODO: we should be checking the attributes on the call site as well. + if (const Function *F = CS.getCalledFunction()) + return F->hasFnAttribute("gc-leaf-function"); + + return false; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 5c98043..1fa4695 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -44,11 +44,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -78,7 +81,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl<BasicBlock *> &SplitPreds, Loop *L) { // Check to see if NewBB is already well placed. - Function::iterator BBI = NewBB; --BBI; + Function::iterator BBI = --NewBB->getIterator(); for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { if (&*BBI == SplitPreds[i]) return; @@ -92,9 +95,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, // block that neighbors a BB actually in the loop. 
BasicBlock *FoundBB = nullptr; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && - L->contains(BBI)) { + Function::iterator BBI = SplitPreds[i]->getIterator(); + if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) { FoundBB = SplitPreds[i]; break; } @@ -112,17 +114,10 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, /// preheader, this method is called to insert one. This method has two phases: /// preheader insertion and analysis updating. /// -BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Header = L->getHeader(); - // Get analyses that we try to update. - auto *AA = PP->getAnalysisIfAvailable<AliasAnalysis>(); - auto *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *LIWP = PP->getAnalysisIfAvailable<LoopInfoWrapperPass>(); - auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - // Compute the set of predecessors of the loop that are not in the loop. SmallVector<BasicBlock*, 8> OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); @@ -141,8 +136,10 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // Split out the loop pre-header. BasicBlock *PreheaderBB; - PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", - AA, DT, LI, PreserveLCSSA); + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, + LI, PreserveLCSSA); + if (!PreheaderBB) + return nullptr; DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << PreheaderBB->getName() << "\n"); @@ -159,8 +156,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { /// This method is used to split exit blocks that have predecessors outside of /// the loop. static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, Pass *PP) { + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { SmallVector<BasicBlock*, 8> LoopBlocks; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; @@ -175,10 +172,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewExitBB = nullptr; - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT, - LI, PreserveLCSSA); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI, + PreserveLCSSA); + if (!NewExitBB) + return nullptr; DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " << NewExitBB->getName() << "\n"); @@ -206,8 +203,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, /// \brief The first part of loop-nestification is to find a PHI node that tells /// us how to partition the loops. 
-static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, - DominatorTree *DT, +static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, AssumptionCache *AC) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { @@ -216,7 +212,6 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); - if (AA) AA->deleteValue(PN); PN->eraseFromParent(); continue; } @@ -251,18 +246,18 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, /// created. /// static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, Pass *PP, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, bool PreserveLCSSA, AssumptionCache *AC) { // Don't try to separate loops without a preheader. if (!Preheader) return nullptr; // The header is not a landing pad; preheader insertion should ensure this. - assert(!L->getHeader()->isLandingPad() && - "Can't insert backedge to landing pad"); + BasicBlock *Header = L->getHeader(); + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); - PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AC); + PHINode *PN = findPHIToPartitionLoops(L, DT, AC); if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This @@ -286,11 +281,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (SE) SE->forgetLoop(L); - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - - BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", - AA, DT, LI, PreserveLCSSA); + DT, LI, PreserveLCSSA); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -357,7 +349,6 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, /// and have that block branch to the loop header. This ensures that loops /// have exactly one backedge. static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); @@ -369,8 +360,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, if (!Preheader) return nullptr; - // The header is not a landing pad; preheader insertion should ensure this. - assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); + // The header is not an EH pad; preheader insertion should ensure this. + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; @@ -394,7 +385,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. 
- Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; + Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in @@ -443,7 +434,6 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); - if (AA) AA->deleteValue(NewPN); BEBlock->getInstList().erase(NewPN); } } @@ -470,15 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, } /// \brief Simplify one loop and queue further loops for simplification. -/// -/// FIXME: Currently this accepts both lots of analyses that it uses and a raw -/// Pass pointer. The Pass pointer is used by numerous utilities to update -/// specific analyses. Rather than a pass it would be much cleaner and more -/// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP, - AssumptionCache *AC) { + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { bool Changed = false; ReprocessLoop: @@ -544,7 +529,7 @@ ReprocessLoop: // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - Preheader = InsertPreheaderForLoop(L, PP); + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (Preheader) { ++NumInserted; Changed = true; @@ -568,7 +553,7 @@ ReprocessLoop: // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. if (!L->contains(*PI)) { - if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) { + if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) { ++NumInserted; Changed = true; } @@ -585,7 +570,7 @@ ReprocessLoop: // common backedge instead. if (L->getNumBackEdges() < 8) { if (Loop *OuterL = - separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) { + separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. @@ -602,7 +587,7 @@ ReprocessLoop: // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. - LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI); + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); if (LoopLatch) { ++NumInserted; Changed = true; @@ -618,7 +603,6 @@ ReprocessLoop: for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { - if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -654,7 +638,7 @@ ReprocessLoop: bool AllInvariant = true; bool AnyInvariant = false; for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; // Skip debug info intrinsics. 
if (isa<DbgInfoIntrinsic>(Inst)) continue; @@ -716,9 +700,9 @@ ReprocessLoop: return Changed; } -bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, - AliasAnalysis *AA, ScalarEvolution *SE, - AssumptionCache *AC) { +bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. @@ -734,8 +718,8 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, } while (!Worklist.empty()) - Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, - SE, PP, AC); + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE, + AC, PreserveLCSSA); return Changed; } @@ -747,9 +731,6 @@ namespace { initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); } - // AA - If we have an alias analysis object to update, this is it, otherwise - // this is null. - AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; @@ -767,8 +748,11 @@ namespace { AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); - AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); AU.addPreserved<DependenceAnalysis>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } @@ -784,6 +768,9 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) @@ -796,15 +783,16 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// bool LoopSimplify::runOnFunction(Function &F) { bool Changed = false; - AA = getAnalysisIfAvailable<AliasAnalysis>(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - SE = getAnalysisIfAvailable<ScalarEvolution>(); + auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + SE = SEWP ? &SEWP->getSE() : nullptr; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC); + Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA); return Changed; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 1dbce47..2499b88 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -73,7 +73,7 @@ static inline void RemapInstruction(Instruction *I, /// of loops that have already been forgotten to prevent redundant, expensive /// calls to ScalarEvolution::forgetLoop. Returns the new combined block. 
static BasicBlock * -FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, +FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE, SmallPtrSetImpl<Loop *> &ForgottenLoops) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and @@ -109,12 +109,10 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, // Erase basic block from the function... // ScalarEvolution holds references to loop exit blocks. - if (LPM) { - if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { - if (Loop *L = LI->getLoopFor(BB)) { - if (ForgottenLoops.insert(L).second) - SE->forgetLoop(L); - } + if (SE) { + if (Loop *L = LI->getLoopFor(BB)) { + if (ForgottenLoops.insert(L).second) + SE->forgetLoop(L); } } LI->removeBlock(BB); @@ -155,15 +153,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, /// /// The LoopInfo Analysis that is passed will be kept consistent. /// -/// If a LoopPassManager is passed in, and the loop is fully removed, it will be -/// removed from the LoopPassManager as well. LPM can also be NULL. -/// -/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are -/// available from the Pass it must also preserve those analyses. +/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and +/// DominatorTree if they are non-null. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, bool AllowExpensiveTripCount, - unsigned TripMultiple, LoopInfo *LI, Pass *PP, - LPPassManager *LPM, AssumptionCache *AC) { + unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, + bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -220,6 +216,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; + SmallVector<BasicBlock *, 4> ExitBlocks; + L->getExitBlocks(ExitBlocks); + Loop *ParentL = L->getParentLoop(); + bool AllExitsAreInsideParentLoop = !ParentL || + std::all_of(ExitBlocks.begin(), ExitBlocks.end(), + [&](BasicBlock *BB) { return ParentL->contains(BB); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -227,13 +229,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); if (RuntimeTripCount && - !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM)) + !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT, + PreserveLCSSA)) return false; // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - ScalarEvolution *SE = - PP ? PP->getAnalysisIfAvailable<ScalarEvolution>() : nullptr; if (SE) SE->forgetLoop(L); @@ -392,7 +393,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - ::RemapInstruction(I, LastValueMap); + ::RemapInstruction(&*I, LastValueMap); } // Loop over the PHI nodes in the original block, setting incoming values. 
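The hunk above moves UnrollLoop off the Pass*/LPPassManager plumbing: callers now hand in the analyses they hold (LoopInfo, ScalarEvolution, DominatorTree, AssumptionCache) plus a PreserveLCSSA flag. A minimal caller sketch against the new signature; the wrapper name and the trip-count handling here are illustrative assumptions, not part of the patch:

// Sketch only: drives the refactored UnrollLoop with explicit analyses.
static bool tryFullUnroll(Loop *L, unsigned TripCount, LoopInfo *LI,
                          ScalarEvolution *SE, DominatorTree *DT,
                          AssumptionCache *AC, bool PreserveLCSSA) {
  // Count == TripCount requests complete unrolling; TripMultiple = 1
  // claims nothing about divisibility of the trip count.
  return UnrollLoop(L, /*Count=*/TripCount, TripCount,
                    /*AllowRuntime=*/false,
                    /*AllowExpensiveTripCount=*/false,
                    /*TripMultiple=*/1, LI, SE, DT, AC, PreserveLCSSA);
}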
@@ -432,8 +433,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // For a complete unroll, make the last iteration end with a branch // to the exit block. - if (CompletelyUnroll && j == 0) { - Dest = LoopExit; + if (CompletelyUnroll) { + if (j == 0) + Dest = LoopExit; NeedConditional = false; } @@ -473,7 +475,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM, + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } @@ -483,29 +485,24 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // whole function's cache. AC->clear(); - DominatorTree *DT = nullptr; - if (PP) { - // FIXME: Reconstruct dom info, because it is not preserved properly. - // Incrementally updating domtree after loop unrolling would be easy. - if (DominatorTreeWrapperPass *DTWP = - PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { - DT = &DTWP->getDomTree(); - DT->recalculate(*L->getHeader()->getParent()); - } - - // Simplify any new induction variables in the partially unrolled loop. - if (SE && !CompletelyUnroll) { - SmallVector<WeakVH, 16> DeadInsts; - simplifyLoopIVs(L, SE, LPM, DeadInsts); - - // Aggressively clean up dead instructions that simplifyLoopIVs already - // identified. Any remaining should be cleaned up below. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); - } + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DT) + DT->recalculate(*L->getHeader()->getParent()); + + // Simplify any new induction variables in the partially unrolled loop. + if (SE && !CompletelyUnroll) { + SmallVector<WeakVH, 16> DeadInsts; + simplifyLoopIVs(L, SE, DT, LI, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); } + // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. @@ -514,7 +511,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); @@ -529,29 +526,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, ++NumUnrolled; Loop *OuterL = L->getParentLoop(); - // Remove the loop from the LoopPassManager if it's completely removed. - if (CompletelyUnroll && LPM != nullptr) - LPM->deleteLoopFromQueue(L); + // Update LoopInfo if the loop is completely removed. + if (CompletelyUnroll) + LI->updateUnloop(L); // If we have a DominatorTree we should re-simplify impacted loops
We want to simplify // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. - if (PP && DT) { + if (DT) { if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC); + bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after - // deleteLoopFromQueue updates LoopInfo. + // LoopInfo's been updated by updateUnloop. Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - formLCSSARecursively(*OuterL, *DT, LI, SE); + if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified)) + formLCSSARecursively(*OuterL, *DT, LI, SE); + else + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); } } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index add5432..0d68f18 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -62,8 +62,8 @@ STATISTIC(NumRuntimeUnrolled, static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *LastPrologBB, BasicBlock *PrologEnd, BasicBlock *OrigPH, BasicBlock *NewPH, - ValueToValueMapTy &VMap, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, Pass *P) { + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); @@ -127,8 +127,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI, - P->mustPreserveAnalysisID(LCSSAID)); + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPH); InsertPt->eraseFromParent(); @@ -150,7 +150,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); - Loop *NewLoop = 0; + Loop *NewLoop = nullptr; Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); @@ -206,9 +206,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, // Change the incoming values to the ones defined in the preheader or // cloned loop. 
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *NewPHI = cast<PHINode>(VMap[I]); + PHINode *NewPHI = cast<PHINode>(VMap[&*I]); if (UnrollProlog) { - VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); + VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI); } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); @@ -279,7 +279,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, /// bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, bool AllowExpensiveTripCount, LoopInfo *LI, - LPPassManager *LPM) { + ScalarEvolution *SE, DominatorTree *DT, + bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; @@ -291,9 +292,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Use Scalar Evolution to compute the trip count. This allows more // loops to be unrolled than relying on induction var simplification - if (!LPM) - return false; - ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); if (!SE) return false; @@ -308,7 +306,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Add 1 since the backedge count doesn't include the first loop iteration const SCEV *TripCountSC = - SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); + SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; @@ -333,10 +331,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); - // Grab analyses that we preserve. - auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - BasicBlock *PH = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to split the original preheader twice, once for the end of the @@ -397,8 +391,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, VMap, LI); // Insert the cloned blocks into function just before the original loop - F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0], - F->end()); + F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(), + NewBlocks[0]->getIterator(), F->end()); // Rewrite the cloned instruction operands to use the values // created when the clone is created. @@ -406,7 +400,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) { - RemapInstruction(I, VMap, + RemapInstruction(&*I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); } } @@ -414,8 +408,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Connect the prolog code to the original loop and update the // PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]); - ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, - /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass()); + ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI, + PreserveLCSSA); NumRuntimeUnrolled++; return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index 5cbde94..e038805 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -34,6 +34,124 @@ bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, return true; } +bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_IntegerOr: + case RK_IntegerAnd: + case RK_IntegerXor: + case RK_IntegerMinMax: + return true; + } + return false; +} + +bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { + return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); +} + +bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_FloatAdd: + case RK_FloatMult: + return true; + } + return false; +} + +Instruction * +RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl<Instruction *> &Visited, + SmallPtrSetImpl<Instruction *> &CI) { + if (!Phi->hasOneUse()) + return Phi; + + const APInt *M = nullptr; + Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); + + // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT + // with a new integer type of the corresponding bit width. + if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), + m_And(m_APInt(M), m_Instruction(I))))) { + int32_t Bits = (*M + 1).exactLogBase2(); + if (Bits > 0) { + RT = IntegerType::get(Phi->getContext(), Bits); + Visited.insert(Phi); + CI.insert(J); + return J; + } + } + return Phi; +} + +bool RecurrenceDescriptor::getSourceExtensionKind( + Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, + SmallPtrSetImpl<Instruction *> &Visited, + SmallPtrSetImpl<Instruction *> &CI) { + + SmallVector<Instruction *, 8> Worklist; + bool FoundOneOperand = false; + unsigned DstSize = RT->getPrimitiveSizeInBits(); + Worklist.push_back(Exit); + + // Traverse the instructions in the reduction expression, beginning with the + // exit value. + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + for (Use &U : I->operands()) { + + // Terminate the traversal if the operand is not an instruction, or we + // reach the starting value. + Instruction *J = dyn_cast<Instruction>(U.get()); + if (!J || J == Start) + continue; + + // Otherwise, investigate the operation if it is also in the expression. 
+ if (Visited.count(J)) { + Worklist.push_back(J); + continue; + } + + // If the operand is not in Visited, it is not a reduction operation, but + // it does feed into one. Make sure it is either a single-use sign- or + // zero-extend instruction. + CastInst *Cast = dyn_cast<CastInst>(J); + bool IsSExtInst = isa<SExtInst>(J); + if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) + return false; + + // Ensure the source type of the extend is no larger than the reduction + // type. It is not necessary for the types to be identical. + unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); + if (SrcSize > DstSize) + return false; + + // Furthermore, ensure that all such extends are of the same kind. + if (FoundOneOperand) { + if (IsSigned != IsSExtInst) + return false; + } else { + FoundOneOperand = true; + IsSigned = IsSExtInst; + } + + // Lastly, if the source type of the extend matches the reduction type, + // add the extend to CI so that we can avoid accounting for it in the + // cost model. + if (SrcSize == DstSize) + CI.insert(Cast); + } + } + return true; +} + bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, RecurrenceDescriptor &RedDes) { @@ -68,10 +186,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, unsigned NumCmpSelectPatternInst = 0; InstDesc ReduxDesc(false, nullptr); + // Data used for determining if the recurrence has been type-promoted. + Type *RecurrenceType = Phi->getType(); + SmallPtrSet<Instruction *, 4> CastInsts; + Instruction *Start = Phi; + bool IsSigned = false; + SmallPtrSet<Instruction *, 8> VisitedInsts; SmallVector<Instruction *, 8> Worklist; - Worklist.push_back(Phi); - VisitedInsts.insert(Phi); + + // Return early if the recurrence kind does not match the type of Phi. If the + // recurrence kind is arithmetic, we attempt to look through AND operations + // resulting from the type promotion performed by InstCombine. Vector + // operations are not limited to the legal integer widths, so we may be able + // to evaluate the reduction in the narrower width. + if (RecurrenceType->isFloatingPointTy()) { + if (!isFloatingPointRecurrenceKind(Kind)) + return false; + } else { + if (!isIntegerRecurrenceKind(Kind)) + return false; + if (isArithmeticRecurrenceKind(Kind)) + Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); + } + + Worklist.push_back(Start); + VisitedInsts.insert(Start); // A value in the reduction can be used: // - By the reduction: @@ -110,10 +250,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) return false; - // Any reduction instruction must be of one of the allowed kinds. - ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); - if (!ReduxDesc.isRecurrence()) - return false; + // Any reduction instruction must be of one of the allowed kinds. We ignore + // the starting value (the Phi or an AND instruction if the Phi has been + // type-promoted). + if (Cur != Start) { + ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); + if (!ReduxDesc.isRecurrence()) + return false; + } // A reduction operation must only have one use of the reduction value. if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && @@ -131,7 +275,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ++NumCmpSelectPatternInst; // Check whether we found a reduction operator. 
- FoundReduxOp |= !IsAPhi; + FoundReduxOp |= !IsAPhi && Cur != Start; // Process users of current instruction. Push non-PHI nodes after PHI nodes // onto the stack. This way we are going to have seen all inputs to PHI @@ -193,6 +337,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) return false; + // If we think Phi may have been type-promoted, we also need to ensure that + // all source operands of the reduction are either SExtInsts or ZExtInsts. If + // so, we will be able to evaluate the reduction in the narrower bit width. + if (Start != Phi) + if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, + IsSigned, VisitedInsts, CastInsts)) + return false; + // We found a reduction var if we have reached the original phi node and we // only have a single instruction with out-of-loop users. @@ -200,9 +352,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, // is saved as part of the RecurrenceDescriptor. // Save the description of this reduction variable. - RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, - ReduxDesc.getMinMaxKind()); - + RecurrenceDescriptor RD( + RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), + ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); RedDes = RD; return true; @@ -263,14 +415,14 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { bool FP = I->getType()->isFloatingPointTy(); - bool FastMath = FP && I->hasUnsafeAlgebra(); + Instruction *UAI = Prev.getUnsafeAlgebraInst(); + if (!UAI && FP && !I->hasUnsafeAlgebra()) + UAI = I; // Found an unsafe (unvectorizable) algebra instruction. + switch (I->getOpcode()) { default: return InstDesc(false, I); case Instruction::PHI: - if (FP && - (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax)) - return InstDesc(false, I); return InstDesc(I, Prev.getMinMaxKind()); case Instruction::Sub: case Instruction::Add: @@ -284,10 +436,10 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, case Instruction::Xor: return InstDesc(Kind == RK_IntegerXor, I); case Instruction::FMul: - return InstDesc(Kind == RK_FloatMult && FastMath, I); + return InstDesc(Kind == RK_FloatMult, I, UAI); case Instruction::FSub: case Instruction::FAdd: - return InstDesc(Kind == RK_FloatAdd && FastMath, I); + return InstDesc(Kind == RK_FloatAdd, I, UAI); case Instruction::FCmp: case Instruction::ICmp: case Instruction::Select: @@ -442,6 +594,13 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, break; } + // We only match FP sequences with unsafe algebra, so we can unconditionally + // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + Builder.SetFastMathFlags(FMF); + Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); @@ -452,8 +611,54 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, return Select; } -bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, - ConstantInt *&StepValue) { +InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, + ConstantInt *Step) + : StartValue(Start), IK(K), StepValue(Step) { + assert(IK != IK_NoInduction && "Not an induction"); + assert(StartValue && "StartValue is null"); + assert(StepValue && !StepValue->isZero() && "StepValue is zero"); + assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && + "StartValue is not a pointer for pointer induction"); + assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && + "StartValue is not an integer for integer induction"); + assert(StepValue->getType()->isIntegerTy() && + "StepValue is not an integer"); +} + +int InductionDescriptor::getConsecutiveDirection() const { + if (StepValue && (StepValue->isOne() || StepValue->isMinusOne())) + return StepValue->getSExtValue(); + return 0; +} + +Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const { + switch (IK) { + case IK_IntInduction: + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + if (StepValue->isMinusOne()) + return B.CreateSub(StartValue, Index); + if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateAdd(StartValue, Index); + + case IK_PtrInduction: + assert(Index->getType() == StepValue->getType() && + "Index type does not match StepValue type"); + if (StepValue->isMinusOne()) + Index = B.CreateNeg(Index); + else if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateGEP(nullptr, StartValue, Index); + + case IK_NoInduction: + return nullptr; + } + llvm_unreachable("invalid enum"); +} + +bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, + InductionDescriptor &D) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) @@ -467,6 +672,10 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, return false; } + assert(AR->getLoop()->getHeader() == Phi->getParent() && + "PHI is an AddRec for a different loop?!"); + Value *StartValue = + Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); @@ -475,7 +684,7 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, ConstantInt *CV = C->getValue(); if (PhiTy->isIntegerTy()) { - StepValue = CV; + D = InductionDescriptor(StartValue, IK_IntInduction, CV); return true; } @@ -494,6 +703,27 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return false; - StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + + D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); return true; } + +/// \brief Returns the instructions that use values defined in the loop. 
+SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { + SmallVector<Instruction *, 8> UsedOutside; + + for (auto *Block : L->getBlocks()) + // FIXME: I believe that this could use copy_if if the Inst reference could + // be adapted into a pointer. + for (auto &Inst : *Block) { + auto Users = Inst.users(); + if (std::any_of(Users.begin(), Users.end(), [&](User *U) { + auto *Use = cast<Instruction>(U); + return !L->contains(Use->getParent()); + })) + UsedOutside.push_back(&Inst); + } + + return UsedOutside; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 832079d..9a2a06c 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -13,43 +13,81 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/Dominators.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopVersioning.h" using namespace llvm; LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, - const SmallVector<int, 8> *PtrToPartition) - : VersionedLoop(L), NonVersionedLoop(nullptr), - PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) { + DominatorTree *DT, ScalarEvolution *SE, + bool UseLAIChecks) + : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT), + SE(SE) { assert(L->getExitBlock() && "No single exit block"); assert(L->getLoopPreheader() && "No preheader"); + if (UseLAIChecks) { + setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); + setSCEVChecks(LAI.PSE.getUnionPredicate()); + } } -bool LoopVersioning::needsRuntimeChecks() const { - return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition); +void LoopVersioning::setAliasChecks( + const SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks) { + AliasChecks = std::move(Checks); } -void LoopVersioning::versionLoop(Pass *P) { +void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) { + Preds = std::move(Check); +} + +void LoopVersioning::versionLoop( + const SmallVectorImpl<Instruction *> &DefsUsedOutside) { Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; + Value *SCEVRuntimeCheck; + Value *RuntimeCheck = nullptr; + // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); + BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader(); std::tie(FirstCheckInst, MemRuntimeCheck) = - LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); + LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks); assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); + SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), + "scev.check"); + SCEVRuntimeCheck = + Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator()); + auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck); + + // Discard the SCEV runtime check if it is always true. 
+ if (CI && CI->isZero()) + SCEVRuntimeCheck = nullptr; + + if (MemRuntimeCheck && SCEVRuntimeCheck) { + RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck, + SCEVRuntimeCheck, "ldist.safe"); + if (auto *I = dyn_cast<Instruction>(RuntimeCheck)) + I->insertBefore(RuntimeCheckBB->getTerminator()); + } else + RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck; + + assert(RuntimeCheck && "called even though we don't need " + "any runtime checks"); + // Rename the block to make the IR more readable. - MemCheckBB->setName(VersionedLoop->getHeader()->getName() + ".lver.memcheck"); + RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() + + ".lver.check"); // Create empty preheader for the loop (and after cloning for the // non-versioned loop). - BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI); + BasicBlock *PH = + SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI); PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); // Clone the loop including the preheader. @@ -58,20 +96,23 @@ void LoopVersioning::versionLoop(Pass *P) { // block is a join between the two loops. SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks; NonVersionedLoop = - cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, ".lver.orig", - LI, DT, NonVersionedLoopBlocks); + cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap, + ".lver.orig", LI, DT, NonVersionedLoopBlocks); remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap); // Insert the conditional branch based on the result of the memchecks. - Instruction *OrigTerm = MemCheckBB->getTerminator(); + Instruction *OrigTerm = RuntimeCheckBB->getTerminator(); BranchInst::Create(NonVersionedLoop->getLoopPreheader(), - VersionedLoop->getLoopPreheader(), MemRuntimeCheck, - OrigTerm); + VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm); OrigTerm->eraseFromParent(); // The loops merge in the original exit block. This is now dominated by the // memchecking block. - DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB); + DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB); + + // Adds the necessary PHI nodes for the versioned loops based on the + // loop-defined values used outside of the loop. + addPHINodes(DefsUsedOutside); } void LoopVersioning::addPHINodes( @@ -94,7 +135,7 @@ void LoopVersioning::addPHINodes( // If not create it. if (!PN) { PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", - PHIBlock->begin()); + &PHIBlock->front()); for (auto *User : Inst->users()) if (!VersionedLoop->contains(cast<Instruction>(User)->getParent())) User->replaceUsesOfWith(Inst, PN); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index 66d57b0..b0ad4d5 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -69,7 +69,7 @@ bool LowerInvoke::runOnFunction(Function &F) { BranchInst::Create(II->getNormalDest(), II); // Remove any PHI node entries from the exception destination. - II->getUnwindDest()->removePredecessor(BB); + II->getUnwindDest()->removePredecessor(&*BB); // Remove the invoke instruction now. 
BB->getInstList().erase(II); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 4acd988..52beb15 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -49,8 +49,7 @@ namespace { return I != Ranges.end() && I->Low <= R.Low; } - /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch - /// instructions. + /// Replace all SwitchInst instructions with chained branch instructions. class LowerSwitch : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid @@ -78,7 +77,7 @@ namespace { typedef std::vector<CaseRange> CaseVector; typedef std::vector<CaseRange>::iterator CaseItr; private: - void processSwitchInst(SwitchInst *SI); + void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList); BasicBlock *switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ConstantInt *UpperBound, @@ -116,21 +115,30 @@ FunctionPass *llvm::createLowerSwitchPass() { bool LowerSwitch::runOnFunction(Function &F) { bool Changed = false; + SmallPtrSet<BasicBlock*, 8> DeleteList; for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks + BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks + + // If the block is a dead Default block that will be deleted later, don't + // waste time processing it. + if (DeleteList.count(Cur)) + continue; if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { Changed = true; - processSwitchInst(SI); + processSwitchInst(SI, DeleteList); } } + for (BasicBlock* BB: DeleteList) { + DeleteDeadBlock(BB); + } + return Changed; } -// operator<< - Used for debugging purposes. -// +/// Used for debugging purposes. static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) LLVM_ATTRIBUTE_USED; @@ -147,23 +155,24 @@ static raw_ostream& operator<<(raw_ostream &O, return O << "]"; } -// \brief Update the first occurrence of the "switch statement" BB in the PHI -// node with the "new" BB. The other occurrences will: -// -// 1) Be updated by subsequent calls to this function. Switch statements may -// have more than one outcoming edge into the same BB if they all have the same -// value. When the switch statement is converted these incoming edges are now -// coming from multiple BBs. -// 2) Removed if subsequent incoming values now share the same case, i.e., -// multiple outcome edges are condensed into one. This is necessary to keep the -// number of phi values equal to the number of branches to SuccBB. +/// \brief Update the first occurrence of the "switch statement" BB in the PHI +/// node with the "new" BB. The other occurrences will: +/// +/// 1) Be updated by subsequent calls to this function. Switch statements may +/// have more than one outcoming edge into the same BB if they all have the same +/// value. When the switch statement is converted these incoming edges are now +/// coming from multiple BBs. +/// 2) Removed if subsequent incoming values now share the same case, i.e., +/// multiple outcome edges are condensed into one. This is necessary to keep the +/// number of phi values equal to the number of branches to SuccBB. 
static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, unsigned NumMergedCases) { - for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI(); + for (BasicBlock::iterator I = SuccBB->begin(), + IE = SuccBB->getFirstNonPHI()->getIterator(); I != IE; ++I) { PHINode *PN = cast<PHINode>(I); - // Only update the first occurence. + // Only update the first occurrence. unsigned Idx = 0, E = PN->getNumIncomingValues(); unsigned LocalNumMergedCases = NumMergedCases; for (; Idx != E; ++Idx) { @@ -173,7 +182,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, } } - // Remove additional occurences coming from condensed cases and keep the + // Remove additional occurrences coming from condensed cases and keep the // number of incoming values equal to the number of branches to SuccBB. SmallVector<unsigned, 8> Indices; for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) @@ -188,11 +197,11 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, } } -// switchConvert - Convert the switch statement into a binary lookup of -// the case values. The function recursively builds this tree. -// LowerBound and UpperBound are used to keep track of the bounds for Val -// that have already been checked by a block emitted by one of the previous -// calls to switchConvert in the call stack. +/// Convert the switch statement into a binary lookup of the case values. +/// The function recursively builds this tree. LowerBound and UpperBound are +/// used to keep track of the bounds for Val that have already been checked by +/// a block emitted by one of the previous calls to switchConvert in the call +/// stack. BasicBlock * LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ConstantInt *UpperBound, Value *Val, @@ -278,28 +287,24 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, UpperBound, Val, NewNode, OrigBlock, Default, UnreachableRanges); - Function::iterator FI = OrigBlock; - F->getBasicBlockList().insert(++FI, NewNode); + F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode); NewNode->getInstList().push_back(Comp); BranchInst::Create(LBranch, RBranch, Comp, NewNode); return NewNode; } -// newLeafBlock - Create a new leaf block for the binary lookup tree. It -// checks if the switch's value == the case's value. If not, then it -// jumps to the default branch. At this point in the tree, the value -// can't be another valid case value, so the jump to the "default" branch -// is warranted. -// +/// Create a new leaf block for the binary lookup tree. It checks if the +/// switch's value == the case's value. If not, then it jumps to the default +/// branch. At this point in the tree, the value can't be another valid case +/// value, so the jump to the "default" branch is warranted. BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, BasicBlock* OrigBlock, BasicBlock* Default) { Function* F = OrigBlock->getParent(); BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); - Function::iterator FI = OrigBlock; - F->getBasicBlockList().insert(++FI, NewLeaf); + F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); // Emit comparison ICmpInst* Comp = nullptr; @@ -352,7 +357,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, return NewLeaf; } -// Clusterify - Transform simple list of Cases into list of CaseRange's +/// Transform simple list of Cases into list of CaseRange's. 
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { unsigned numCmps = 0; @@ -394,10 +399,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { return numCmps; } -// processSwitchInst - Replace the specified switch instruction with a sequence -// of chained if-then insts in a balanced binary search. -// -void LowerSwitch::processSwitchInst(SwitchInst *SI) { +/// Replace the specified switch instruction with a sequence of chained if-then +/// insts in a balanced binary search. +void LowerSwitch::processSwitchInst(SwitchInst *SI, + SmallPtrSetImpl<BasicBlock*> &DeleteList) { BasicBlock *CurBlock = SI->getParent(); BasicBlock *OrigBlock = CurBlock; Function *F = CurBlock->getParent(); @@ -424,7 +429,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { std::vector<IntRange> UnreachableRanges; if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) { - // Make the bounds tightly fitted around the case value range, becase we + // Make the bounds tightly fitted around the case value range, because we // know that the value passed to the switch must be exactly one of the case // values. assert(!Cases.empty()); @@ -495,7 +500,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - F->getBasicBlockList().insert(Default, NewDefault); + F->getBasicBlockList().insert(Default->getIterator(), NewDefault); BranchInst::Create(Default, NewDefault); // If there is an entry in any PHI nodes for the default edge, make sure @@ -518,7 +523,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { BasicBlock *OldDefault = SI->getDefaultDest(); CurBlock->getInstList().erase(SI); - // If the Default block has no more predecessors just remove it. + // If the Default block has no more predecessors just add it to DeleteList. 
if (pred_begin(OldDefault) == pred_end(OldDefault)) - DeleteDeadBlock(OldDefault); + DeleteList.insert(OldDefault); } diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 00cf4e6..aa1e35d 100644 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -63,6 +63,9 @@ bool PromotePass::runOnFunction(Function &F) { BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + if (F.hasFnAttribute(Attribute::OptimizeNone)) + return false; + bool Changed = false; DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp index 395a46b..c999bd0 100644 --- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -42,6 +42,24 @@ namespace { } }; + static const char *const metaNames[] = { + // See http://en.wikipedia.org/wiki/Metasyntactic_variable + "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", + "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" + }; + + struct Renamer { + Renamer(unsigned int seed) { + prng.srand(seed); + } + + const char *newName() { + return metaNames[prng.rand() % array_lengthof(metaNames)]; + } + + PRNG prng; + }; + struct MetaRenamer : public ModulePass { static char ID; // Pass identification, replacement for typeid MetaRenamer() : ModulePass(ID) { @@ -53,36 +71,26 @@ namespace { } bool runOnModule(Module &M) override { - static const char *const metaNames[] = { - // See http://en.wikipedia.org/wiki/Metasyntactic_variable - "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", - "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" - }; - // Seed our PRNG with simple additive sum of ModuleID. We're looking to // simply avoid always having the same function names, and we need to // remain deterministic. 
unsigned int randSeed = 0; - for (std::string::const_iterator I = M.getModuleIdentifier().begin(), - E = M.getModuleIdentifier().end(); I != E; ++I) - randSeed += *I; + for (auto C : M.getModuleIdentifier()) + randSeed += C; - PRNG prng; - prng.srand(randSeed); + Renamer renamer(randSeed); // Rename all aliases - for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); - AI != AE; ++AI) { + for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { StringRef Name = AI->getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; AI->setName("alias"); } - + // Rename all global variables - for (Module::global_iterator GI = M.global_begin(), GE = M.global_end(); - GI != GE; ++GI) { + for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { StringRef Name = GI->getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; @@ -93,40 +101,37 @@ namespace { // Rename all struct types TypeFinder StructTypes; StructTypes.run(M, true); - for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { - StructType *STy = StructTypes[i]; + for (StructType *STy : StructTypes) { if (STy->isLiteral() || STy->getName().empty()) continue; SmallString<128> NameStorage; - STy->setName((Twine("struct.") + metaNames[prng.rand() % - array_lengthof(metaNames)]).toStringRef(NameStorage)); + STy->setName((Twine("struct.") + + renamer.newName()).toStringRef(NameStorage)); } // Rename all functions - for (Module::iterator FI = M.begin(), FE = M.end(); - FI != FE; ++FI) { - StringRef Name = FI->getName(); + for (auto &F : M) { + StringRef Name = F.getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; - FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]); - runOnFunction(*FI); + F.setName(renamer.newName()); + runOnFunction(F); } return true; } bool runOnFunction(Function &F) { - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); - AI != AE; ++AI) + for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) if (!AI->getType()->isVoidTy()) AI->setName("arg"); - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - BB->setName("bb"); + for (auto &BB : F) { + BB.setName("bb"); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->getType()->isVoidTy()) - I->setName("tmp"); + for (auto &I : BB) + if (!I.getType()->isVoidTy()) + I.setName("tmp"); } return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp index d69a81e..9ec28a3 100644 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -43,9 +43,9 @@ static void appendToGlobalArray(const char *Array, } GVCtor->eraseFromParent(); } else { - // Use a simple two-field struct if there isn't one already. + // Use the new three-field struct if there isn't one already. EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - nullptr); + IRB.getInt8PtrTy(), nullptr); } // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. 
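The appendToGlobalArray hunk above switches newly created llvm.global_ctors entries to the three-field form { i32 priority, void ()* ctor, i8* data }. A hedged sketch of how one element of that EltTy could be populated; makeCtorEntry and the priority value used are illustrative assumptions, not code from this patch:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: build one three-field llvm.global_ctors element.
static Constant *makeCtorEntry(StructType *EltTy, Function *Ctor,
                               IRBuilder<> &IRB) {
  Constant *Fields[] = {
      IRB.getInt32(65535), // default constructor priority
      ConstantExpr::getBitCast(Ctor, EltTy->getElementType(1)),
      Constant::getNullValue(EltTy->getElementType(2))}; // no associated data
  return ConstantStruct::get(EltTy, Fields);
}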
@@ -107,7 +107,8 @@ Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
 std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
     Module &M, StringRef CtorName, StringRef InitName,
-    ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs) {
+    ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+    StringRef VersionCheckName) {
   assert(!InitName.empty() && "Expected init function name");
   assert(InitArgTypes.size() == InitArgs.size() &&
          "Sanitizer's init function expects different number of arguments");
@@ -122,6 +123,13 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
                    AttributeSet()));
   InitFunction->setLinkage(Function::ExternalLinkage);
   IRB.CreateCall(InitFunction, InitArgs);
+  if (!VersionCheckName.empty()) {
+    Function *VersionCheckFunction =
+        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+            VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
+            AttributeSet()));
+    IRB.CreateCall(VersionCheckFunction, {});
+  }
   return std::make_pair(Ctor, InitFunction);
 }
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a87f850..c4f9b9f 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -205,10 +205,9 @@ public:
     // avoid gratuitous rescans.
     const BasicBlock *BB = I->getParent();
     unsigned InstNo = 0;
-    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
-         ++BBI)
-      if (isInterestingInstruction(BBI))
-        InstNumbers[BBI] = InstNo++;
+    for (const Instruction &BBI : *BB)
+      if (isInterestingInstruction(&BBI))
+        InstNumbers[&BBI] = InstNo++;
     It = InstNumbers.find(I);

     assert(It != InstNumbers.end() && "Didn't insert instruction?");
@@ -402,8 +401,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
   // Record debuginfo for the store and remove the declaration's
   // debuginfo.
   if (DbgDeclareInst *DDI = Info.DbgDeclare) {
-    DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
-                  /*AllowUnresolved*/ false);
+    DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
     ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
     DDI->eraseFromParent();
     LBI.deleteValue(DDI);
@@ -425,14 +423,17 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
 /// using the Alloca.
 ///
 /// If we cannot promote this alloca (because it is read before it is written),
-/// return true. This is necessary in cases where, due to control flow, the
-/// alloca is potentially undefined on some control flow paths. e.g. code like
-/// this is potentially correct:
-///
-/// for (...) { if (c) { A = undef; undef = B; } }
-///
-/// ... so long as A is not used before undef is set.
-static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+/// return false. This is necessary in cases where, due to control flow, the
+/// alloca is undefined only on some control flow paths. e.g. code like
+/// this is correct in LLVM IR:
+/// // A is an alloca with no stores so far
+/// for (...) {
+///   int t = *A;
+///   if (!first_iteration)
+///     use(t);
+///   *A = 42;
+/// }
+static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                      LargeBlockInfo &LBI,
                                      AliasSetTracker *AST) {
   // The trickiest case to handle is when we have large blocks.
Because of this, @@ -467,10 +468,15 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)), less_first()); - - if (I == StoresByIndex.begin()) - // If there is no store before this load, the load takes the undef value. - LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + if (I == StoresByIndex.begin()) { + if (StoresByIndex.empty()) + // If there are no stores, the load takes the undef value. + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + else + // There is no store before this load, bail out (load may be affected + // by the following stores - see main comment). + return false; + } else // Otherwise, there was a store before this load, the load takes its value. LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0)); @@ -486,8 +492,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DIBuilder DIB(*AI->getParent()->getParent()->getParent(), - /*AllowUnresolved*/ false); + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DDI, SI, DIB); } SI->eraseFromParent(); @@ -506,6 +511,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, } ++NumLocalPromoted; + return true; } void PromoteMem2Reg::run() { @@ -557,9 +563,8 @@ void PromoteMem2Reg::run() { // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. - if (Info.OnlyUsedInOneBlock) { - promoteSingleBlockAlloca(AI, Info, LBI, AST); - + if (Info.OnlyUsedInOneBlock && + promoteSingleBlockAlloca(AI, Info, LBI, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -636,7 +641,7 @@ void PromoteMem2Reg::run() { // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; - RenamePassWorkList.emplace_back(F.begin(), nullptr, std::move(Values)); + RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); @@ -854,7 +859,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // BasicBlock. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), Allocas[AllocaNo]->getName() + "." 
+ Twine(Version++), - BB->begin()); + &BB->front()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; @@ -919,7 +924,7 @@ NextIteration: return; for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) { - Instruction *I = II++; // get the instruction, increment iterator + Instruction *I = &*II++; // get the instruction, increment iterator if (LoadInst *LI = dyn_cast<LoadInst>(I)) { AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 36781c1..d0932f83 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -43,7 +44,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> #include <map> @@ -73,6 +73,22 @@ static cl::opt<bool> HoistCondStores( "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes")); +static cl::opt<bool> MergeCondStores( + "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores even if an unconditional store does not " + "precede - hoist multiple conditional stores into a single " + "predicated store")); + +static cl::opt<bool> MergeCondStoresAggressively( + "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), + cl::desc("When merging conditional stores, do so even if the resultant " + "basic blocks are unlikely to be if-converted as a result")); + +static cl::opt<bool> SpeculateOneExpensiveInst( + "speculate-one-expensive-inst", cl::Hidden, cl::init(true), + cl::desc("Allow exactly one expensive instruction to be speculatively " + "executed")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); @@ -83,13 +99,13 @@ STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { // The first field contains the value that the switch produces when a certain - // case group is selected, and the second field is a vector containing the cases - // composing the case group. + // case group is selected, and the second field is a vector containing the + // cases composing the case group. typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2> SwitchCaseResultVectorTy; // The first field contains the phi node that generates a result of the switch - // and the second field contains the value generated for a certain case in the switch - // for that PHI. + // and the second field contains the value generated for a certain case in the + // switch for that PHI. typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy; /// ValueEqualityComparisonCase - Represents a case of a switch. 
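For reference, the three SimplifyCFG flags added above are ordinary cl::opt switches, so the new store-merging and speculation behavior can be toggled from the opt tool when bisecting a regression; a hypothetical invocation using the flag names defined above:

  opt -simplifycfg -simplifycfg-merge-cond-stores=false \
      -speculate-one-expensive-inst=false -S in.ll -o out.ll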
@@ -124,6 +140,7 @@ class SimplifyCFGOpt { bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifyCleanupReturn(CleanupReturnInst *RI); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); bool SimplifyIndirectBr(IndirectBrInst *IBI); @@ -226,6 +243,7 @@ static unsigned ComputeSpeculationCost(const User *I, "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I); } + /// If we have a merge point of an "if condition" as accepted above, /// return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -246,7 +264,8 @@ static unsigned ComputeSpeculationCost(const User *I, static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl<Instruction*> *AggressiveInsts, unsigned &CostRemaining, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + unsigned Depth = 0) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -284,15 +303,24 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, unsigned Cost = ComputeSpeculationCost(I, TTI); - if (Cost > CostRemaining) + // Allow exactly one instruction to be speculated regardless of its cost + // (as long as it is safe to do so). + // This is intended to flatten the CFG even if the instruction is a division + // or other expensive operation. The speculation of an expensive instruction + // is expected to be undone in CodeGenPrepare if the speculation has not + // enabled further IR optimizations. + if (Cost > CostRemaining && + (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0)) return false; - CostRemaining -= Cost; + // Avoid unsigned wrap. + CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, + Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -970,8 +998,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, at this point, we know which new successor Pred will get. Make // sure we update the number of entries in the PHI nodes for these // successors. - for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) - AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + for (BasicBlock *NewSuccessor : NewSuccessors) + AddPredecessorToBlock(NewSuccessor, Pred, BB); Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. 
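The Depth parameter threaded through DominatesMergePoint above restricts the new one-free-expensive-instruction allowance to the root of the operand tree (Depth == 0, with AggressiveInsts still empty), so a chain of expensive operands cannot all be speculated for free. A source-level sketch of the flattening this enables (hypothetical C++ example; assumes the division is provably safe to execute speculatively, e.g. a non-zero constant divisor):

  // Before: the divide executes only when C is true.
  unsigned before(bool C, unsigned A) {
    unsigned T = 0;
    if (C)
      T = A / 7; // the one "expensive" instruction
    return T;
  }

  // After SimplifyCFG, morally:
  unsigned after(bool C, unsigned A) {
    unsigned D = A / 7; // speculated unconditionally
    return C ? D : 0;   // branch folded to a select
  }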
@@ -984,8 +1012,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); NewSI->setDebugLoc(PTI->getDebugLoc()); - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + for (ValueEqualityComparisonCase &V : PredCases) + NewSI->addCase(V.Value, V.Dest); if (PredHasWeights || SuccHasWeights) { // Halve the weights if any of them cannot fit in an uint32_t @@ -1059,15 +1087,15 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, BasicBlock::iterator BB1_Itr = BB1->begin(); BasicBlock::iterator BB2_Itr = BB2->begin(); - Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; + Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; // Skip debug info if it is not identical. DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { while (isa<DbgInfoIntrinsic>(I1)) - I1 = BB1_Itr++; + I1 = &*BB1_Itr++; while (isa<DbgInfoIntrinsic>(I2)) - I2 = BB2_Itr++; + I2 = &*BB2_Itr++; } if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) @@ -1088,31 +1116,30 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, // For a normal instruction, we just move one to right before the branch, // then replace all uses of the other with the first. Finally, we remove // the now redundant second instruction. - BIParent->getInstList().splice(BI, BB1->getInstList(), I1); + BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1); if (!I2->use_empty()) I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull - }; + LLVMContext::MD_tbaa, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group, + LLVMContext::MD_align, LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null}; combineMetadata(I1, I2, KnownIDs); I2->eraseFromParent(); Changed = true; - I1 = BB1_Itr++; - I2 = BB2_Itr++; + I1 = &*BB1_Itr++; + I2 = &*BB2_Itr++; // Skip debug info if it is not identical. DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { while (isa<DbgInfoIntrinsic>(I1)) - I1 = BB1_Itr++; + I1 = &*BB1_Itr++; while (isa<DbgInfoIntrinsic>(I2)) - I2 = BB2_Itr++; + I2 = &*BB2_Itr++; } } while (I1->isIdenticalToWhenDefined(I2)); @@ -1147,7 +1174,7 @@ HoistTerminator: // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); - BIParent->getInstList().insert(BI, NT); + BIParent->getInstList().insert(BI->getIterator(), NT); if (!NT->getType()->isVoidTy()) { I1->replaceAllUsesWith(NT); I2->replaceAllUsesWith(NT); @@ -1265,7 +1292,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // Cannot move control-flow-involving, volatile loads, vaarg, etc. 
if (isa<PHINode>(I1) || isa<PHINode>(I2) || isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) || - isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) || + I1->isEHPad() || I2->isEHPad() || isa<AllocaInst>(I1) || isa<AllocaInst>(I2) || I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || @@ -1324,7 +1351,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { if (!NewPN) { NewPN = PHINode::Create(DifferentOp1->getType(), 2, - DifferentOp1->getName() + ".sink", BBEnd->begin()); + DifferentOp1->getName() + ".sink", &BBEnd->front()); NewPN->addIncoming(DifferentOp1, BB1); NewPN->addIncoming(DifferentOp2, BB2); DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); @@ -1339,7 +1366,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // instruction in the basic block down. bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); // Sink the instruction. - BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1); + BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(), + BB1->getInstList(), I1); if (!OldPN->use_empty()) OldPN->replaceAllUsesWith(I1); OldPN->eraseFromParent(); @@ -1355,7 +1383,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { RE1 = BB1->getInstList().rend(); if (UpdateRE2) RE2 = BB2->getInstList().rend(); - FirstNonPhiInBBEnd = I1; + FirstNonPhiInBBEnd = &*I1; NumSinkCommons++; Changed = true; } @@ -1491,7 +1519,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = std::prev(ThenBB->end()); BBI != BBE; ++BBI) { - Instruction *I = BBI; + Instruction *I = &*BBI; // Skip debug info. if (isa<DbgInfoIntrinsic>(I)) continue; @@ -1604,9 +1632,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, SpeculatedStore->setOperand(0, S); } + // Metadata can be dependent on the condition we are hoisting above. + // Conservatively strip all metadata on the instruction. + for (auto &I: *ThenBB) + I.dropUnknownNonDebugMetadata(); + // Hoist the instructions. - BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), - std::prev(ThenBB->end())); + BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(), + ThenBB->begin(), std::prev(ThenBB->end())); // Insert selects and rewrite the PHI operands. IRBuilder<true, NoFolder> Builder(BI); @@ -1747,13 +1780,13 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, DL)) { - TranslateMap[BBI] = V; + TranslateMap[&*BBI] = V; delete N; // Instruction folded away, don't need actual inst } else { // Insert the new instruction into its new home. EdgeBB->getInstList().insert(InsertPt, N); if (!BBI->use_empty()) - TranslateMap[BBI] = N; + TranslateMap[&*BBI] = N; } } @@ -1850,7 +1883,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I) - if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) { + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control // flow, so the xform is not worth it. 
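For orientation between the two hunks here: the AggressiveInsts checks above belong to FoldTwoEntryPHINode, which turns a diamond whose merge point is a two-entry PHI into straight-line code plus a select, but only when DominatesMergePoint accepted every arm instruction under the cost budget. A rough source-level picture (hypothetical example with cheap, speculatable arms):

  // Before: a diamond merging through a two-entry PHI.
  int before(bool C, int X, int Y) {
    int M;
    if (C)
      M = X + 1; // then-arm
    else
      M = Y - 1; // else-arm
    return M;
  }

  // After the fold: both arms hoisted into the dominating block,
  // the PHI rewritten as a select.
  int after(bool C, int X, int Y) {
    int A = X + 1;
    int B = Y - 1;
    return C ? A : B;
  }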
@@ -1863,7 +1896,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I) - if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) { + if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control // flow, so the xform is not worth it. @@ -1882,13 +1915,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) - DomBlock->getInstList().splice(InsertPt, + DomBlock->getInstList().splice(InsertPt->getIterator(), IfBlock1->getInstList(), IfBlock1->begin(), - IfBlock1->getTerminator()); + IfBlock1->getTerminator()->getIterator()); if (IfBlock2) - DomBlock->getInstList().splice(InsertPt, + DomBlock->getInstList().splice(InsertPt->getIterator(), IfBlock2->getInstList(), IfBlock2->begin(), - IfBlock2->getTerminator()); + IfBlock2->getTerminator()->getIterator()); while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. @@ -2057,7 +2090,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { BI->getSuccessor(0) == PBI->getSuccessor(1))) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Curr = I++; + Instruction *Curr = &*I++; if (isa<CmpInst>(Curr)) { Cond = Curr; break; @@ -2077,7 +2110,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { return false; // Make sure the instruction after the condition is the cond branch. - BasicBlock::iterator CondIt = Cond; ++CondIt; + BasicBlock::iterator CondIt = ++Cond->getIterator(); // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; @@ -2095,7 +2128,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // Ignore dbg intrinsics. if (isa<DbgInfoIntrinsic>(I)) continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I)) + if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I)) return false; // I has only one use and can be executed unconditionally. Instruction *User = dyn_cast<Instruction>(I->user_back()); @@ -2192,17 +2225,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { Instruction *NewBonusInst = BonusInst->clone(); RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); - VMap[BonusInst] = NewBonusInst; + VMap[&*BonusInst] = NewBonusInst; // If we moved a load, we cannot any longer claim any knowledge about // its potential value. The previous information might have been valid // only given the branch precondition. // For an analogous reason, we must also drop all the metadata whose // semantics we don't understand. 
-      NewBonusInst->dropUnknownMetadata(LLVMContext::MD_dbg);
+      NewBonusInst->dropUnknownNonDebugMetadata();

-      PredBlock->getInstList().insert(PBI, NewBonusInst);
-      NewBonusInst->takeName(BonusInst);
+      PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
+      NewBonusInst->takeName(&*BonusInst);
       BonusInst->setName(BonusInst->getName() + ".old");
     }
@@ -2211,7 +2244,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
     Instruction *New = Cond->clone();
     RemapInstruction(New, VMap,
                      RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
-    PredBlock->getInstList().insert(PBI, New);
+    PredBlock->getInstList().insert(PBI->getIterator(), New);
     New->takeName(Cond);
     Cond->setName(New->getName() + ".old");
@@ -2332,11 +2365,297 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
   return false;
 }

+// If there is only one store in BB1 and BB2, return it, otherwise return
+// nullptr.
+static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
+  StoreInst *S = nullptr;
+  for (auto *BB : {BB1, BB2}) {
+    if (!BB)
+      continue;
+    for (auto &I : *BB)
+      if (auto *SI = dyn_cast<StoreInst>(&I)) {
+        if (S)
+          // Multiple stores seen.
+          return nullptr;
+        else
+          S = SI;
+      }
+  }
+  return S;
+}
+
+static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
+                                              Value *AlternativeV = nullptr) {
+  // PHI is going to be a PHI node that allows the value V that is defined in
+  // BB to be referenced in BB's only successor.
+  //
+  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
+  // doesn't matter to us what the other operand is (it'll never get used). We
+  // could just create a new PHI with an undef incoming value, but that could
+  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
+  // other PHI. So here we directly look for some PHI in BB's successor with V
+  // as an incoming operand. If we find one, we use it, else we create a new
+  // one.
+  //
+  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
+  // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
+  // where OtherBB is the single other predecessor of BB's only successor.
+  PHINode *PHI = nullptr;
+  BasicBlock *Succ = BB->getSingleSuccessor();
+
+  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
+    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
+      PHI = cast<PHINode>(I);
+      if (!AlternativeV)
+        break;
+
+      assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2);
+      auto PredI = pred_begin(Succ);
+      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
+      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
+        break;
+      PHI = nullptr;
+    }
+  if (PHI)
+    return PHI;
+
+  // If V is not an instruction defined in BB, just return it.
+  if (!AlternativeV &&
+      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
+    return V;
+
+  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+  PHI->addIncoming(V, BB);
+  for (BasicBlock *PredBB : predecessors(Succ))
+    if (PredBB != BB)
+      PHI->addIncoming(AlternativeV ?
AlternativeV : UndefValue::get(V->getType()), + PredBB); + return PHI; +} + +static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, + BasicBlock *QTB, BasicBlock *QFB, + BasicBlock *PostBB, Value *Address, + bool InvertPCond, bool InvertQCond) { + auto IsaBitcastOfPointerType = [](const Instruction &I) { + return Operator::getOpcode(&I) == Instruction::BitCast && + I.getType()->isPointerTy(); + }; + + // If we're not in aggressive mode, we only optimize if we have some + // confidence that by optimizing we'll allow P and/or Q to be if-converted. + auto IsWorthwhile = [&](BasicBlock *BB) { + if (!BB) + return true; + // Heuristic: if the block can be if-converted/phi-folded and the + // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to + // thread this store. + unsigned N = 0; + for (auto &I : *BB) { + // Cheap instructions viable for folding. + if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) || + isa<StoreInst>(I)) + ++N; + // Free instructions. + else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || + IsaBitcastOfPointerType(I)) + continue; + else + return false; + } + return N <= PHINodeFoldingThreshold; + }; + + if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB) || + !IsWorthwhile(PFB) || + !IsWorthwhile(QTB) || + !IsWorthwhile(QFB))) + return false; + + // For every pointer, there must be exactly two stores, one coming from + // PTB or PFB, and the other from QTB or QFB. We don't support more than one + // store (to any address) in PTB,PFB or QTB,QFB. + // FIXME: We could relax this restriction with a bit more work and performance + // testing. + StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); + StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); + if (!PStore || !QStore) + return false; + + // Now check the stores are compatible. + if (!QStore->isUnordered() || !PStore->isUnordered()) + return false; + + // Check that sinking the store won't cause program behavior changes. Sinking + // the store out of the Q blocks won't change any behavior as we're sinking + // from a block to its unconditional successor. But we're moving a store from + // the P blocks down through the middle block (QBI) and past both QFB and QTB. + // So we need to check that there are no aliasing loads or stores in + // QBI, QTB and QFB. We also need to check there are no conflicting memory + // operations between PStore and the end of its parent block. + // + // The ideal way to do this is to query AliasAnalysis, but we don't + // preserve AA currently so that is dangerous. Be super safe and just + // check there are no other memory operations at all. + for (auto &I : *QFB->getSinglePredecessor()) + if (I.mayReadOrWriteMemory()) + return false; + for (auto &I : *QFB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + if (QTB) + for (auto &I : *QTB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); + I != E; ++I) + if (&*I != PStore && I->mayReadOrWriteMemory()) + return false; + + // OK, we're going to sink the stores to PostBB. The store has to be + // conditional though, so first create the predicate. 
+  Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
+                     ->getCondition();
+  Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
+                     ->getCondition();
+
+  Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
+                                                PStore->getParent());
+  Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
+                                                QStore->getParent(), PPHI);
+
+  IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
+
+  Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
+  Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
+
+  if (InvertPCond)
+    PPred = QB.CreateNot(PPred);
+  if (InvertQCond)
+    QPred = QB.CreateNot(QPred);
+  Value *CombinedPred = QB.CreateOr(PPred, QPred);
+
+  auto *T =
+      SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
+  QB.SetInsertPoint(T);
+  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
+  AAMDNodes AAMD;
+  PStore->getAAMetadata(AAMD, /*Merge=*/false);
+  QStore->getAAMetadata(AAMD, /*Merge=*/true);
+  SI->setAAMetadata(AAMD);
+
+  QStore->eraseFromParent();
+  PStore->eraseFromParent();
+
+  return true;
+}
+
+static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
+  // The intention here is to find diamonds or triangles (see below) where each
+  // conditional block contains a store to the same address. Both of these
+  // stores are conditional, so they can't be unconditionally sunk. But it may
+  // be profitable to speculatively sink the stores into one merged store at the
+  // end, and predicate the merged store on the union of the two conditions of
+  // PBI and QBI.
+  //
+  // This can reduce the number of stores executed if both of the conditions are
+  // true, and can allow the blocks to become small enough to be if-converted.
+  // This optimization will also chain, so that ladders of test-and-set
+  // sequences can be if-converted away.
+  //
+  // We only deal with simple diamonds or triangles:
+  //
+  //     PBI       or      PBI      or a combination of the two
+  //    /   \               | \
+  //   PTB  PFB             |  PFB
+  //    \   /               | /
+  //     QBI                QBI
+  //    /   \               | \
+  //   QTB  QFB             |  QFB
+  //    \   /               | /
+  //    PostBB              PostBB
+  //
+  // We model triangles as a type of diamond with a nullptr "true" block.
+  // Triangles are canonicalized so that the fallthrough edge is represented by
+  // a true condition, as in the diagram above.
+  //
+  BasicBlock *PTB = PBI->getSuccessor(0);
+  BasicBlock *PFB = PBI->getSuccessor(1);
+  BasicBlock *QTB = QBI->getSuccessor(0);
+  BasicBlock *QFB = QBI->getSuccessor(1);
+  BasicBlock *PostBB = QFB->getSingleSuccessor();
+
+  bool InvertPCond = false, InvertQCond = false;
+  // Canonicalize fallthroughs to the true branches.
+  if (PFB == QBI->getParent()) {
+    std::swap(PFB, PTB);
+    InvertPCond = true;
+  }
+  if (QFB == PostBB) {
+    std::swap(QFB, QTB);
+    InvertQCond = true;
+  }
+
+  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
+  // and QFB may not. Model fallthroughs as a nullptr block.
+  if (PTB == QBI->getParent())
+    PTB = nullptr;
+  if (QTB == PostBB)
+    QTB = nullptr;
+
+  // Legality bailouts. We must have at least the non-fallthrough blocks and
+  // the post-dominating block, and the non-fallthroughs must only have one
+  // predecessor.
+ auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { + return BB->getSinglePredecessor() == P && + BB->getSingleSuccessor() == S; + }; + if (!PostBB || + !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) + return false; + if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || + (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) + return false; + if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2) + return false; + + // OK, this is a sequence of two diamonds or triangles. + // Check if there are stores in PTB or PFB that are repeated in QTB or QFB. + SmallPtrSet<Value *,4> PStoreAddresses, QStoreAddresses; + for (auto *BB : {PTB, PFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + PStoreAddresses.insert(SI->getPointerOperand()); + } + for (auto *BB : {QTB, QFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + QStoreAddresses.insert(SI->getPointerOperand()); + } + + set_intersect(PStoreAddresses, QStoreAddresses); + // set_intersect mutates PStoreAddresses in place. Rename it here to make it + // clear what it contains. + auto &CommonAddresses = PStoreAddresses; + + bool Changed = false; + for (auto *Address : CommonAddresses) + Changed |= mergeConditionalStoreToAddress( + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond); + return Changed; +} + /// If we have a conditional branch as a predecessor of another block, /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. -static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { +static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + const DataLayout &DL) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); @@ -2360,10 +2679,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // simplifycfg will thread the block. if (BlockIsSimpleEnoughToThreadThrough(BB)) { pred_iterator PB = pred_begin(BB), PE = pred_end(BB); - PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), - std::distance(PB, PE), - BI->getCondition()->getName() + ".pr", - BB->begin()); + PHINode *NewPN = PHINode::Create( + Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), + BI->getCondition()->getName() + ".pr", &BB->front()); // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. @@ -2386,6 +2704,29 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } } + if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition())) + if (CE->canTrap()) + return false; + + // If BI is reached from the true path of PBI and PBI's condition implies + // BI's condition, we know the direction of the BI branch. + if (PBI->getSuccessor(0) == BI->getParent() && + isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) && + PBI->getSuccessor(0) != PBI->getSuccessor(1) && + BB->getSinglePredecessor()) { + // Turn this into a branch on constant. 
+ auto *OldCond = BI->getCondition(); + BI->setCondition(ConstantInt::getTrue(BB->getContext())); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + return true; // Nuke the branch on constant. + } + + // If both branches are conditional and both contain stores to the same + // address, remove the stores from the conditionals and create a conditional + // merged store at the end. + if (MergeCondStores && mergeConditionalStores(PBI, BI)) + return true; + // If this is a conditional branch in an empty block, and if any // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -2396,11 +2737,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition())) - if (CE->canTrap()) - return false; - int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2565,15 +2901,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; // Then remove the rest. - for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { - BasicBlock *Succ = OldTerm->getSuccessor(I); + for (BasicBlock *Succ : OldTerm->successors()) { // Make sure only to keep exactly one copy of each edge. if (Succ == KeepEdge1) KeepEdge1 = nullptr; else if (Succ == KeepEdge2) KeepEdge2 = nullptr; else - Succ->removePredecessor(OldTerm->getParent()); + Succ->removePredecessor(OldTerm->getParent(), + /*DontDeleteUselessPHIs=*/true); } IRBuilder<> Builder(OldTerm); @@ -2827,7 +3163,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); // If Extra was used, we require at least two switch values to do the - // transformation. A switch with one value is just an cond branch. + // transformation. A switch with one value is just a conditional branch. if (ExtraCase && Values.size() < 2) return false; // TODO: Preserve branch weight metadata, similarly to how @@ -2847,7 +3183,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { - BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test"); + BasicBlock *NewBB = + BB->splitBasicBlock(BI->getIterator(), "switch.early.test"); // Remove the uncond branch added to the old block. TerminatorInst *OldTI = BB->getTerminator(); Builder.SetInsertPoint(OldTI); @@ -2911,34 +3248,15 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Check that there are no other instructions except for debug intrinsics. - BasicBlock::iterator I = LPInst, E = RI; + BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); while (++I != E) if (!isa<DbgInfoIntrinsic>(I)) return false; // Turn all invokes that unwind here into calls and delete the basic block. for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { - InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); - SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); - // Insert a call instruction before the invoke. 
- CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); - Call->takeName(II); - Call->setCallingConv(II->getCallingConv()); - Call->setAttributes(II->getAttributes()); - Call->setDebugLoc(II->getDebugLoc()); - - // Anything that used the value produced by the invoke instruction now uses - // the value produced by the call instruction. Note that we do this even - // for void functions and calls with no uses so that the callgraph edge is - // updated. - II->replaceAllUsesWith(Call); - BB->removePredecessor(II->getParent()); - - // Insert a branch to the normal destination right before the invoke. - BranchInst::Create(II->getNormalDest(), II); - - // Finally, delete the invoke instruction! - II->eraseFromParent(); + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); } // The landingpad is now unreachable. Zap it. @@ -2946,6 +3264,124 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return true; } +bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { + // If this is a trivial cleanup pad that executes no instructions, it can be + // eliminated. If the cleanup pad continues to the caller, any predecessor + // that is an EH pad will be updated to continue to the caller and any + // predecessor that terminates with an invoke instruction will have its invoke + // instruction converted to a call instruction. If the cleanup pad being + // simplified does not continue to the caller, each predecessor will be + // updated to continue to the unwind destination of the cleanup pad being + // simplified. + BasicBlock *BB = RI->getParent(); + CleanupPadInst *CPInst = RI->getCleanupPad(); + if (CPInst->getParent() != BB) + // This isn't an empty cleanup. + return false; + + // Check that there are no other instructions except for debug intrinsics. + BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator(); + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + // If the cleanup return we are simplifying unwinds to the caller, this will + // set UnwindDest to nullptr. + BasicBlock *UnwindDest = RI->getUnwindDest(); + Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr; + + // We're about to remove BB from the control flow. Before we do, sink any + // PHINodes into the unwind destination. Doing this before changing the + // control flow avoids some potentially slow checks, since we can currently + // be certain that UnwindDest and BB have no common predecessors (since they + // are both EH pads). + if (UnwindDest) { + // First, go through the PHI nodes in UnwindDest and update any nodes that + // reference the block we are removing + for (BasicBlock::iterator I = UnwindDest->begin(), + IE = DestEHPad->getIterator(); + I != IE; ++I) { + PHINode *DestPN = cast<PHINode>(I); + + int Idx = DestPN->getBasicBlockIndex(BB); + // Since BB unwinds to UnwindDest, it has to be in the PHI node. + assert(Idx != -1); + // This PHI node has an incoming value that corresponds to a control + // path through the cleanup pad we are removing. If the incoming + // value is in the cleanup pad, it must be a PHINode (because we + // verified above that the block is otherwise empty). Otherwise, the + // value is either a constant or a value that dominates the cleanup + // pad being removed. 
+ // + // Because BB and UnwindDest are both EH pads, all of their + // predecessors must unwind to these blocks, and since no instruction + // can have multiple unwind destinations, there will be no overlap in + // incoming blocks between SrcPN and DestPN. + Value *SrcVal = DestPN->getIncomingValue(Idx); + PHINode *SrcPN = dyn_cast<PHINode>(SrcVal); + + // Remove the entry for the block we are deleting. + DestPN->removeIncomingValue(Idx, false); + + if (SrcPN && SrcPN->getParent() == BB) { + // If the incoming value was a PHI node in the cleanup pad we are + // removing, we need to merge that PHI node's incoming values into + // DestPN. + for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues(); + SrcIdx != SrcE; ++SrcIdx) { + DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx), + SrcPN->getIncomingBlock(SrcIdx)); + } + } else { + // Otherwise, the incoming value came from above BB and + // so we can just reuse it. We must associate all of BB's + // predecessors with this value. + for (auto *pred : predecessors(BB)) { + DestPN->addIncoming(SrcVal, pred); + } + } + } + + // Sink any remaining PHI nodes directly into UnwindDest. + Instruction *InsertPt = DestEHPad; + for (BasicBlock::iterator I = BB->begin(), + IE = BB->getFirstNonPHI()->getIterator(); + I != IE;) { + // The iterator must be incremented here because the instructions are + // being moved to another block. + PHINode *PN = cast<PHINode>(I++); + if (PN->use_empty()) + // If the PHI node has no uses, just leave it. It will be erased + // when we erase BB below. + continue; + + // Otherwise, sink this PHI node into UnwindDest. + // Any predecessors to UnwindDest which are not already represented + // must be back edges which inherit the value from the path through + // BB. In this case, the PHI value must reference itself. + for (auto *pred : predecessors(UnwindDest)) + if (pred != BB) + PN->addIncoming(PN, pred); + PN->moveBefore(InsertPt); + } + } + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + // The iterator must be updated here because we are removing this pred. + BasicBlock *PredBB = *PI++; + if (UnwindDest == nullptr) { + removeUnwindEdge(PredBB); + } else { + TerminatorInst *TI = PredBB->getTerminator(); + TI->replaceUsesOfWith(BB, UnwindDest); + } + } + + // The cleanup pad is now unreachable. Zap it. + BB->eraseFromParent(); + return true; +} + bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; @@ -3003,8 +3439,8 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // If there are any instructions immediately before the unreachable that can // be removed, do so. - while (UI != BB->begin()) { - BasicBlock::iterator BBI = UI; + while (UI->getIterator() != BB->begin()) { + BasicBlock::iterator BBI = UI->getIterator(); --BBI; // Do not delete instructions that can have side effects which might cause // the unreachable to not be reachable; specifically, calls and volatile @@ -3075,26 +3511,18 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { --i; --e; Changed = true; } - } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) { - if (II->getUnwindDest() == BB) { - // Convert the invoke to a call instruction. This would be a good - // place to note that the call does not throw though. - BranchInst *BI = Builder.CreateBr(II->getNormalDest()); - II->removeFromParent(); // Take out of symbol table - - // Insert the call now... 
-      SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
-      Builder.SetInsertPoint(BI);
-      CallInst *CI = Builder.CreateCall(II->getCalledValue(),
-                                        Args, II->getName());
-      CI->setCallingConv(II->getCallingConv());
-      CI->setAttributes(II->getAttributes());
-      // If the invoke produced a value, the call does now instead.
-      II->replaceAllUsesWith(CI);
-      delete II;
-      Changed = true;
-    }
+    } else if ((isa<InvokeInst>(TI) &&
+                cast<InvokeInst>(TI)->getUnwindDest() == BB) ||
+               isa<CatchSwitchInst>(TI)) {
+      removeUnwindEdge(TI->getParent());
+      Changed = true;
+    } else if (isa<CleanupReturnInst>(TI)) {
+      new UnreachableInst(TI->getContext(), TI);
+      TI->eraseFromParent();
+      Changed = true;
     }
+    // TODO: We can remove a catchswitch if all its catchpads end in
+    // unreachable.
   }

   // If this block is now dead, remove it.
@@ -3249,6 +3677,29 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
     }
   }

+  // If we can prove that the cases must cover all possible values, the
+  // default destination becomes dead and we can remove it. If we know some
+  // of the bits in the value, we can use that to more precisely compute the
+  // number of possible unique case values.
+  bool HasDefault =
+      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+  const unsigned NumUnknownBits = Bits -
+      (KnownZero.Or(KnownOne)).countPopulation();
+  assert(NumUnknownBits <= Bits);
+  if (HasDefault && DeadCases.empty() &&
+      NumUnknownBits < 64 /* avoid overflow */ &&
+      SI->getNumCases() == (1ULL << NumUnknownBits)) {
+    DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+    BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(),
+                                                    SI->getParent(), "");
+    SI->setDefaultDest(&*NewDefault);
+    SplitBlock(&*NewDefault, &NewDefault->front());
+    auto *OldTI = NewDefault->getTerminator();
+    new UnreachableInst(SI->getContext(), OldTI);
+    EraseTerminatorInstAndDCECond(OldTI);
+    return true;
+  }
+
   SmallVector<uint64_t, 8> Weights;
   bool HasWeight = HasBranchWeights(SI);
   if (HasWeight) {
@@ -3439,7 +3890,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
     } else if (isa<DbgInfoIntrinsic>(I)) {
       // Skip debug intrinsic.
       continue;
-    } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) {
+    } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) {
       // Instruction is side-effect free and constant.

       // If the instruction has uses outside this block or a phi node slot for
@@ -3456,7 +3907,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
         return false;
       }

-      ConstantPool.insert(std::make_pair(I, C));
+      ConstantPool.insert(std::make_pair(&*I, C));
     } else {
       break;
     }
@@ -3664,7 +4115,7 @@ namespace {
   /// Return true if a table with TableSize elements of
   /// type ElementType would fit in a target-legal register.
   static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
-                                 const Type *ElementType);
+                                 Type *ElementType);

 private:
   // Depending on the contents of the table, it can be represented in
@@ -3880,8 +4331,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {

 bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
                                            uint64_t TableSize,
-                                           const Type *ElementType) {
-  const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+                                           Type *ElementType) {
+  auto *IT = dyn_cast<IntegerType>(ElementType);
   if (!IT)
     return false;
   // FIXME: If the type is wider than it needs to be, e.g.
i8 but all values @@ -3992,7 +4443,7 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, assert((CaseConst == TrueConst || CaseConst == FalseConst) && "Expect true or false as compare result."); } - + // Check if the branch instruction dominates the phi node. It's a simple // dominance check, but sufficient for our needs. // Although this check is invariant in the calling loops, it's better to do it @@ -4422,7 +4873,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ return true; // If the Terminator is the only non-phi instruction, simplify the block. - BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(); + BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; @@ -4457,6 +4908,16 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ return false; } +static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { + BasicBlock *PredPred = nullptr; + for (auto *P : predecessors(BB)) { + BasicBlock *PPred = P->getSinglePredecessor(); + if (!PPred || (PredPred && PredPred != PPred)) + return nullptr; + PredPred = PPred; + } + return PredPred; +} bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); @@ -4537,9 +4998,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + // Look for diamond patterns. 
+ if (MergeCondStores) + if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) + if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (mergeConditionalStores(PBI, BI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return false; } @@ -4663,6 +5132,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (SimplifyReturn(RI, Builder)) return true; } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { if (SimplifyResume(RI, Builder)) return true; + } else if (CleanupReturnInst *RI = + dyn_cast<CleanupReturnInst>(BB->getTerminator())) { + if (SimplifyCleanupReturn(RI)) return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { if (SimplifySwitch(SI, Builder)) return true; } else if (UnreachableInst *UI = diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index ab30aa1..ddd8775 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -47,15 +47,16 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; + DominatorTree *DT; SmallVectorImpl<WeakVH> &DeadInsts; bool Changed; public: - SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI, - SmallVectorImpl<WeakVH> &Dead) - : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) { + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -63,11 +64,13 @@ namespace { /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies - /// all simplicitions to users of an IV. + /// all simplifications to users of an IV. void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, @@ -166,19 +169,65 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { S = SE->getSCEVAtScope(S, ICmpLoop); X = SE->getSCEVAtScope(X, ICmpLoop); + ICmpInst::Predicate InvariantPredicate; + const SCEV *InvariantLHS, *InvariantRHS; + // If the condition is always true or always false, replace it with // a constant value. 
- if (SE->isKnownPredicate(Pred, S, X)) + if (SE->isKnownPredicate(Pred, S, X)) { ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); - else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + DeadInsts.emplace_back(ICmp); + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) { ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); - else + DeadInsts.emplace_back(ICmp); + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (isa<PHINode>(IVOperand) && + SE->isLoopInvariantPredicate(Pred, S, X, ICmpLoop, + InvariantPredicate, InvariantLHS, + InvariantRHS)) { + + // Rewrite the comparison to a loop invariant comparison if it can be done + // cheaply, where cheaply means "we don't need to emit any new + // instructions". + + Value *NewLHS = nullptr, *NewRHS = nullptr; + + if (S == InvariantLHS || X == InvariantLHS) + NewLHS = + ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx)); + + if (S == InvariantRHS || X == InvariantRHS) + NewRHS = + ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx)); + + for (Value *Incoming : cast<PHINode>(IVOperand)->incoming_values()) { + if (NewLHS && NewRHS) + break; + + const SCEV *IncomingS = SE->getSCEV(Incoming); + + if (!NewLHS && IncomingS == InvariantLHS) + NewLHS = Incoming; + if (!NewRHS && IncomingS == InvariantRHS) + NewRHS = Incoming; + } + + if (!NewLHS || !NewRHS) + // We could not find an existing value to replace either LHS or RHS. + // Generating new instructions has subtler tradeoffs, so avoid doing that + // for now. + return; + + DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); + ICmp->setPredicate(InvariantPredicate); + ICmp->setOperand(0, NewLHS); + ICmp->setOperand(1, NewRHS); + } else return; - DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); ++NumElimCmp; Changed = true; - DeadInsts.emplace_back(ICmp); } /// SimplifyIVUsers helper for eliminating useless @@ -207,8 +256,7 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, Rem->replaceAllUsesWith(Rem->getOperand(0)); else { // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). - const SCEV *LessOne = - SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType())); if (IsSigned && !SE->isKnownNonNegative(LessOne)) return; @@ -232,9 +280,9 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, DeadInsts.emplace_back(Rem); } -/// Eliminate an operation that consumes a simple IV and has -/// no observable side-effect given the range of IV values. -/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// Eliminate an operation that consumes a simple IV and has no observable +/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, +/// but UseInst may not be. bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, Instruction *IVOperand) { if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { @@ -249,12 +297,45 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, } } - // Eliminate any operation that SCEV can prove is an identity function. + if (eliminateIdentitySCEV(UseInst, IVOperand)) + return true; + + return false; +} + +/// Eliminate any operation that SCEV can prove is an identity function. 
+bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, + Instruction *IVOperand) { if (!SE->isSCEVable(UseInst->getType()) || (UseInst->getType() != IVOperand->getType()) || (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) return false; + // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the + // dominator tree, even if X is an operand to Y. For instance, in + // + // %iv = phi i32 {0,+,1} + // br %cond, label %left, label %merge + // + // left: + // %X = add i32 %iv, 0 + // br label %merge + // + // merge: + // %M = phi (%X, %iv) + // + // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and + // %M.replaceAllUsesWith(%X) would be incorrect. + + if (isa<PHINode>(UseInst)) + // If UseInst is not a PHI node then we know that IVOperand dominates + // UseInst directly from the legality of SSA. + if (!DT || !DT->dominates(IVOperand, UseInst)) + return false; + + if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) + return false; + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); UseInst->replaceAllUsesWith(IVOperand); @@ -436,8 +517,8 @@ static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { /// This algorithm does not require IVUsers analysis. Instead, it simplifies /// instructions in-place during analysis. Rather than rewriting induction /// variables bottom-up from their users, it transforms a chain of IVUsers -/// top-down, updating the IR only when it encouters a clear optimization -/// opportunitiy. +/// top-down, updating the IR only when it encounters a clear optimization +/// opportunity. /// /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. /// @@ -513,22 +594,21 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. -bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead, IVVisitor *V) -{ - LoopInfo *LI = &LPM->getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead); +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, + IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. 
-bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead) { +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) { bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { - Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead); + Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); } return Changed; } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp index c499c87..d5377f9 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -20,12 +20,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -64,7 +64,7 @@ namespace { // Here be subtlety: the iterator must be incremented before the loop // body (not sure why), so a range-for loop won't work here. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = BI++; + Instruction *I = &*BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not // empty and we only bother simplifying instructions that are in it. diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6bbf828..81dea6d 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" @@ -30,8 +31,8 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; @@ -52,16 +53,8 @@ static cl::opt<bool> //===----------------------------------------------------------------------===// static bool ignoreCallingConv(LibFunc::Func Func) { - switch (Func) { - case LibFunc::abs: - case LibFunc::labs: - case LibFunc::llabs: - case LibFunc::strlen: - return true; - default: - return false; - } - llvm_unreachable("All cases should be covered in the switch."); + return Func == LibFunc::abs || Func == LibFunc::labs || + Func == LibFunc::llabs || Func == LibFunc::strlen; } /// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the @@ -93,16 +86,13 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { } static bool callHasFloatingPointArgument(const CallInst *CI) { - for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); - it != e; ++it) { - if ((*it)->getType()->isFloatingPointTy()) - return true; - } - return false; + return std::any_of(CI->op_begin(), CI->op_end(), [](const Use &OI) { + 
+    return OI->getType()->isFloatingPointTy();
+  });
 }
 
 /// \brief Check whether the overloaded unary floating point function
-/// corresponing to \a Ty is available.
+/// corresponding to \a Ty is available.
 static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
                             LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
                             LibFunc::Func LongDoubleFn) {
@@ -116,6 +106,23 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
   }
 }
 
+/// \brief Check whether we can use unsafe floating point math for
+/// the function passed as input.
+static bool canUseUnsafeFPMath(Function *F) {
+
+  // FIXME: For finer-grain optimization, we need intrinsics to have the same
+  // fast-math flag decorations that are applied to FP instructions. For now,
+  // we have to rely on the function-level unsafe-fp-math attribute to do this
+  // optimization because there's no other way to express that the call can be
+  // relaxed.
+  if (F->hasFnAttribute("unsafe-fp-math")) {
+    Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+    if (Attr.getValueAsString() == "true")
+      return true;
+  }
+  return false;
+}
+
 /// \brief Returns whether \p F matches the signature expected for the
 /// string/memory copying library function \p Func.
 /// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
@@ -467,9 +474,6 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
 Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
 
-  // Verify the "stpcpy" function prototype.
-  FunctionType *FT = Callee->getFunctionType();
-
   if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy))
     return nullptr;
 
@@ -484,7 +488,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
   if (Len == 0)
     return nullptr;
 
-  Type *PT = FT->getParamType(0);
+  Type *PT = Callee->getFunctionType()->getParamType(0);
   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
   Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst,
                               ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
@@ -497,8 +501,6 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
 
-  FunctionType *FT = Callee->getFunctionType();
-
   if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy))
     return nullptr;
 
@@ -531,7 +533,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
   if (Len > SrcLen + 1)
     return nullptr;
 
-  Type *PT = FT->getParamType(0);
+  Type *PT = Callee->getFunctionType()->getParamType(0);
   // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
   B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
 
@@ -862,6 +864,27 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
     return B.CreateSub(LHSV, RHSV, "chardiff");
   }
 
+  // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+  if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
+
+    IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
+    unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
+
+    if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
+        getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
+
+      Type *LHSPtrTy =
+          IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+      Type *RHSPtrTy =
+          IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+
+      Value *LHSV =
+          B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+      Value *RHSV =
B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv"); + + return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); + } + } + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) StringRef LHSStr, RHSStr; if (getConstantStringInfo(LHS, LHSStr) && @@ -972,7 +995,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Intrinsic::ID IID = Callee->getIntrinsicID(); Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); V = B.CreateCall(F, V); @@ -1015,9 +1038,9 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the @@ -1035,13 +1058,37 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { return Ret; } +static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { + // Multiplications calculated using Addition Chains. + // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html + + assert(Exp != 0 && "Incorrect exponent 0 not handled"); + + if (InnerChain[Exp]) + return InnerChain[Exp]; + + static const unsigned AddChain[33][2] = { + {0, 0}, // Unused. + {0, 0}, // Unused (base case = pow1). + {1, 1}, // Unused (pre-computed). 
+ {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, + {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, + {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, + {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, + {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, + }; + + InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), + getPow(InnerChain, AddChain[Exp][1], B)); + return InnerChain[Exp]; +} + Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the @@ -1060,7 +1107,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (Op1C->isExactlyValue(2.0) && hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, LibFunc::exp2l)) - return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B, + Callee->getAttributes()); // pow(10.0, x) -> exp10(x) if (Op1C->isExactlyValue(10.0) && hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, @@ -1069,6 +1117,32 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Callee->getAttributes()); } + bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); + + // pow(exp(x), y) -> exp(x*y) + // pow(exp2(x), y) -> exp2(x * y) + // We enable these only under fast-math. Besides rounding + // differences the transformation changes overflow and + // underflow behavior quite dramatically. + // Example: x = 1000, y = 0.001. + // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). + if (unsafeFPMath) { + if (auto *OpC = dyn_cast<CallInst>(Op1)) { + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + B.SetFastMathFlags(FMF); + + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && + TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) + return EmitUnaryFloatFnCall( + B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"), + OpCCallee->getName(), B, OpCCallee->getAttributes()); + } + } + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); if (!Op2C) return Ret; @@ -1081,10 +1155,15 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc::sqrtl) && hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, LibFunc::fabsl)) { + + // In -ffast-math, pow(x, 0.5) -> sqrt(x). + if (unsafeFPMath) + return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + Callee->getAttributes()); + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. - // TODO: In fast-math mode, this could be just sqrt(x). // TODO: In finite-only mode, this could be just fabs(sqrt(x)). 
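The overflow hazard called out in the pow(exp(x), y) comment above is easy to reproduce. A standalone illustration in plain C++:

#include <cmath>
#include <cstdio>

// exp(1000.0) overflows double (the largest finite argument is ~709.78), so
// pow(exp(x), y) evaluates pow(inf, 0.001) = inf, while the rewritten
// exp(x * y) = exp(1) is finite. This is why the transform is gated on
// unsafe-fp-math.
int main() {
  double x = 1000.0, y = 0.001;
  std::printf("pow(exp(x), y) = %g\n", std::pow(std::exp(x), y)); // inf
  std::printf("exp(x * y)     = %g\n", std::exp(x * y));          // ~2.71828
  return 0;
}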
Value *Inf = ConstantFP::getInfinity(CI->getType()); Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); @@ -1102,18 +1181,42 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + + // In -ffast-math, generate repeated fmul instead of generating pow(x, n). + if (unsafeFPMath) { + APFloat V = abs(Op2C->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // This transformation applies to integer exponents only. + if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || + !V.isInteger()) + return nullptr; + + // We will memoize intermediate products of the Addition Chain. + Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Op1; + InnerChain[2] = B.CreateFMul(Op1, Op1); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert V to something which could be converted to double. + bool ignored; + V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + Value *FMul = getPow(InnerChain, V.convertToDouble(), B); + // For negative exponents simply compute the reciprocal. + if (Op2C->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); + return FMul; + } + return nullptr; } Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Function *Caller = CI->getParent()->getParent(); - Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "exp2" && - TLI->has(LibFunc::exp2f)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the @@ -1162,11 +1265,10 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - if (Callee->getName() == "fabs" && TLI->has(LibFunc::fabsf)) { + StringRef Name = Callee->getName(); + if (Name == "fabs" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, false); - } FunctionType *FT = Callee->getFunctionType(); // Make sure this has 1 argument of FP type which matches the result type. @@ -1184,6 +1286,105 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { return Ret; } +Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { + // If we can shrink the call to a float function rather than a double + // function, do that first. + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + if ((Name == "fmin" && hasFloatVersion(Name)) || + (Name == "fmax" && hasFloatVersion(Name))) { + Value *Ret = optimizeBinaryDoubleFP(CI, B); + if (Ret) + return Ret; + } + + // Make sure this has 2 arguments of FP type which match the result type. 
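The compare-and-select form that optimizeFMinFMax emits just below only behaves like fmin/fmax under the relaxed floating-point assumptions; a standalone comparison in plain C++ (NAN comes from <cmath>):

#include <cassert>
#include <cmath>

// Select form of fmin: matches std::fmin for ordinary values, but differs
// when an operand is NaN (and in the ordering of +/-0.0), which is exactly
// what the no-nans/no-signed-zeros gates license the compiler to ignore.
static double fminSelect(double A, double B) { return A < B ? A : B; }

int main() {
  assert(fminSelect(1.0, 2.0) == std::fmin(1.0, 2.0));
  assert(fminSelect(-3.5, 2.0) == std::fmin(-3.5, 2.0));
  assert(std::fmin(1.0, NAN) == 1.0);       // fmin ignores the NaN...
  assert(std::isnan(fminSelect(1.0, NAN))); // ...the select propagates it
  return 0;
}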
+  FunctionType *FT = Callee->getFunctionType();
+  if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+      FT->getParamType(0) != FT->getParamType(1) ||
+      !FT->getParamType(0)->isFloatingPointTy())
+    return nullptr;
+
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  FastMathFlags FMF;
+  Function *F = CI->getParent()->getParent();
+  if (canUseUnsafeFPMath(F)) {
+    // Unsafe algebra sets all fast-math-flags to true.
+    FMF.setUnsafeAlgebra();
+  } else {
+    // At a minimum, no-nans-fp-math must be true.
+    Attribute Attr = F->getFnAttribute("no-nans-fp-math");
+    if (Attr.getValueAsString() != "true")
+      return nullptr;
+    // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
+    // "Ideally, fmax would be sensitive to the sign of zero, for example
+    // fmax(-0.0, +0.0) would return +0; however, implementation in software
+    // might be impractical."
+    FMF.setNoSignedZeros();
+    FMF.setNoNaNs();
+  }
+  B.SetFastMathFlags(FMF);
+
+  // We have a relaxed floating-point environment. We can ignore NaN-handling
+  // and transform to a compare and select. We do not have to consider errno or
+  // exceptions, because fmin/fmax do not have those.
+  Value *Op0 = CI->getArgOperand(0);
+  Value *Op1 = CI->getArgOperand(1);
+  Value *Cmp = Callee->getName().startswith("fmin") ?
+    B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
+  return B.CreateSelect(Cmp, Op0, Op1);
+}
+
+Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
+  Function *Callee = CI->getCalledFunction();
+  Value *Ret = nullptr;
+  StringRef Name = Callee->getName();
+  if (UnsafeFPShrink && hasFloatVersion(Name))
+    Ret = optimizeUnaryDoubleFP(CI, B, true);
+  FunctionType *FT = Callee->getFunctionType();
+
+  // Just make sure this has 1 argument of FP type, which matches the
+  // result type.
+  if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+      !FT->getParamType(0)->isFloatingPointTy())
+    return Ret;
+
+  if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+    return Ret;
+  Value *Op1 = CI->getArgOperand(0);
+  auto *OpC = dyn_cast<CallInst>(Op1);
+  if (!OpC)
+    return Ret;
+
+  // log(pow(x,y)) -> y*log(x)
+  // This is only applicable to log, log2, log10.
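The identity behind the log(pow(x, y)) rewrite above, checked numerically; it holds for x > 0, and the two sides can differ in the last bits, which is why the transform requires unsafe-fp-math:

#include <cmath>
#include <cstdio>

// log(pow(x, y)) == y * log(x) for x > 0, up to rounding.
int main() {
  double x = 3.7, y = 2.5;
  std::printf("log(pow(x, y)) = %.17g\n", std::log(std::pow(x, y)));
  std::printf("y * log(x)     = %.17g\n", y * std::log(x));
  return 0;
}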
+ if (Name != "log" && Name != "log2" && Name != "log10") + return Ret; + + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + B.SetFastMathFlags(FMF); + + LibFunc::Func Func; + Function *F = OpC->getCalledFunction(); + if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && + Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow)) + return B.CreateFMul(OpC->getArgOperand(1), + EmitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, + Callee->getAttributes()), "mul"); + + // log(exp2(y)) -> y*log(2) + if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) && + TLI->has(Func) && Func == LibFunc::exp2) + return B.CreateFMul( + OpC->getArgOperand(0), + EmitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), + Callee->getName(), B, Callee->getAttributes()), + "logmul"); + return Ret; +} + Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); @@ -1191,19 +1392,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); + if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + return Ret; - // FIXME: For finer-grain optimization, we need intrinsics to have the same - // fast-math flag decorations that are applied to FP instructions. For now, - // we have to rely on the function-level unsafe-fp-math attribute to do this - // optimization because there's no other way to express that the sqrt can be - // reassociated. - Function *F = CI->getParent()->getParent(); - if (F->hasFnAttribute("unsafe-fp-math")) { - // Check for unsafe-fp-math = true. - Attribute Attr = F->getFnAttribute("unsafe-fp-math"); - if (Attr.getValueAsString() != "true") - return Ret; - } Value *Op = CI->getArgOperand(0); if (Instruction *I = dyn_cast<Instruction>(Op)) { if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) { @@ -1238,8 +1429,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { // and multiply. // FIXME: We're not checking the sqrt because it doesn't have // fast-math-flags (see earlier comment). - IRBuilder<true, ConstantFolder, - IRBuilderDefaultInserter<true> >::FastMathFlagGuard Guard(B); + IRBuilder<>::FastMathFlagGuard Guard(B); B.SetFastMathFlags(I->getFastMathFlags()); // If we found a repeated factor, hoist it out of the square root and // replace it with the fabs of that factor. @@ -1262,6 +1452,40 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { return Ret; } +Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + FunctionType *FT = Callee->getFunctionType(); + + // Just make sure this has 1 argument of FP type, which matches the + // result type. 
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + return Ret; + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast<CallInst>(Op1); + if (!OpC) + return Ret; + + // tan(atan(x)) -> x + // tanf(atanf(x)) -> x + // tanl(atanl(x)) -> x + LibFunc::Func Func; + Function *F = OpC->getCalledFunction(); + if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && + ((Func == LibFunc::atan && Callee->getName() == "tan") || + (Func == LibFunc::atanf && Callee->getName() == "tanf") || + (Func == LibFunc::atanl && Callee->getName() == "tanl"))) + Ret = OpC->getArgOperand(0); + return Ret; +} + static bool isTrigLibCall(CallInst *CI); static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, bool UseFloat, Value *&Sin, Value *&Cos, @@ -1329,9 +1553,9 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, return; Function *Callee = CI->getCalledFunction(); - StringRef FuncName = Callee->getName(); LibFunc::Func Func; - if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || !isTrigLibCall(CI)) + if (!Callee || !TLI->getLibFunc(Callee->getName(), Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) return; if (IsFloat) { @@ -1353,10 +1577,8 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl<CallInst *> &Calls, Value *Res) { - for (SmallVectorImpl<CallInst *>::iterator I = Calls.begin(), E = Calls.end(); - I != E; ++I) { - replaceAllUsesWith(*I, Res); - } + for (CallInst *C : Calls) + replaceAllUsesWith(C, Res); } void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, @@ -1387,8 +1609,7 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { // If the argument is an instruction, it must dominate all uses so put our // sincos call there. - BasicBlock::iterator Loc = ArgInst; - B.SetInsertPoint(ArgInst->getParent(), ++Loc); + B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); } else { // Otherwise (e.g. for a constant) the beginning of the function is as // good a place as any. @@ -1413,15 +1634,16 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, // Integer Library Call Optimizations //===----------------------------------------------------------------------===// +static bool checkIntUnaryReturnAndParam(Function *Callee) { + FunctionType *FT = Callee->getFunctionType(); + return FT->getNumParams() == 1 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isIntegerTy(); +} + Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) || - !FT->getParamType(0)->isIntegerTy()) + if (!checkIntUnaryReturnAndParam(Callee)) return nullptr; - Value *Op = CI->getArgOperand(0); // Constant fold. 
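The ffs lowering just below relies on ffs(x) == cttz(x) + 1 for nonzero x. A standalone check using the POSIX ffs and a GCC/Clang builtin as a stand-in for the cttz intrinsic:

#include <cassert>
#include <strings.h>

// ffs(0) == 0; for x != 0, the index of the first set bit is the count of
// trailing zeros plus one. The patch now emits @llvm.cttz with
// is_zero_undef=true, since the zero case is handled by a separate select.
int main() {
  assert(ffs(0) == 0);
  for (unsigned x = 1; x < (1u << 16); ++x)
    assert(ffs((int)x) == __builtin_ctz(x) + 1);
  return 0;
}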
@@ -1436,7 +1658,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); - Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz"); + Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); @@ -1461,11 +1683,7 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require integer(i32) - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // isdigit(c) -> (c-'0') <u 10 @@ -1476,11 +1694,7 @@ Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require integer(i32) - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // isascii(c) -> c <u 128 @@ -1490,11 +1704,7 @@ Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require i32(i32) - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // toascii(c) -> c & 0x7f @@ -1529,10 +1739,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, } static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { - if (!ColdErrorCalls) - return false; - - if (!Callee || !Callee->isDeclaration()) + if (!ColdErrorCalls || !Callee || !Callee->isDeclaration()) return false; if (StreamArg < 0) @@ -1968,16 +2175,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { // Command-line parameter overrides function attribute. if (EnableUnsafeFPShrink.getNumOccurrences() > 0) UnsafeFPShrink = EnableUnsafeFPShrink; - else if (Callee->hasFnAttribute("unsafe-fp-math")) { - // FIXME: This is the same problem as described in optimizeSqrt(). - // If calls gain access to IR-level FMF, then use that instead of a - // function attribute. - - // Check for unsafe-fp-math = true. - Attribute Attr = Callee->getFnAttribute("unsafe-fp-math"); - if (Attr.getValueAsString() == "true") - UnsafeFPShrink = true; - } + else if (canUseUnsafeFPMath(Callee)) + UnsafeFPShrink = true; // First, check for intrinsics. 
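The isdigit/isascii rewrites above replace a two-sided range test with a single unsigned comparison; checked exhaustively over all byte values:

#include <cassert>

// isdigit(c) -> (c - '0') <u 10 and isascii(c) -> c <u 128: anything below
// the lower bound wraps around to a large unsigned value, so one unsigned
// compare covers both bounds at once.
int main() {
  for (int c = 0; c < 256; ++c) {
    assert(((unsigned)(c - '0') < 10u) == (c >= '0' && c <= '9'));
    assert(((unsigned)c < 128u) == (c >= 0 && c <= 127));
  }
  return 0;
}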
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { @@ -1990,6 +2189,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeExp2(CI, Builder); case Intrinsic::fabs: return optimizeFabs(CI, Builder); + case Intrinsic::log: + return optimizeLog(CI, Builder); case Intrinsic::sqrt: return optimizeSqrt(CI, Builder); default: @@ -2001,13 +2202,17 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) { // Try to further simplify the result. CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI); - if (SimplifiedCI && SimplifiedCI->getCalledFunction()) - if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) { + if (SimplifiedCI && SimplifiedCI->getCalledFunction()) { + // Use an IR Builder from SimplifiedCI if available instead of CI + // to guarantee we reach all uses we might replace later on. + IRBuilder<> TmpBuilder(SimplifiedCI); + if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) { // If we were able to further simplify, remove the now redundant call. SimplifiedCI->replaceAllUsesWith(V); SimplifiedCI->eraseFromParent(); return V; } + } return SimplifiedFortifiedCI; } @@ -2068,8 +2273,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeFWrite(CI, Builder); case LibFunc::fputs: return optimizeFPuts(CI, Builder); + case LibFunc::log: + case LibFunc::log10: + case LibFunc::log1p: + case LibFunc::log2: + case LibFunc::logb: + return optimizeLog(CI, Builder); case LibFunc::puts: return optimizePuts(CI, Builder); + case LibFunc::tan: + case LibFunc::tanf: + case LibFunc::tanl: + return optimizeTan(CI, Builder); case LibFunc::perror: return optimizeErrorReporting(CI, Builder); case LibFunc::vfprintf: @@ -2097,24 +2312,23 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { case LibFunc::exp: case LibFunc::exp10: case LibFunc::expm1: - case LibFunc::log: - case LibFunc::log10: - case LibFunc::log1p: - case LibFunc::log2: - case LibFunc::logb: case LibFunc::sin: case LibFunc::sinh: - case LibFunc::tan: case LibFunc::tanh: if (UnsafeFPShrink && hasFloatVersion(FuncName)) return optimizeUnaryDoubleFP(CI, Builder, true); return nullptr; case LibFunc::copysign: - case LibFunc::fmin: - case LibFunc::fmax: if (hasFloatVersion(FuncName)) return optimizeBinaryDoubleFP(CI, Builder); return nullptr; + case LibFunc::fminf: + case LibFunc::fmin: + case LibFunc::fminl: + case LibFunc::fmaxf: + case LibFunc::fmax: + case LibFunc::fmaxl: + return optimizeFMinFMax(CI, Builder); default: return nullptr; } @@ -2133,37 +2347,27 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { Replacer(I, With); } -/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I, - Value *With) { - I->replaceAllUsesWith(With); - I->eraseFromParent(); -} - // TODO: // Additional cases that we need to add to this file: // // cbrt: // * cbrt(expN(X)) -> expN(x/3) // * cbrt(sqrt(x)) -> pow(x,1/6) -// * cbrt(sqrt(x)) -> pow(x,1/9) +// * cbrt(cbrt(x)) -> pow(x,1/9) // // exp, expf, expl: // * exp(log(x)) -> x // // log, logf, logl: // * log(exp(x)) -> x -// * log(x**y) -> y*log(x) // * log(exp(y)) -> y*log(e) -// * log(exp2(y)) -> y*log(2) // * log(exp10(y)) -> y*log(10) // * log(sqrt(x)) -> 0.5*log(x) -// * log(pow(x,y)) -> y*log(x) // // lround, lroundf, lroundl: // * lround(cnst) -> cnst' // // pow, powf, powl: -// * pow(exp(x),y) -> exp(x*y) // * pow(sqrt(x),y) -> pow(x,y*0.5) // * pow(pow(x,y),z)-> pow(x,y*z) // @@ -2179,9 
+2383,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // * sqrt(Nroot(x)) -> pow(x,1/(2*N)) // * sqrt(pow(x,y)) -> pow(|x|,y*0.5) // -// tan, tanf, tanl: -// * tan(atan(x)) -> x -// // trunc, truncf, truncl: // * trunc(cnst) -> cnst' // diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp new file mode 100644 index 0000000..ad6b782 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp @@ -0,0 +1,85 @@ +//===- SplitModule.cpp - Split a module into partitions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the function llvm::SplitModule, which splits a module +// into multiple linkable partitions. It can be used to implement parallel code +// generation for link-time optimization. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +static void externalize(GlobalValue *GV) { + if (GV->hasLocalLinkage()) { + GV->setLinkage(GlobalValue::ExternalLinkage); + GV->setVisibility(GlobalValue::HiddenVisibility); + } + + // Unnamed entities must be named consistently between modules. setName will + // give a distinct name to each such entity. + if (!GV->hasName()) + GV->setName("__llvmsplit_unnamed"); +} + +// Returns whether GV should be in partition (0-based) I of N. +static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) { + if (auto GA = dyn_cast<GlobalAlias>(GV)) + if (const GlobalObject *Base = GA->getBaseObject()) + GV = Base; + + StringRef Name; + if (const Comdat *C = GV->getComdat()) + Name = C->getName(); + else + Name = GV->getName(); + + // Partition by MD5 hash. We only need a few bits for evenness as the number + // of partitions will generally be in the 1-2 figure range; the low 16 bits + // are enough. + MD5 H; + MD5::MD5Result R; + H.update(Name); + H.final(R); + return (R[0] | (R[1] << 8)) % N == I; +} + +void llvm::SplitModule( + std::unique_ptr<Module> M, unsigned N, + std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) { + for (Function &F : *M) + externalize(&F); + for (GlobalVariable &GV : M->globals()) + externalize(&GV); + for (GlobalAlias &GA : M->aliases()) + externalize(&GA); + + // FIXME: We should be able to reuse M as the last partition instead of + // cloning it. 
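isInPartition above works because every partition hashes the same (comdat or symbol) name and reaches the same answer independently, so no coordination between partitions is needed. A dependency-free sketch of the idea, with FNV-1a standing in for the MD5 used by the patch:

#include <cstdint>
#include <cstdio>
#include <string>

// Deterministically assign a name to one of N partitions. Any stable hash
// with a reasonably even low-order distribution serves the purpose.
static unsigned partitionOf(const std::string &Name, unsigned N) {
  uint64_t H = 1469598103934665603ull;  // FNV offset basis
  for (unsigned char C : Name) {
    H ^= C;
    H *= 1099511628211ull;              // FNV prime
  }
  return static_cast<unsigned>(H % N);
}

int main() {
  const char *Names[] = {"foo", "bar", "__llvmsplit_unnamed"};
  for (const char *Name : Names)
    std::printf("%s -> partition %u of 4\n", Name, partitionOf(Name, 4));
  return 0;
}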
+ for (unsigned I = 0; I != N; ++I) { + ValueToValueMapTy VMap; + std::unique_ptr<Module> MPart( + CloneModule(M.get(), VMap, [=](const GlobalValue *GV) { + return isInPartition(GV, I, N); + })); + if (I != 0) + MPart->setModuleInlineAsm(""); + ModuleCallback(std::move(MPart)); + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp index a2a54da..1d1f602 100644 --- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -69,7 +69,6 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" using namespace llvm; diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 7e00a80..6b1d1da 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -50,11 +50,11 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // std::vector<BasicBlock*> ReturningBlocks; std::vector<BasicBlock*> UnreachableBlocks; - for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - if (isa<ReturnInst>(I->getTerminator())) - ReturningBlocks.push_back(I); - else if (isa<UnreachableInst>(I->getTerminator())) - UnreachableBlocks.push_back(I); + for (BasicBlock &I : F) + if (isa<ReturnInst>(I.getTerminator())) + ReturningBlocks.push_back(&I); + else if (isa<UnreachableInst>(I.getTerminator())) + UnreachableBlocks.push_back(&I); // Then unreachable blocks. if (UnreachableBlocks.empty()) { diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 8c72641..1add78e 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -19,11 +19,14 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" using namespace llvm; // Out of line method to get vtable etc for class. void ValueMapTypeRemapper::anchor() {} void ValueMaterializer::anchor() {} +void ValueMaterializer::materializeInitFor(GlobalValue *New, GlobalValue *Old) { +} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, @@ -35,15 +38,28 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // If we have a materializer and it can materialize a value, use that. if (Materializer) { - if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V))) - return VM[V] = NewV; + if (Value *NewV = + Materializer->materializeDeclFor(const_cast<Value *>(V))) { + VM[V] = NewV; + if (auto *NewGV = dyn_cast<GlobalValue>(NewV)) + Materializer->materializeInitFor( + NewGV, const_cast<GlobalValue *>(cast<GlobalValue>(V))); + return NewV; + } } // Global values do not need to be seeded into the VM if they // are using the identity mapping. 
- if (isa<GlobalValue>(V)) + if (isa<GlobalValue>(V)) { + if (Flags & RF_NullMapMissingGlobalValues) { + assert(!(Flags & RF_IgnoreMissingEntries) && + "Illegal to specify both RF_NullMapMissingGlobalValues and " + "RF_IgnoreMissingEntries"); + return nullptr; + } return VM[V] = const_cast<Value*>(V); - + } + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { // Inline asm may need *type* remapping. FunctionType *NewTy = IA->getFunctionType(); @@ -73,7 +89,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // correct. For now, just match behaviour from before the metadata/value // split. // - // assert(MappedMD && "Referenced metadata value not in value map"); + // assert((MappedMD || (Flags & RF_NullMapMissingGlobalValues)) && + // "Referenced metadata value not in value map"); return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD); } @@ -127,9 +144,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, Flags, TypeMapper, Materializer)); } - + Type *NewSrcTy = nullptr; + if (TypeMapper) + if (auto *GEPO = dyn_cast<GEPOperator>(C)) + NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType()); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - return VM[V] = CE->getWithOperands(Ops, NewTy); + return VM[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy); if (isa<ConstantArray>(C)) return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); if (isa<ConstantStruct>(C)) @@ -146,29 +167,42 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, } static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key, - Metadata *Val) { + Metadata *Val, ValueMaterializer *Materializer, + RemapFlags Flags) { VM.MD()[Key].reset(Val); + if (Materializer && !(Flags & RF_HaveUnmaterializedMetadata)) { + auto *N = dyn_cast_or_null<MDNode>(Val); + // Need to invoke this once we have non-temporary MD. + if (!N || !N->isTemporary()) + Materializer->replaceTemporaryMetadata(Key, Val); + } return Val; } -static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) { - return mapToMetadata(VM, MD, const_cast<Metadata *>(MD)); +static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD, + ValueMaterializer *Materializer, RemapFlags Flags) { + return mapToMetadata(VM, MD, const_cast<Metadata *>(MD), Materializer, Flags); } static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl<MDNode *> &Cycles, + SmallVectorImpl<MDNode *> &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer); -static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles, +static Metadata *mapMetadataOp(Metadata *Op, + SmallVectorImpl<MDNode *> &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { if (!Op) return nullptr; - if (Metadata *MappedOp = - MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer)) + + if (Materializer && !Materializer->isMetadataNeeded(Op)) + return nullptr; + + if (Metadata *MappedOp = MapMetadataImpl(Op, DistinctWorklist, VM, Flags, + TypeMapper, Materializer)) return MappedOp; // Use identity map if MappedOp is null and we can ignore missing entries. if (Flags & RF_IgnoreMissingEntries) @@ -178,89 +212,113 @@ static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles, // correct. 
For now, just match behaviour from before the metadata/value // split. // - // llvm_unreachable("Referenced metadata not in value map!"); + // assert((Flags & RF_NullMapMissingGlobalValues) && + // "Referenced metadata not in value map!"); return nullptr; } -/// \brief Remap nodes. +/// Resolve uniquing cycles involving the given metadata. +static void resolveCycles(Metadata *MD, bool MDMaterialized) { + if (auto *N = dyn_cast_or_null<MDNode>(MD)) { + if (!MDMaterialized && N->isTemporary()) + return; + if (!N->isResolved()) + N->resolveCycles(MDMaterialized); + } +} + +/// Remap the operands of an MDNode. /// -/// Insert \c NewNode in the value map, and then remap \c OldNode's operands. -/// Assumes that \c NewNode is already a clone of \c OldNode. +/// If \c Node is temporary, uniquing cycles are ignored. If \c Node is +/// distinct, uniquing cycles are resolved as they're found. /// -/// \pre \c NewNode is a clone of \c OldNode. -static bool remap(const MDNode *OldNode, MDNode *NewNode, - SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM, - RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - assert(OldNode->getNumOperands() == NewNode->getNumOperands() && - "Expected nodes to match"); - assert(OldNode->isResolved() && "Expected resolved node"); - assert(!NewNode->isUniqued() && "Expected non-uniqued node"); - - // Map the node upfront so it's available for cyclic references. - mapToMetadata(VM, OldNode, NewNode); - bool AnyChanged = false; - for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) { - Metadata *Old = OldNode->getOperand(I); - assert(NewNode->getOperand(I) == Old && - "Expected old operands to already be in place"); +/// \pre \c Node.isDistinct() or \c Node.isTemporary(). +static bool remapOperands(MDNode &Node, + SmallVectorImpl<MDNode *> &DistinctWorklist, + ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + assert(!Node.isUniqued() && "Expected temporary or distinct node"); + const bool IsDistinct = Node.isDistinct(); - Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags, - TypeMapper, Materializer); + bool AnyChanged = false; + for (unsigned I = 0, E = Node.getNumOperands(); I != E; ++I) { + Metadata *Old = Node.getOperand(I); + Metadata *New = mapMetadataOp(Old, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); if (Old != New) { AnyChanged = true; - NewNode->replaceOperandWith(I, New); + Node.replaceOperandWith(I, New); + + // Resolve uniquing cycles underneath distinct nodes on the fly so they + // don't infect later operands. + if (IsDistinct) + resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata)); } } return AnyChanged; } -/// \brief Map a distinct MDNode. +/// Map a distinct MDNode. /// -/// Distinct nodes are not uniqued, so they must always recreated. +/// Whether distinct nodes change is independent of their operands. If \a +/// RF_MoveDistinctMDs, then they are reused, and their operands remapped in +/// place; effectively, they're moved from one graph to another. Otherwise, +/// they're cloned/duplicated, and the new copy's operands are remapped. 
 static Metadata *mapDistinctNode(const MDNode *Node,
-                                 SmallVectorImpl<MDNode *> &Cycles,
+                                 SmallVectorImpl<MDNode *> &DistinctWorklist,
                                  ValueToValueMapTy &VM, RemapFlags Flags,
                                  ValueMapTypeRemapper *TypeMapper,
                                  ValueMaterializer *Materializer) {
   assert(Node->isDistinct() && "Expected distinct node");
 
-  MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone());
-  remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer);
+  MDNode *NewMD;
+  if (Flags & RF_MoveDistinctMDs)
+    NewMD = const_cast<MDNode *>(Node);
+  else
+    NewMD = MDNode::replaceWithDistinct(Node->clone());
 
-  // Track any cycles beneath this node.
-  for (Metadata *Op : NewMD->operands())
-    if (auto *Node = dyn_cast_or_null<MDNode>(Op))
-      if (!Node->isResolved())
-        Cycles.push_back(Node);
-
-  return NewMD;
+  // Remap operands later.
+  DistinctWorklist.push_back(NewMD);
+  return mapToMetadata(VM, Node, NewMD, Materializer, Flags);
 }
 
 /// \brief Map a uniqued MDNode.
 ///
 /// Uniqued nodes may not need to be recreated (they may map to themselves).
 static Metadata *mapUniquedNode(const MDNode *Node,
-                                SmallVectorImpl<MDNode *> &Cycles,
+                                SmallVectorImpl<MDNode *> &DistinctWorklist,
                                 ValueToValueMapTy &VM, RemapFlags Flags,
                                 ValueMapTypeRemapper *TypeMapper,
                                 ValueMaterializer *Materializer) {
-  assert(Node->isUniqued() && "Expected uniqued node");
+  assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isUniqued()) &&
+         "Expected uniqued node");
 
-  // Create a temporary node upfront in case we have a metadata cycle.
+  // Create a temporary node and map it upfront in case we have a uniquing
+  // cycle. If necessary, this mapping will get updated by RAUW logic before
+  // returning.
   auto ClonedMD = Node->clone();
-  if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer))
-    // No operands changed, so use the identity mapping.
-    return mapToSelf(VM, Node);
+  mapToMetadata(VM, Node, ClonedMD.get(), Materializer, Flags);
+  if (!remapOperands(*ClonedMD, DistinctWorklist, VM, Flags, TypeMapper,
+                     Materializer)) {
+    // No operands changed, so use the original.
+    ClonedMD->replaceAllUsesWith(const_cast<MDNode *>(Node));
+    // Even though replaceAllUsesWith would have replaced the value map
+    // entry, we need to explicitly map with the final non-temporary node
+    // to replace any temporary metadata via the callback.
+    return mapToSelf(VM, Node, Materializer, Flags);
+  }
 
-  // At least one operand has changed, so uniquify the cloned node.
+  // Uniquify the cloned node. Explicitly map it with the final non-temporary
+  // node so that replacement of temporary metadata via the callback occurs.
return mapToMetadata(VM, Node, - MDNode::replaceWithUniqued(std::move(ClonedMD))); + MDNode::replaceWithUniqued(std::move(ClonedMD)), + Materializer, Flags); } static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl<MDNode *> &Cycles, + SmallVectorImpl<MDNode *> &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { @@ -269,26 +327,28 @@ static Metadata *MapMetadataImpl(const Metadata *MD, return NewMD; if (isa<MDString>(MD)) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); if (isa<ConstantAsMetadata>(MD)) if ((Flags & RF_NoModuleLevelChanges)) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); if (const auto *VMD = dyn_cast<ValueAsMetadata>(MD)) { Value *MappedV = MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer); if (VMD->getValue() == MappedV || (!MappedV && (Flags & RF_IgnoreMissingEntries))) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); // FIXME: This assert crashes during bootstrap, but I think it should be // correct. For now, just match behaviour from before the metadata/value // split. // - // assert(MappedV && "Referenced metadata not in value map!"); + // assert((MappedV || (Flags & RF_NullMapMissingGlobalValues)) && + // "Referenced metadata not in value map!"); if (MappedV) - return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV)); + return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV), Materializer, + Flags); return nullptr; } @@ -299,37 +359,54 @@ static Metadata *MapMetadataImpl(const Metadata *MD, // If this is a module-level metadata and we know that nothing at the // module level is changing, then use an identity mapping. if (Flags & RF_NoModuleLevelChanges) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); // Require resolved nodes whenever metadata might be remapped. - assert(Node->isResolved() && "Unexpected unresolved node"); + assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isResolved()) && + "Unexpected unresolved node"); + + if (Materializer && Node->isTemporary()) { + assert(Flags & RF_HaveUnmaterializedMetadata); + Metadata *TempMD = + Materializer->mapTemporaryMetadata(const_cast<Metadata *>(MD)); + // If the above callback returned an existing temporary node, use it + // instead of the current temporary node. This happens when earlier + // function importing passes already created and saved a temporary + // metadata node for the same value id. + if (TempMD) { + mapToMetadata(VM, MD, TempMD, Materializer, Flags); + return TempMD; + } + } if (Node->isDistinct()) - return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); + return mapDistinctNode(Node, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); - return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); + return mapUniquedNode(Node, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); } Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { - SmallVector<MDNode *, 8> Cycles; - Metadata *NewMD = - MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer); - - // Resolve cycles underneath MD. 
-  if (NewMD && NewMD != MD) {
-    if (auto *N = dyn_cast<MDNode>(NewMD))
-      if (!N->isResolved())
-        N->resolveCycles();
-
-    for (MDNode *N : Cycles)
-      if (!N->isResolved())
-        N->resolveCycles();
-  } else {
-    // Shouldn't get unresolved cycles if nothing was remapped.
-    assert(Cycles.empty() && "Expected no unresolved cycles");
-  }
+  SmallVector<MDNode *, 8> DistinctWorklist;
+  Metadata *NewMD = MapMetadataImpl(MD, DistinctWorklist, VM, Flags, TypeMapper,
+                                    Materializer);
+
+  // When there are no module-level changes, it's possible that the metadata
+  // graph has temporaries. Skip the logic to resolve cycles, since it's
+  // unnecessary (and invalid) in that case.
+  if (Flags & RF_NoModuleLevelChanges)
+    return NewMD;
+
+  // Resolve cycles involving the entry metadata.
+  resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata));
+
+  // Remap the operands of distinct MDNodes.
+  while (!DistinctWorklist.empty())
+    remapOperands(*DistinctWorklist.pop_back_val(), DistinctWorklist, VM, Flags,
+                  TypeMapper, Materializer);
 
   return NewMD;
 }
@@ -374,14 +451,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
   // Remap attached metadata.
   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
   I->getAllMetadata(MDs);
-  for (SmallVectorImpl<std::pair<unsigned, MDNode *>>::iterator
-           MI = MDs.begin(),
-           ME = MDs.end();
-       MI != ME; ++MI) {
-    MDNode *Old = MI->second;
+  for (const auto &MI : MDs) {
+    MDNode *Old = MI.second;
     MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer);
     if (New != Old)
-      I->setMetadata(MI->first, New);
+      I->setMetadata(MI.first, New);
   }
 
   if (!TypeMapper)
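The MapMetadata driver above maps the entry node eagerly and then drains a worklist of distinct nodes whose operands are remapped afterwards. The shape of that loop, reduced to a dependency-free sketch (Node is an illustrative stand-in for MDNode, and the visited flag stands in for the real operand mapping):

#include <cstdio>
#include <vector>

struct Node {
  std::vector<Node *> Ops;
  bool Remapped = false;
};

// Visit each operand once; newly discovered nodes are deferred to the
// worklist rather than recursed into, mirroring how distinct MDNodes have
// their operands remapped after the initial mapping pass.
static void remapOperands(Node &N, std::vector<Node *> &Worklist) {
  for (Node *Op : N.Ops)
    if (Op && !Op->Remapped) {
      Op->Remapped = true;
      Worklist.push_back(Op);
    }
}

int main() {
  Node A, B, C;
  A.Ops = {&B, &C};
  C.Ops = {&A}; // cycles are fine: the visited flag breaks them
  std::vector<Node *> Worklist;
  A.Remapped = true;
  Worklist.push_back(&A);
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    remapOperands(*N, Worklist);
  }
  std::printf("remapped: %d %d %d\n", A.Remapped, B.Remapped, C.Remapped);
  return 0;
}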