diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
24 files changed, 1320 insertions, 1319 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp index 8e5a1eb..d831452 100644 --- a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, // Check to see if this value is already used in the memory instruction's // block. If so, it's already live into the block at the very least, so we // can reasonably fold it. - BasicBlock *MemBB = MemoryInst->getParent(); - for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end(); - UI != E; ++UI) - // We know that uses of arguments and instructions have to be instructions. - if (cast<Instruction>(*UI)->getParent() == MemBB) - return true; - - return false; + return Val->isUsedInBasicBlock(MemoryInst->getParent()); } diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index a7f9efd..3859a1a 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -249,7 +249,6 @@ unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { if (Term->getSuccessor(i) == Succ) return i; } - return 0; } /// SplitEdge - Split the edge connecting specified block. Pass P must @@ -453,9 +452,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// of the edges being split is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - BasicBlock *const *Preds, - unsigned NumPreds, const char *Suffix, - Pass *P) { + ArrayRef<BasicBlock*> Preds, + const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); @@ -464,7 +462,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BranchInst *BI = BranchInst::Create(BB, NewBB); // Move the edges from Preds to point to NewBB instead of BB. - for (unsigned i = 0; i != NumPreds; ++i) { + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. @@ -477,7 +475,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. - if (NumPreds == 0) { + if (Preds.size() == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); @@ -486,12 +484,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), - P, HasLoopExit); + UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. - UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI, - P, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit); return NewBB; } diff --git a/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp b/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp deleted file mode 100644 index 23a30cc5..0000000 --- a/contrib/llvm/lib/Transforms/Utils/BasicInliner.cpp +++ /dev/null @@ -1,182 +0,0 @@ -//===- BasicInliner.cpp - Basic function level inliner --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a simple function based inliner that does not use -// call graph information. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "basicinliner" -#include "llvm/Module.h" -#include "llvm/Function.h" -#include "llvm/Transforms/Utils/BasicInliner.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallPtrSet.h" -#include <vector> - -using namespace llvm; - -static cl::opt<unsigned> -BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200), - cl::desc("Control the amount of basic inlining to perform (default = 200)")); - -namespace llvm { - - /// BasicInlinerImpl - BasicInliner implemantation class. This hides - /// container info, used by basic inliner, from public interface. - struct BasicInlinerImpl { - - BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT - void operator=(const BasicInlinerImpl&); // DO NO IMPLEMENT - public: - BasicInlinerImpl(TargetData *T) : TD(T) {} - - /// addFunction - Add function into the list of functions to process. - /// All functions must be inserted using this interface before invoking - /// inlineFunctions(). - void addFunction(Function *F) { - Functions.push_back(F); - } - - /// neverInlineFunction - Sometimes a function is never to be inlined - /// because of one or other reason. - void neverInlineFunction(Function *F) { - NeverInline.insert(F); - } - - /// inlineFuctions - Walk all call sites in all functions supplied by - /// client. Inline as many call sites as possible. Delete completely - /// inlined functions. - void inlineFunctions(); - - private: - TargetData *TD; - std::vector<Function *> Functions; - SmallPtrSet<const Function *, 16> NeverInline; - SmallPtrSet<Function *, 8> DeadFunctions; - InlineCostAnalyzer CA; - }; - -/// inlineFuctions - Walk all call sites in all functions supplied by -/// client. Inline as many call sites as possible. Delete completely -/// inlined functions. -void BasicInlinerImpl::inlineFunctions() { - - // Scan through and identify all call sites ahead of time so that we only - // inline call sites in the original functions, not call sites that result - // from inlining other functions. - std::vector<CallSite> CallSites; - - for (std::vector<Function *>::iterator FI = Functions.begin(), - FE = Functions.end(); FI != FE; ++FI) { - Function *F = *FI; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - CallSite CS(cast<Value>(I)); - if (CS && CS.getCalledFunction() - && !CS.getCalledFunction()->isDeclaration()) - CallSites.push_back(CS); - } - } - - DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); - - // Inline call sites. - bool Changed = false; - do { - Changed = false; - for (unsigned index = 0; index != CallSites.size() && !CallSites.empty(); - ++index) { - CallSite CS = CallSites[index]; - if (Function *Callee = CS.getCalledFunction()) { - - // Eliminate calls that are never inlinable. - if (Callee->isDeclaration() || - CS.getInstruction()->getParent()->getParent() == Callee) { - CallSites.erase(CallSites.begin() + index); - --index; - continue; - } - InlineCost IC = CA.getInlineCost(CS, NeverInline); - if (IC.isAlways()) { - DEBUG(dbgs() << " Inlining: cost=always" - <<", call: " << *CS.getInstruction()); - } else if (IC.isNever()) { - DEBUG(dbgs() << " NOT Inlining: cost=never" - <<", call: " << *CS.getInstruction()); - continue; - } else { - int Cost = IC.getValue(); - - if (Cost >= (int) BasicInlineThreshold) { - DEBUG(dbgs() << " NOT Inlining: cost = " << Cost - << ", call: " << *CS.getInstruction()); - continue; - } else { - DEBUG(dbgs() << " Inlining: cost = " << Cost - << ", call: " << *CS.getInstruction()); - } - } - - // Inline - InlineFunctionInfo IFI(0, TD); - if (InlineFunction(CS, IFI)) { - if (Callee->use_empty() && (Callee->hasLocalLinkage() || - Callee->hasAvailableExternallyLinkage())) - DeadFunctions.insert(Callee); - Changed = true; - CallSites.erase(CallSites.begin() + index); - --index; - } - } - } - } while (Changed); - - // Remove completely inlined functions from module. - for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(), - E = DeadFunctions.end(); I != E; ++I) { - Function *D = *I; - Module *M = D->getParent(); - M->getFunctionList().remove(D); - } -} - -BasicInliner::BasicInliner(TargetData *TD) { - Impl = new BasicInlinerImpl(TD); -} - -BasicInliner::~BasicInliner() { - delete Impl; -} - -/// addFunction - Add function into the list of functions to process. -/// All functions must be inserted using this interface before invoking -/// inlineFunctions(). -void BasicInliner::addFunction(Function *F) { - Impl->addFunction(F); -} - -/// neverInlineFunction - Sometimes a function is never to be inlined because -/// of one or other reason. -void BasicInliner::neverInlineFunction(Function *F) { - Impl->neverInlineFunction(F); -} - -/// inlineFuctions - Walk all call sites in all functions supplied by -/// client. Inline as many call sites as possible. Delete completely -/// inlined functions. -void BasicInliner::inlineFunctions() { - Impl->inlineFunctions(); -} - -} diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index c052910..f752d79 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = - SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), - "split", P); + SplitBlockPredecessors(Exit, Preds, "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 4b5f45b..a808303 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -15,11 +15,15 @@ #include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/LLVMContext.h" #include "llvm/Intrinsics.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; @@ -206,19 +210,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, /// 'floor'). This function is known to take a single of type matching 'Op' and /// returns one value with the same type. If 'Op' is a long double, 'l' is /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. -Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name, - IRBuilder<> &B, const AttrListPtr &Attrs) { - char NameBuffer[20]; +Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, + const AttrListPtr &Attrs) { + SmallString<20> NameBuffer; if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer. - unsigned NameLen = strlen(Name); - assert(NameLen < sizeof(NameBuffer)-2); - memcpy(NameBuffer, Name, NameLen); + NameBuffer += Name; if (Op->getType()->isFloatTy()) - NameBuffer[NameLen] = 'f'; // floorf + NameBuffer += 'f'; // floorf else - NameBuffer[NameLen] = 'l'; // floorl - NameBuffer[NameLen+1] = 0; + NameBuffer += 'l'; // floorl Name = NameBuffer; } @@ -299,20 +300,21 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD) { + const TargetData *TD, const TargetLibraryInfo *TLI) { Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI, 3), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); else - F = M->getOrInsertFunction("fputs", B.getInt32Ty(), + F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); @@ -324,23 +326,25 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); + StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI, 3), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), TD->getIntPtrType(Context), File->getType(), NULL); else - F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context), + F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), TD->getIntPtrType(Context), diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index cf21f1e..20052a4 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -23,8 +23,11 @@ #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/SmallVector.h" #include <map> @@ -60,7 +63,6 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, if (CodeInfo) { CodeInfo->ContainsCalls |= hasCalls; - CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator()); CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->getEntryBlock(); @@ -75,7 +77,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix, ClonedCodeInfo *CodeInfo) { + const char *NameSuffix, ClonedCodeInfo *CodeInfo, + ValueMapTypeRemapper *TypeMapper) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG @@ -113,8 +116,23 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Create a new basic block and copy instructions into it! BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); - VMap[&BB] = CBB; // Add basic block mapping. + // Add basic block mapping. + VMap[&BB] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (BB.hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), + const_cast<BasicBlock*>(&BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + } + + // Note return instructions for the caller. if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) Returns.push_back(RI); } @@ -126,7 +144,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Loop over all instructions, fixing each one as we find it... for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) RemapInstruction(II, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper); } /// CloneFunction - Return a copy of the specified function, but without @@ -181,7 +200,6 @@ namespace { const Function *OldFunc; ValueToValueMapTy &VMap; bool ModuleLevelChanges; - SmallVectorImpl<ReturnInst*> &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; const TargetData *TD; @@ -189,24 +207,18 @@ namespace { PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, - SmallVectorImpl<ReturnInst*> &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), - Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { } /// CloneBlock - The specified block is found to be reachable, clone it and /// anything that it can reach. void CloneBlock(const BasicBlock *BB, std::vector<const BasicBlock*> &ToClone); - - public: - /// ConstantFoldMappedInstruction - Constant fold the specified instruction, - /// mapping its operands through VMap if they are available. - Constant *ConstantFoldMappedInstruction(const Instruction *I); }; } @@ -214,7 +226,7 @@ namespace { /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, std::vector<const BasicBlock*> &ToClone){ - TrackingVH<Value> &BBEntry = VMap[BB]; + WeakVH &BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -224,25 +236,55 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BBEntry = NewBB = BasicBlock::Create(BB->getContext()); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + // + // Note that we don't need to fix the mapping for unreachable blocks; + // the default mapping there is safe. + if (BB->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), + const_cast<BasicBlock*>(BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); + } + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; // Loop over all instructions, and copy them over, DCE'ing as we go. This // loop doesn't include the terminator. for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); II != IE; ++II) { - // If this instruction constant folds, don't bother cloning the instruction, - // instead, just add the constant to the value map. - if (Constant *C = ConstantFoldMappedInstruction(II)) { - VMap[II] = C; - continue; + Instruction *NewInst = II->clone(); + + // Eagerly remap operands to the newly cloned instruction, except for PHI + // nodes for which we defer processing until we update the CFG. + if (!isa<PHINode>(NewInst)) { + RemapInstruction(NewInst, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + + // If we can simplify this instruction to some other value, simply add + // a mapping to that value rather than inserting a new instruction into + // the basic block. + if (Value *V = SimplifyInstruction(NewInst, TD)) { + // On the off-chance that this simplifies to an instruction in the old + // function, map it back into the new function. + if (Value *MappedV = VMap.lookup(V)) + V = MappedV; + + VMap[II] = V; + delete NewInst; + continue; + } } - Instruction *NewInst = II->clone(); if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); - NewBB->getInstList().push_back(NewInst); VMap[II] = NewInst; // Add instruction map to value. - + NewBB->getInstList().push_back(NewInst); hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (isa<ConstantInt>(AI->getArraySize())) @@ -281,7 +323,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, Cond = dyn_cast_or_null<ConstantInt>(V); } if (Cond) { // Constant fold to uncond branch! - BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond)); + SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond); + BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor()); VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; @@ -303,38 +346,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (CodeInfo) { CodeInfo->ContainsCalls |= hasCalls; - CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI); CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); } - - if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator())) - Returns.push_back(RI); -} - -/// ConstantFoldMappedInstruction - Constant fold the specified instruction, -/// mapping its operands through VMap if they are available. -Constant *PruningFunctionCloner:: -ConstantFoldMappedInstruction(const Instruction *I) { - SmallVector<Constant*, 8> Ops; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), - VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges))) - Ops.push_back(Op); - else - return 0; // All operands not constant! - - if (const CmpInst *CI = dyn_cast<CmpInst>(I)) - return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], - TD); - - if (const LoadInst *LI = dyn_cast<LoadInst>(I)) - if (!LI->isVolatile()) - return ConstantFoldLoadFromConstPtr(Ops[0], TD); - - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -361,7 +376,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - Returns, NameSuffix, CodeInfo, TD); + NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; @@ -386,29 +401,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); - - // Loop over all of the instructions in the block, fixing up operand - // references as we go. This uses VMap to do all the hard work. - // - BasicBlock::iterator I = NewBB->begin(); - - DebugLoc TheCallDL; - if (TheCall) - TheCallDL = TheCall->getDebugLoc(); - + // Handle PHI nodes specially, as we have to remove references to dead // blocks. - if (PHINode *PN = dyn_cast<PHINode>(I)) { - // Skip over all PHI nodes, remembering them for later. - BasicBlock::const_iterator OldI = BI->begin(); - for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) - PHIToResolve.push_back(cast<PHINode>(OldI)); - } - - // Otherwise, remap the rest of the instructions normally. - for (; I != NewBB->end(); ++I) - RemapInstruction(I, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) + if (const PHINode *PN = dyn_cast<PHINode>(I)) + PHIToResolve.push_back(PN); + else + break; + + // Finally, remap the terminator instructions, as those can't be remapped + // until all BBs are mapped. + RemapInstruction(NewBB->getTerminator(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); } // Defer PHI resolution until rest of function is resolved, PHI resolution @@ -490,31 +495,55 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ++OldI; } } - // NOTE: We cannot eliminate single entry phi nodes here, because of - // VMap. Single entry phi nodes can have multiple VMap entries - // pointing at them. Thus, deleting one would require scanning the VMap - // to update any entries in it that would require that. This would be - // really slow. } - + + // Make a second pass over the PHINodes now that all of them have been + // remapped into the new function, simplifying the PHINode and performing any + // recursive simplifications exposed. This will transparently update the + // WeakVH in the VMap. Notably, we rely on that so that if we coalesce + // two PHINodes, the iteration over the old PHIs remains valid, and the + // mapping will just map us to the new node (which may not even be a PHI + // node). + for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) + if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) + recursivelySimplifyInstruction(PN, TD); + // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); + Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); + Function::iterator I = Begin; while (I != NewFunc->end()) { + // Check if this block has become dead during inlining or other + // simplifications. Note that the first block will appear dead, as it has + // not yet been wired up properly. + if (I != Begin && (pred_begin(I) == pred_end(I) || + I->getSinglePredecessor() == I)) { + BasicBlock *DeadBB = I++; + DeleteDeadBlock(DeadBB); + continue; + } + + // We need to simplify conditional branches and switches with a constant + // operand. We try to prune these out when cloning, but if the + // simplification required looking through PHI nodes, those are only + // available after forming the full basic block. That may leave some here, + // and we still want to prune the dead code as early as possible. + ConstantFoldTerminator(I); + BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } - // Note that we can't eliminate uncond branches if the destination has - // single-entry PHI nodes. Eliminating the single-entry phi nodes would - // require scanning the VMap to update any entries that point to the phi - // node. BasicBlock *Dest = BI->getSuccessor(0); - if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { + if (!Dest->getSinglePredecessor()) { ++I; continue; } - + + // We shouldn't be able to get single-entry PHI nodes here, as instsimplify + // above should have zapped all of them.. + assert(!isa<PHINode>(Dest->begin())); + // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); @@ -530,4 +559,13 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // Do not increment I, iteratively merge all things this block branches to. } + + // Make a final pass over the basic blocks from theh old function to gather + // any return instructions which survived folding. We have to do this here + // because we can iteratively remove and merge returns above. + for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]), + E = NewFunc->end(); + I != E; ++I) + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) + Returns.push_back(RI); } diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp new file mode 100644 index 0000000..9b09915 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -0,0 +1,96 @@ +//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse compare instructions +// and fold them into constants or other compare instructions +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" + +using namespace llvm; + +/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits +/// are carefully arranged to allow folding of expressions such as: +/// +/// (A < B) | (A > B) --> (A != B) +/// +/// Note that this is only valid if the first and second predicates have the +/// same sign. Is illegal to do: (A u< B) | (A s> B) +/// +/// Three bits are used to represent the condition, as follows: +/// 0 A > B +/// 1 A == B +/// 2 A < B +/// +/// <=> Value Definition +/// 000 0 Always false +/// 001 1 A > B +/// 010 2 A == B +/// 011 3 A >= B +/// 100 4 A < B +/// 101 5 A != B +/// 110 6 A <= B +/// 111 7 Always true +/// +unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) { + ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate() + : ICI->getPredicate(); + switch (Pred) { + // False -> 0 + case ICmpInst::ICMP_UGT: return 1; // 001 + case ICmpInst::ICMP_SGT: return 1; // 001 + case ICmpInst::ICMP_EQ: return 2; // 010 + case ICmpInst::ICMP_UGE: return 3; // 011 + case ICmpInst::ICMP_SGE: return 3; // 011 + case ICmpInst::ICMP_ULT: return 4; // 100 + case ICmpInst::ICMP_SLT: return 4; // 100 + case ICmpInst::ICMP_NE: return 5; // 101 + case ICmpInst::ICMP_ULE: return 6; // 110 + case ICmpInst::ICMP_SLE: return 6; // 110 + // True -> 7 + default: + llvm_unreachable("Invalid ICmp predicate!"); + } +} + +/// getICmpValue - This is the complement of getICmpCode, which turns an +/// opcode and two operands into either a constant true or false, or the +/// predicate for a new ICmp instruction. The sign is passed in to determine +/// which kind of predicate to use in the new icmp instruction. +/// Non-NULL return value will be a true or false constant. +/// NULL return means a new ICmp is needed. The predicate for which is +/// output in NewICmpPred. +Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + CmpInst::Predicate &NewICmpPred) { + switch (Code) { + default: llvm_unreachable("Illegal ICmp code!"); + case 0: // False. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + case 2: NewICmpPred = ICmpInst::ICMP_EQ; break; + case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 5: NewICmpPred = ICmpInst::ICMP_NE; break; + case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 7: // True. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); + } + return NULL; +} + +/// PredicatesFoldable - Return true if both predicates match sign or if at +/// least one of them is an equality comparison (which is signless). +bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { + return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || + (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || + (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 5f47ebb..e8c0b80 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -615,9 +615,10 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, default: // Otherwise, make the default destination of the switch instruction be one // of the other successors. - TheSwitch->setOperand(0, call); - TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks)); - TheSwitch->removeCase(NumExitBlocks); // Remove redundant case + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); break; } } diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 8cc2649..99b5830 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -6,21 +6,12 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file provide the function DemoteRegToStack(). This function takes a -// virtual register computed by an Instruction and replaces it with a slot in -// the stack frame, allocated via alloca. It returns the pointer to the -// AllocaInst inserted. After this function is called on an instruction, we are -// guaranteed that the only user of the instruction is a store that is -// immediately after it. -// -//===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Type.h" -#include <map> +#include "llvm/ADT/DenseMap.h" using namespace llvm; /// DemoteRegToStack - This function takes a virtual register computed by an @@ -28,8 +19,7 @@ using namespace llvm; /// alloca. This allows the CFG to be changed around without fear of /// invalidating the SSA information for the value. It returns the pointer to /// the alloca inserted to create a stack slot for I. -/// -AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, +AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Instruction *AllocaPoint) { if (I.use_empty()) { I.eraseFromParent(); @@ -47,21 +37,20 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, F->getEntryBlock().begin()); } - // Change all of the users of the instruction to read from the stack slot - // instead. + // Change all of the users of the instruction to read from the stack slot. while (!I.use_empty()) { Instruction *U = cast<Instruction>(I.use_back()); if (PHINode *PN = dyn_cast<PHINode>(U)) { // If this is a PHI node, we can't insert a load of the value before the - // use. Instead, insert the load in the predecessor block corresponding + // use. Instead insert the load in the predecessor block corresponding // to the incoming value. // // Note that if there are multiple edges from a basic block to this PHI - // node that we cannot multiple loads. The problem is that the resultant - // PHI node will have multiple values (from each load) coming in from the - // same block, which is illegal SSA form. For this reason, we keep track - // and reuse loads we insert. - std::map<BasicBlock*, Value*> Loads; + // node that we cannot have multiple loads. The problem is that the + // resulting PHI node will have multiple values (from each load) coming in + // from the same block, which is illegal SSA form. For this reason, we + // keep track of and reuse loads we insert. + DenseMap<BasicBlock*, Value*> Loads; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == &I) { Value *&V = Loads[PN->getIncomingBlock(i)]; @@ -81,9 +70,9 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, } - // Insert stores of the computed value into the stack slot. We have to be - // careful is I is an invoke instruction though, because we can't insert the - // store AFTER the terminator instruction. + // Insert stores of the computed value into the stack slot. We have to be + // careful if I is an invoke instruction, because we can't insert the store + // AFTER the terminator instruction. BasicBlock::iterator InsertPt; if (!isa<TerminatorInst>(I)) { InsertPt = &I; @@ -97,18 +86,17 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, InsertPt = II.getNormalDest()->begin(); } - for (; isa<PHINode>(InsertPt); ++InsertPt) - /* empty */; // Don't insert before any PHI nodes. - new StoreInst(&I, Slot, InsertPt); + for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + /* empty */; // Don't insert before PHI nodes or landingpad instrs. + new StoreInst(&I, Slot, InsertPt); return Slot; } - -/// DemotePHIToStack - This function takes a virtual register computed by a phi -/// node and replaces it with a slot in the stack frame, allocated via alloca. -/// The phi node is deleted and it returns the pointer to the alloca inserted. -AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { +/// DemotePHIToStack - This function takes a virtual register computed by a PHI +/// node and replaces it with a slot in the stack frame allocated via alloca. +/// The PHI node is deleted. It returns the pointer to the alloca inserted. +AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { if (P->use_empty()) { P->eraseFromParent(); return 0; @@ -125,7 +113,7 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { F->getEntryBlock().begin()); } - // Iterate over each operand, insert store in each predecessor. + // Iterate over each operand inserting a store in each predecessor. for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { assert(II->getParent() != P->getIncomingBlock(i) && @@ -135,12 +123,11 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { P->getIncomingBlock(i)->getTerminator()); } - // Insert load in place of the phi and replace all uses. + // Insert a load in place of the PHI and replace all uses. Value *V = new LoadInst(Slot, P->getName()+".reload", P); P->replaceAllUsesWith(V); - // Delete phi. + // Delete PHI. P->eraseFromParent(); - return Slot; } diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 5464dbc..d2b167a 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -10,13 +10,6 @@ // This file implements inlining of a function into a call site, resolving // parameters and the return value as appropriate. // -// The code in this file for handling inlines through invoke -// instructions preserves semantics only under some assumptions about -// the behavior of unwinders which correspond to gcc-style libUnwind -// exception personality functions. Eventually the IR will be -// improved to make this unnecessary, but until then, this code is -// marked [LIBUNWIND]. -// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" @@ -38,271 +31,52 @@ #include "llvm/Support/IRBuilder.h" using namespace llvm; -bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) { - return InlineFunction(CallSite(CI), IFI); -} -bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) { - return InlineFunction(CallSite(II), IFI); -} - -// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is -// turned on. - -/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block. -static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) { - for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) { - EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i); - if (exn) return exn; - } - - return 0; -} - -/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for -/// the given llvm.eh.exception call. -static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) { - BasicBlock *exnBlock = exn->getParent(); - - EHSelectorInst *outOfBlockSelector = 0; - for (Instruction::use_iterator - ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) { - EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui); - if (!sel) continue; - - // Immediately accept an eh.selector in the same block as the - // excepton call. - if (sel->getParent() == exnBlock) return sel; - - // Otherwise, use the first selector we see. - if (!outOfBlockSelector) outOfBlockSelector = sel; - } - - return outOfBlockSelector; +bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, + bool InsertLifetime) { + return InlineFunction(CallSite(CI), IFI, InsertLifetime); } - -/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector -/// in the given landing pad. In principle, llvm.eh.exception is -/// required to be in the landing pad; in practice, SplitCriticalEdge -/// can break that invariant, and then inlining can break it further. -/// There's a real need for a reliable solution here, but until that -/// happens, we have some fragile workarounds here. -static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) { - // Look for an exception call in the actual landing pad. - EHExceptionInst *exn = findExceptionInBlock(lpad); - if (exn) return findSelectorForException(exn); - - // Okay, if that failed, look for one in an obvious successor. If - // we find one, we'll fix the IR by moving things back to the - // landing pad. - - bool dominates = true; // does the lpad dominate the exn call - BasicBlock *nonDominated = 0; // if not, the first non-dominated block - BasicBlock *lastDominated = 0; // and the block which branched to it - - BasicBlock *exnBlock = lpad; - - // We need to protect against lpads that lead into infinite loops. - SmallPtrSet<BasicBlock*,4> visited; - visited.insert(exnBlock); - - do { - // We're not going to apply this hack to anything more complicated - // than a series of unconditional branches, so if the block - // doesn't terminate in an unconditional branch, just fail. More - // complicated cases can arise when, say, sinking a call into a - // split unwind edge and then inlining it; but that can do almost - // *anything* to the CFG, including leaving the selector - // completely unreachable. The only way to fix that properly is - // to (1) prohibit transforms which move the exception or selector - // values away from the landing pad, e.g. by producing them with - // instructions that are pinned to an edge like a phi, or - // producing them with not-really-instructions, and (2) making - // transforms which split edges deal with that. - BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back()); - if (!branch || branch->isConditional()) return 0; - - BasicBlock *successor = branch->getSuccessor(0); - - // Fail if we found an infinite loop. - if (!visited.insert(successor)) return 0; - - // If the successor isn't dominated by exnBlock: - if (!successor->getSinglePredecessor()) { - // We don't want to have to deal with threading the exception - // through multiple levels of phi, so give up if we've already - // followed a non-dominating edge. - if (!dominates) return 0; - - // Otherwise, remember this as a non-dominating edge. - dominates = false; - nonDominated = successor; - lastDominated = exnBlock; - } - - exnBlock = successor; - - // Can we stop here? - exn = findExceptionInBlock(exnBlock); - } while (!exn); - - // Look for a selector call for the exception we found. - EHSelectorInst *selector = findSelectorForException(exn); - if (!selector) return 0; - - // The easy case is when the landing pad still dominates the - // exception call, in which case we can just move both calls back to - // the landing pad. - if (dominates) { - selector->moveBefore(lpad->getFirstNonPHI()); - exn->moveBefore(selector); - return selector; - } - - // Otherwise, we have to split at the first non-dominating block. - // The CFG looks basically like this: - // lpad: - // phis_0 - // insnsAndBranches_1 - // br label %nonDominated - // nonDominated: - // phis_2 - // insns_3 - // %exn = call i8* @llvm.eh.exception() - // insnsAndBranches_4 - // %selector = call @llvm.eh.selector(i8* %exn, ... - // We need to turn this into: - // lpad: - // phis_0 - // %exn0 = call i8* @llvm.eh.exception() - // %selector0 = call @llvm.eh.selector(i8* %exn0, ... - // insnsAndBranches_1 - // br label %split // from lastDominated - // nonDominated: - // phis_2 (without edge from lastDominated) - // %exn1 = call i8* @llvm.eh.exception() - // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ... - // br label %split - // split: - // phis_2 (edge from lastDominated, edge from split) - // %exn = phi ... - // %selector = phi ... - // insns_3 - // insnsAndBranches_4 - - assert(nonDominated); - assert(lastDominated); - - // First, make clones of the intrinsics to go in lpad. - EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone()); - EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone()); - lpadSelector->setArgOperand(0, lpadExn); - lpadSelector->insertBefore(lpad->getFirstNonPHI()); - lpadExn->insertBefore(lpadSelector); - - // Split the non-dominated block. - BasicBlock *split = - nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(), - nonDominated->getName() + ".lpad-fix"); - - // Redirect the last dominated branch there. - cast<BranchInst>(lastDominated->back()).setSuccessor(0, split); - - // Move the existing intrinsics to the end of the old block. - selector->moveBefore(&nonDominated->back()); - exn->moveBefore(selector); - - Instruction *splitIP = &split->front(); - - // For all the phis in nonDominated, make a new phi in split to join - // that phi with the edge from lastDominated. - for (BasicBlock::iterator - i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) { - PHINode *phi = dyn_cast<PHINode>(i); - if (!phi) break; - - PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(), - splitIP); - phi->replaceAllUsesWith(splitPhi); - splitPhi->addIncoming(phi, nonDominated); - splitPhi->addIncoming(phi->removeIncomingValue(lastDominated), - lastDominated); - } - - // Make new phis for the exception and selector. - PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP); - exn->replaceAllUsesWith(exnPhi); - selector->setArgOperand(0, exn); // except for this use - exnPhi->addIncoming(exn, nonDominated); - exnPhi->addIncoming(lpadExn, lastDominated); - - PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP); - selector->replaceAllUsesWith(selectorPhi); - selectorPhi->addIncoming(selector, nonDominated); - selectorPhi->addIncoming(lpadSelector, lastDominated); - - return lpadSelector; +bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, + bool InsertLifetime) { + return InlineFunction(CallSite(II), IFI, InsertLifetime); } namespace { /// A class for recording information about inlining through an invoke. class InvokeInliningInfo { - BasicBlock *OuterUnwindDest; - EHSelectorInst *OuterSelector; - BasicBlock *InnerUnwindDest; - PHINode *InnerExceptionPHI; - PHINode *InnerSelectorPHI; - SmallVector<Value*, 8> UnwindDestPHIValues; - - // FIXME: New EH - These will replace the analogous ones above. BasicBlock *OuterResumeDest; //< Destination of the invoke's unwind. BasicBlock *InnerResumeDest; //< Destination for the callee's resume. LandingPadInst *CallerLPad; //< LandingPadInst associated with the invoke. PHINode *InnerEHValuesPHI; //< PHI for EH values from landingpad insts. + SmallVector<Value*, 8> UnwindDestPHIValues; public: InvokeInliningInfo(InvokeInst *II) - : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0), - InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0), - OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), + : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), CallerLPad(0), InnerEHValuesPHI(0) { // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing // the edge from this block. llvm::BasicBlock *InvokeBB = II->getParent(); - BasicBlock::iterator I = OuterUnwindDest->begin(); + BasicBlock::iterator I = OuterResumeDest->begin(); for (; isa<PHINode>(I); ++I) { // Save the value to use for this edge. PHINode *PHI = cast<PHINode>(I); UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); } - // FIXME: With the new EH, this if/dyn_cast should be a 'cast'. - if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) { - CallerLPad = LPI; - } + CallerLPad = cast<LandingPadInst>(I); } - /// The outer unwind destination is the target of unwind edges - /// introduced for calls within the inlined function. - BasicBlock *getOuterUnwindDest() const { - return OuterUnwindDest; + /// getOuterResumeDest - The outer unwind destination is the target of + /// unwind edges introduced for calls within the inlined function. + BasicBlock *getOuterResumeDest() const { + return OuterResumeDest; } - EHSelectorInst *getOuterSelector() { - if (!OuterSelector) - OuterSelector = findSelectorForLandingPad(OuterUnwindDest); - return OuterSelector; - } - - BasicBlock *getInnerUnwindDest(); - - // FIXME: New EH - Rename when new EH is turned on. - BasicBlock *getInnerUnwindDestNewEH(); + BasicBlock *getInnerResumeDest(); LandingPadInst *getLandingPadInst() const { return CallerLPad; } - bool forwardEHResume(CallInst *call, BasicBlock *src); - /// forwardResume - Forward the 'resume' instruction to the caller's landing /// pad block. When the landing pad block has only one predecessor, this is /// a simple branch. When there is more than one predecessor, we need to @@ -314,7 +88,7 @@ namespace { /// destination block for the given basic block, using the values for the /// original invoke's source block. void addIncomingPHIValuesFor(BasicBlock *BB) const { - addIncomingPHIValuesForInto(BB, OuterUnwindDest); + addIncomingPHIValuesForInto(BB, OuterResumeDest); } void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { @@ -327,113 +101,8 @@ namespace { }; } -/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to -/// llvm.eh.resume. -BasicBlock *InvokeInliningInfo::getInnerUnwindDest() { - if (InnerUnwindDest) return InnerUnwindDest; - - // Find and hoist the llvm.eh.exception and llvm.eh.selector calls - // in the outer landing pad to immediately following the phis. - EHSelectorInst *selector = getOuterSelector(); - if (!selector) return 0; - - // The call to llvm.eh.exception *must* be in the landing pad. - Instruction *exn = cast<Instruction>(selector->getArgOperand(0)); - assert(exn->getParent() == OuterUnwindDest); - - // TODO: recognize when we've already done this, so that we don't - // get a linear number of these when inlining calls into lots of - // invokes with the same landing pad. - - // Do the hoisting. - Instruction *splitPoint = exn->getParent()->getFirstNonPHI(); - assert(splitPoint != selector && "selector-on-exception dominance broken!"); - if (splitPoint == exn) { - selector->removeFromParent(); - selector->insertAfter(exn); - splitPoint = selector->getNextNode(); - } else { - exn->moveBefore(splitPoint); - selector->moveBefore(splitPoint); - } - - // Split the landing pad. - InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint, - OuterUnwindDest->getName() + ".body"); - - // The number of incoming edges we expect to the inner landing pad. - const unsigned phiCapacity = 2; - - // Create corresponding new phis for all the phis in the outer landing pad. - BasicBlock::iterator insertPoint = InnerUnwindDest->begin(); - BasicBlock::iterator I = OuterUnwindDest->begin(); - for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { - PHINode *outerPhi = cast<PHINode>(I); - PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity, - outerPhi->getName() + ".lpad-body", - insertPoint); - outerPhi->replaceAllUsesWith(innerPhi); - innerPhi->addIncoming(outerPhi, OuterUnwindDest); - } - - // Create a phi for the exception value... - InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity, - "exn.lpad-body", insertPoint); - exn->replaceAllUsesWith(InnerExceptionPHI); - selector->setArgOperand(0, exn); // restore this use - InnerExceptionPHI->addIncoming(exn, OuterUnwindDest); - - // ...and the selector. - InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity, - "selector.lpad-body", insertPoint); - selector->replaceAllUsesWith(InnerSelectorPHI); - InnerSelectorPHI->addIncoming(selector, OuterUnwindDest); - - // All done. - return InnerUnwindDest; -} - -/// [LIBUNWIND] Try to forward the given call, which logically occurs -/// at the end of the given block, as a branch to the inner unwind -/// block. Returns true if the call was forwarded. -bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) { - // First, check whether this is a call to the intrinsic. - Function *fn = dyn_cast<Function>(call->getCalledValue()); - if (!fn || fn->getName() != "llvm.eh.resume") - return false; - - // At this point, we need to return true on all paths, because - // otherwise we'll construct an invoke of the intrinsic, which is - // not well-formed. - - // Try to find or make an inner unwind dest, which will fail if we - // can't find a selector call for the outer unwind dest. - BasicBlock *dest = getInnerUnwindDest(); - bool hasSelector = (dest != 0); - - // If we failed, just use the outer unwind dest, dropping the - // exception and selector on the floor. - if (!hasSelector) - dest = OuterUnwindDest; - - // Make a branch. - BranchInst::Create(dest, src); - - // Update the phis in the destination. They were inserted in an - // order which makes this work. - addIncomingPHIValuesForInto(src, dest); - - if (hasSelector) { - InnerExceptionPHI->addIncoming(call->getArgOperand(0), src); - InnerSelectorPHI->addIncoming(call->getArgOperand(1), src); - } - - return true; -} - -/// Get or create a target for the branch from ResumeInsts. -BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() { - // FIXME: New EH - rename this function when new EH is turned on. +/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts. +BasicBlock *InvokeInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; // Split the landing pad. @@ -472,7 +141,7 @@ BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() { /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. void InvokeInliningInfo::forwardResume(ResumeInst *RI) { - BasicBlock *Dest = getInnerUnwindDestNewEH(); + BasicBlock *Dest = getInnerResumeDest(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -485,14 +154,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) { RI->eraseFromParent(); } -/// [LIBUNWIND] Check whether this selector is "only cleanups": -/// call i32 @llvm.eh.selector(blah, blah, i32 0) -static bool isCleanupOnlySelector(EHSelectorInst *selector) { - if (selector->getNumArgOperands() != 3) return false; - ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2)); - return (val && val->isZero()); -} - /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into /// an invoke, we have to turn all of the calls that can throw into /// invokes. This function analyze BB to see if there are any calls, and if so, @@ -507,77 +168,34 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; - if (LPI) // FIXME: New EH - This won't be NULL in the new EH. - if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { - unsigned NumClauses = LPI->getNumClauses(); - L->reserveClauses(NumClauses); - for (unsigned i = 0; i != NumClauses; ++i) - L->addClause(LPI->getClause(i)); - } + if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { + unsigned NumClauses = LPI->getNumClauses(); + L->reserveClauses(NumClauses); + for (unsigned i = 0; i != NumClauses; ++i) + L->addClause(LPI->getClause(i)); + } // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast<CallInst>(I); - if (CI == 0) continue; - - // LIBUNWIND: merge selector instructions. - if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) { - EHSelectorInst *Outer = Invoke.getOuterSelector(); - if (!Outer) continue; - - bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner); - bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer); - - // If both selectors contain only cleanups, we don't need to do - // anything. TODO: this is really just a very specific instance - // of a much more general optimization. - if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue; - - // Otherwise, we just append the outer selector to the inner selector. - SmallVector<Value*, 16> NewSelector; - for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i) - NewSelector.push_back(Inner->getArgOperand(i)); - for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i) - NewSelector.push_back(Outer->getArgOperand(i)); - - CallInst *NewInner = - IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector); - // No need to copy attributes, calling convention, etc. - NewInner->takeName(Inner); - Inner->replaceAllUsesWith(NewInner); - Inner->eraseFromParent(); - continue; - } - + // If this call cannot unwind, don't convert it to an invoke. - if (CI->doesNotThrow()) + if (!CI || CI->doesNotThrow()) continue; - - // Convert this function call into an invoke instruction. - // First, split the basic block. + + // Convert this function call into an invoke instruction. First, split the + // basic block. BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); // Delete the unconditional branch inserted by splitBasicBlock BB->getInstList().pop_back(); - // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch - // directly to the new landing pad. - if (Invoke.forwardEHResume(CI, BB)) { - // TODO: 'Split' is now unreachable; clean it up. - - // We want to leave the original call intact so that the call - // graph and other structures won't get misled. We also have to - // avoid processing the next block, or we'll iterate here forever. - return true; - } - - // Otherwise, create the new invoke instruction. + // Create the new invoke instruction. ImmutableCallSite CS(CI); SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); - InvokeInst *II = - InvokeInst::Create(CI->getCalledValue(), Split, - Invoke.getOuterUnwindDest(), - InvokeArgs, CI->getName(), BB); + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, + Invoke.getOuterResumeDest(), + InvokeArgs, CI->getName(), BB); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -585,21 +203,20 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // updates the CallGraph if present, because it uses a WeakVH. CI->replaceAllUsesWith(II); - Split->getInstList().pop_front(); // Delete the original call + // Delete the original call + Split->getInstList().pop_front(); - // Update any PHI nodes in the exceptional block to indicate that - // there is now a new entry in them. + // Update any PHI nodes in the exceptional block to indicate that there is + // now a new entry in them. Invoke.addIncomingPHIValuesFor(BB); return false; } return false; } - /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls -/// in the body of the inlined function into invokes and turn unwind -/// instructions into branches to the invoke unwind dest. +/// in the body of the inlined function into invokes. /// /// II is the invoke instruction being inlined. FirstNewBlock is the first /// block of the inlined code (the last block is the end of the function), @@ -614,7 +231,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // start of the inlined code to its end, checking for stuff we need to // rewrite. If the code doesn't have calls or unwinds, we know there is // nothing to rewrite. - if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) { + if (!InlinedCodeInfo.ContainsCalls) { // Now that everything is happy, we have one final detail. The PHI nodes in // the exception destination block still have entries due to the original // invoke instruction. Eliminate these entries (which might even delete the @@ -628,30 +245,13 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { - // Honor a request to skip the next block. We don't need to - // consider UnwindInsts in this case either. + // Honor a request to skip the next block. ++BB; continue; } - if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - // An UnwindInst requires special handling when it gets inlined into an - // invoke site. Once this happens, we know that the unwind would cause - // a control transfer to the invoke exception destination, so we can - // transform it into a direct branch to the exception destination. - BranchInst::Create(InvokeDest, UI); - - // Delete the unwind instruction! - UI->eraseFromParent(); - - // Update any PHI nodes in the exceptional block to indicate that - // there is now a new entry in them. - Invoke.addIncomingPHIValuesFor(BB); - } - - if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) Invoke.forwardResume(RI); - } } // Now that everything is happy, we have one final detail. The PHI nodes in @@ -836,8 +436,8 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { return false; } -/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively -/// update InlinedAtEntry of a DebugLoc. +/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to +/// recursively update InlinedAtEntry of a DebugLoc. static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, const DebugLoc &InlinedAtDL, LLVMContext &Ctx) { @@ -847,16 +447,15 @@ static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), NewInlinedAtDL.getAsMDNode(Ctx)); } - + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), InlinedAtDL.getAsMDNode(Ctx)); } - /// fixupLineNumbers - Update inlined instructions' line numbers to /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, - Instruction *TheCall) { + Instruction *TheCall) { DebugLoc TheCallDL = TheCall->getDebugLoc(); if (TheCallDL.isUnknown()) return; @@ -878,18 +477,18 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } -// InlineFunction - This function inlines the called function into the basic -// block of the caller. This returns false if it is not possible to inline this -// call. The program is still in a well defined state if this occurs though. -// -// Note that this only does one level of inlining. For example, if the -// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now -// exists in the instruction stream. Similarly this will inline a recursive -// function by one level. -// -bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { +/// InlineFunction - This function inlines the called function into the basic +/// block of the caller. This returns false if it is not possible to inline +/// this call. The program is still in a well defined state if this occurs +/// though. +/// +/// Note that this only does one level of inlining. For example, if the +/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +/// exists in the instruction stream. Similarly this will inline a recursive +/// function by one level. +bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, + bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); - LLVMContext &Context = TheCall->getContext(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); @@ -924,43 +523,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { return false; } - // Find the personality function used by the landing pads of the caller. If it - // exists, then check to see that it matches the personality function used in - // the callee. - for (Function::const_iterator - I = Caller->begin(), E = Caller->end(); I != E; ++I) + // Get the personality function from the callee if it contains a landing pad. + Value *CalleePersonality = 0; + for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); + I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); - // FIXME: This 'isa' here should become go away once the new EH system is - // in place. - if (!isa<LandingPadInst>(BB->getFirstNonPHI())) - continue; - const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI()); - const Value *CallerPersFn = LP->getPersonalityFn(); - - // If the personality functions match, then we can perform the - // inlining. Otherwise, we can't inline. - // TODO: This isn't 100% true. Some personality functions are proper - // supersets of others and can be used in place of the other. - for (Function::const_iterator - I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) - if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { - const BasicBlock *BB = II->getUnwindDest(); - // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once - // the new EH system is in place. - if (const LandingPadInst *LP = - dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) - if (CallerPersFn != LP->getPersonalityFn()) - return false; - break; - } - + const LandingPadInst *LP = BB->getLandingPadInst(); + CalleePersonality = LP->getPersonalityFn(); break; } + // Find the personality function used by the landing pads of the caller. If it + // exists, then check to see that it matches the personality function used in + // the callee. + if (CalleePersonality) { + for (Function::const_iterator I = Caller->begin(), E = Caller->end(); + I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + const LandingPadInst *LP = BB->getLandingPadInst(); + + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + if (LP->getPersonalityFn() != CalleePersonality) + return false; + + break; + } + } + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. - // Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned @@ -987,7 +583,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. - if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) { + if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); @@ -1023,7 +619,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. - // { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), @@ -1063,7 +658,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. - if (!IFI.StaticAllocas.empty()) { + if (InsertLifetime && !IFI.StaticAllocas.empty()) { IRBuilder<> builder(FirstNewBlock->begin()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; @@ -1098,20 +693,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); } - - // Count the number of StackRestore calls we insert. - unsigned NumStackRestores = Returns.size(); - - // If we are inlining an invoke instruction, insert restores before each - // unwind. These unwinds will be rewritten into branches later. - if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) { - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); - BB != E; ++BB) - if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr); - ++NumStackRestores; - } - } } // If we are inlining tail call instruction through a call site that isn't @@ -1131,21 +712,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { } } - // If we are inlining through a 'nounwind' call site then any inlined 'unwind' - // instructions are unreachable. - if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind) - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); - BB != E; ++BB) { - TerminatorInst *Term = BB->getTerminator(); - if (isa<UnwindInst>(Term)) { - new UnreachableInst(Context, Term); - BB->getInstList().erase(Term); - } - } - // If we are inlining for an invoke instruction, we must make sure to rewrite - // any inlined 'unwind' instructions into branches to the invoke exception - // destination, and call instructions into invoke instructions. + // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); @@ -1308,11 +876,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. - if (PHI) + if (PHI) { if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } + } return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 7034feb..d1c4d59 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" @@ -105,33 +106,32 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { // If we are switching on a constant, we can convert the switch into a // single branch instruction! ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition()); - BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest + BasicBlock *TheOnlyDest = SI->getDefaultDest(); BasicBlock *DefaultDest = TheOnlyDest; - assert(TheOnlyDest == SI->getDefaultDest() && - "Default destination is not successor #0?"); // Figure out which case it goes to. - for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { // Found case matching a constant operand? - if (SI->getSuccessorValue(i) == CI) { - TheOnlyDest = SI->getSuccessor(i); + if (i.getCaseValue() == CI) { + TheOnlyDest = i.getCaseSuccessor(); break; } // Check to see if this branch is going to the same place as the default // dest. If so, eliminate it as an explicit compare. - if (SI->getSuccessor(i) == DefaultDest) { + if (i.getCaseSuccessor() == DefaultDest) { // Remove this entry. DefaultDest->removePredecessor(SI->getParent()); SI->removeCase(i); - --i; --e; // Don't skip an entry... + --i; --e; continue; } // Otherwise, check to see if the switch only branches to one destination. // We do this by reseting "TheOnlyDest" to null when we find two non-equal // destinations. - if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0; + if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0; } if (CI && !TheOnlyDest) { @@ -165,14 +165,16 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { return true; } - if (SI->getNumSuccessors() == 2) { + if (SI->getNumCases() == 1) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. + SwitchInst::CaseIt FirstCase = SI->case_begin(); Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), - SI->getSuccessorValue(1), "cond"); + FirstCase.getCaseValue(), "cond"); // Insert the new branch. - Builder.CreateCondBr(Cond, SI->getSuccessor(1), SI->getSuccessor(0)); + Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); // Delete the old switch. SI->eraseFromParent(); @@ -257,6 +259,13 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { II->getIntrinsicID() == Intrinsic::lifetime_end) return isa<UndefValue>(II->getArgOperand(1)); } + + if (extractMallocCall(I)) return true; + + if (CallInst *CI = isFreeCall(I)) + if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) + return C->isNullValue() || isa<UndefValue>(C); + return false; } @@ -346,22 +355,27 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// instructions in other blocks as well in this block. bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { bool MadeChange = false; - for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { + +#ifndef NDEBUG + // In debug builds, ensure that the terminator of the block is never replaced + // or deleted by these simplifications. The idea of simplification is that it + // cannot introduce new instructions, and there is no way to replace the + // terminator of a block without introducing a new instruction. + AssertingVH<Instruction> TerminatorVH(--BB->end()); +#endif + + for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) { + assert(!BI->isTerminator()); Instruction *Inst = BI++; - - if (Value *V = SimplifyInstruction(Inst, TD)) { - WeakVH BIHandle(BI); - ReplaceAndSimplifyAllUses(Inst, V, TD); + + WeakVH BIHandle(BI); + if (recursivelySimplifyInstruction(Inst, TD)) { MadeChange = true; if (BIHandle != BI) BI = BB->begin(); continue; } - if (Inst->isTerminator()) - break; - - WeakVH BIHandle(BI); MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); if (BIHandle != BI) BI = BB->begin(); @@ -399,17 +413,11 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, WeakVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); + Value *OldPhiIt = PhiIt; - Value *PNV = SimplifyInstruction(PN, TD); - if (PNV == 0) continue; + if (!recursivelySimplifyInstruction(PN, TD)) + continue; - // If we're able to simplify the phi to a single value, substitute the new - // value into all of its uses. - assert(PNV != PN && "SimplifyInstruction broken!"); - - Value *OldPhiIt = PhiIt; - ReplaceAndSimplifyAllUses(PN, PNV, TD); - // If recursive simplification ended up deleting the next PHI node we would // iterate to, then our iterator is invalid, restart scanning from the top // of the block. @@ -486,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { if (Succ->getSinglePredecessor()) return true; // Make a list of the predecessors of BB - typedef SmallPtrSet<BasicBlock*, 16> BlockSet; - BlockSet BBPreds(pred_begin(BB), pred_end(BB)); - - // Use that list to make another list of common predecessors of BB and Succ - BlockSet CommonPreds; - for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); - PI != PE; ++PI) { - BasicBlock *P = *PI; - if (BBPreds.count(P)) - CommonPreds.insert(P); - } + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); - // Shortcut, if there are no common predecessors, merging is always safe - if (CommonPreds.empty()) - return true; - // Look at all the phi nodes in Succ, to see if they present a conflict when // merging these blocks for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { @@ -512,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // merge the phi nodes and then the blocks can still be merged PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); if (BBPN && BBPN->getParent() == BB) { - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { - if (BBPN->getIncomingValueForBlock(*PI) - != PN->getIncomingValueForBlock(*PI)) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) { DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with " << BBPN->getName() << " with regard to common predecessor " - << (*PI)->getName() << "\n"); + << IBB->getName() << "\n"); return false; } } } else { Value* Val = PN->getIncomingValueForBlock(BB); - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { // See if the incoming value for the common predecessor is equal to the // one for BB, in which case this phi node will not prevent the merging // of the block. - if (Val != PN->getIncomingValueForBlock(*PI)) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) { DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with regard to common " - << "predecessor " << (*PI)->getName() << "\n"); + << "predecessor " << IBB->getName() << "\n"); return false; } } @@ -740,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, // If there is a large requested alignment and we can, bump up the alignment // of the global. if (GV->isDeclaration()) return Align; + // If the memory we set aside for the global may not be the memory used by + // the final program then it is impossible for us to reliably enforce the + // preferred alignment. + if (GV->isWeakForLinker()) return Align; if (GV->getAlignment() >= PrefAlign) return GV->getAlignment(); @@ -764,9 +762,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + ComputeMaskedBits(V, KnownZero, KnownOne, TD); unsigned TrailZ = KnownZero.countTrailingOnes(); // Avoid trouble with rediculously large TrailZ values, such as diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index cbd54a8..0bc185d 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -99,7 +99,8 @@ namespace { bool ProcessLoop(Loop *L, LPPassManager &LPM); BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); BasicBlock *InsertPreheaderForLoop(Loop *L); - Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM); + Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader); BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); void PlaceSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl<BasicBlock*> &SplitPreds, @@ -240,7 +241,7 @@ ReprocessLoop: // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { - if (SeparateNestedLoop(L, LPM)) { + if (SeparateNestedLoop(L, LPM, Preheader)) { ++NumNested; // This is a big restructuring change, reprocess the whole loop. Changed = true; @@ -265,7 +266,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { } // Split out the loop pre-header. - BasicBlock *NewBB = - SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), - ".preheader", this); + BasicBlock *PreheaderBB; + if (!Header->isLandingPad()) { + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", + this); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", + ".split-lp", this, NewBBs); + PreheaderBB = NewBBs[0]; + } - NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); - DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName() - << "\n"); + PreheaderBB->getTerminator()->setDebugLoc( + Header->getFirstNonPHI()->getDebugLoc()); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. - PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); + PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); - return NewBB; + return PreheaderBB; } /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit @@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { this, NewBBs); NewExitBB = NewBBs[0]; } else { - NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], - LoopBlocks.size(), ".loopexit", - this); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this); } DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " @@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -529,7 +536,16 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, /// If we are able to separate out a loop, return the new outer loop that was /// created. /// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader) { + // Don't try to separate loops without a preheader. + if (!Preheader) + return 0; + + // The header is not a landing pad; preheader insertion should ensure this. + assert(!L->getHeader()->isLandingPad() && + "Can't insert backedge to landing pad"); + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); if (PN == 0) return 0; // No known way to partition. @@ -537,16 +553,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { // handles the case when a PHI node has multiple instances of itself as // arguments. SmallVector<BasicBlock*, 8> OuterLoopPreds; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) != PN || !L->contains(PN->getIncomingBlock(i))) { // We can't split indirectbr edges. if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) return 0; - OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } - + } DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); // If ScalarEvolution is around and knows anything about values in @@ -556,9 +571,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { SE->forgetLoop(L); BasicBlock *Header = L->getHeader(); - BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], - OuterLoopPreds.size(), - ".outer", this); + BasicBlock *NewBB = + SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -640,6 +654,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { if (!Preheader) return 0; + // The header is not a landing pad; preheader insertion should ensure this. + assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); + // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 62e4fa2..e15497a 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, - unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { + bool AllowRuntime, unsigned TripMultiple, + LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -148,6 +149,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, return false; } + // Loops with indirectbr cannot be cloned. + if (!L->isSafeToClone()) { + DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); + return false; + } + BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); @@ -165,12 +172,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, return false; } - // Notify ScalarEvolution that the loop will be substantially changed, - // if not outright eliminated. - ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); - if (SE) - SE->forgetLoop(L); - if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) @@ -181,6 +182,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (TripCount != 0 && Count > TripCount) Count = TripCount; + // Don't enter the unroll code if there is nothing to do. This way we don't + // need to support "partial unrolling by 1". + if (TripCount == 0 && Count < 2) + return false; + assert(Count > 0); assert(TripMultiple > 0); assert(TripCount == 0 || TripCount % TripMultiple == 0); @@ -188,6 +194,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; + // We assume a run-time trip count if the compiler cannot + // figure out the loop trip count and the unroll-runtime + // flag is specified. + bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); + + if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) + return false; + + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE) + SE->forgetLoop(L); + // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { @@ -209,6 +229,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + } else if (RuntimeTripCount) { + DEBUG(dbgs() << " with run-time trip count"); } DEBUG(dbgs() << "!\n"); } @@ -332,6 +354,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, BasicBlock *Dest = Headers[j]; bool NeedConditional = true; + if (RuntimeTripCount && j != 0) { + NeedConditional = false; + } + // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp new file mode 100644 index 0000000..3aa6bef --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -0,0 +1,372 @@ +//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities for loops with run-time +// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time +// trip counts. +// +// The functions in this file are used to generate extra code when the +// run-time trip count modulo the unroll factor is not 0. When this is the +// case, we need to generate code to execute these 'left over' iterations. +// +// The current strategy generates an if-then-else sequence prior to the +// unrolled loop to execute the 'left over' iterations. Other strategies +// include generate a loop before or after the unrolled loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unroll" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/BasicBlock.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <algorithm> + +using namespace llvm; + +STATISTIC(NumRuntimeUnrolled, + "Number of loops unrolled with run-time trip counts"); + +/// Connect the unrolling prolog code to the original loop. +/// The unrolling prolog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Create PHI nodes at prolog end block to combine values +/// that exit the prolog code and jump around the prolog. +/// - Add a PHI operand to a PHI node at the loop exit block +/// for values that exit the prolog and go around the loop. +/// - Branch around the original loop if the trip count is less +/// than the unroll factor. +/// +static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, + BasicBlock *LastPrologBB, BasicBlock *PrologEnd, + BasicBlock *OrigPH, BasicBlock *NewPH, + ValueToValueMapTy &LVMap, Pass *P) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch != 0 && "Loop must have a latch"); + + // Create a PHI node for each outgoing value from the original loop + // (which means it is an outgoing value from the prolog code too). + // The new PHI node is inserted in the prolog end basic block. + // The new PHI name is added as an operand of a PHI node in either + // the loop header or the loop exit block. + for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch); + SBI != SBE; ++SBI) { + for (BasicBlock::iterator BBI = (*SBI)->begin(); + PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { + + // Add a new PHI node to the prolog end block and add the + // appropriate incoming values. + PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr", + PrologEnd->getTerminator()); + // Adding a value to the new PHI node from the original loop preheader. + // This is the value that skips all the prolog code. + if (L->contains(PN)) { + NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH); + } else { + NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH); + } + + Value *V = PN->getIncomingValueForBlock(Latch); + if (Instruction *I = dyn_cast<Instruction>(V)) { + if (L->contains(I)) { + V = LVMap[I]; + } + } + // Adding a value to the new PHI node from the last prolog block + // that was created. + NewPN->addIncoming(V, LastPrologBB); + + // Update the existing PHI node operand with the value from the + // new PHI node. How this is done depends on if the existing + // PHI node is in the original loop block, or the exit block. + if (L->contains(PN)) { + PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); + } else { + PN->addIncoming(NewPN, PrologEnd); + } + } + } + + // Create a branch around the orignal loop, which is taken if the + // trip count is less than the unroll factor. + Instruction *InsertPt = PrologEnd->getTerminator(); + Instruction *BrLoopExit = + new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, + ConstantInt::get(TripCount->getType(), Count)); + BasicBlock *Exit = L->getUniqueExitBlock(); + assert(Exit != 0 && "Loop must have a single exit block only"); + // Split the exit to maintain loop canonicalization guarantees + SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); + if (!Exit->isLandingPad()) { + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa", + P, NewBBs); + } + // Add the branch to the exit block (around the unrolled loop) + BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt); + InsertPt->eraseFromParent(); +} + +/// Create a clone of the blocks in a loop and connect them together. +/// This function doesn't create a clone of the loop structure. +/// +/// There are two value maps that are defined and used. VMap is +/// for the values in the current loop instance. LVMap contains +/// the values from the last loop instance. We need the LVMap values +/// to update the inital values for the current loop instance. +/// +static void CloneLoopBlocks(Loop *L, + bool FirstCopy, + BasicBlock *InsertTop, + BasicBlock *InsertBot, + std::vector<BasicBlock *> &NewBlocks, + LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, + ValueToValueMapTy &LVMap, + LoopInfo *LI) { + + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F); + NewBlocks.push_back(NewBB); + + if (Loop *ParentLoop = L->getParentLoop()) + ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + + VMap[*BB] = NewBB; + if (Header == *BB) { + // For the first block, add a CFG connection to this newly + // created block + InsertTop->getTerminator()->setSuccessor(0, NewBB); + + // Change the incoming values to the ones defined in the + // previously cloned loop. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[I]); + if (FirstCopy) { + // We replace the first phi node with the value from the preheader + VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); + NewBB->getInstList().erase(NewPHI); + } else { + // Update VMap with values from the previous block + unsigned idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(idx); + if (Instruction *I = dyn_cast<Instruction>(InVal)) + if (L->contains(I)) + InVal = LVMap[InVal]; + NewPHI->setIncomingValue(idx, InVal); + NewPHI->setIncomingBlock(idx, InsertTop); + } + } + } + + if (Latch == *BB) { + VMap.erase((*BB)->getTerminator()); + NewBB->getTerminator()->eraseFromParent(); + BranchInst::Create(InsertBot, NewBB); + } + } + // LastValueMap is updated with the values for the current loop + // which are used the next time this function is called. + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) { + LVMap[VI->first] = VI->second; + } +} + +/// Insert code in the prolog code when unrolling a loop with a +/// run-time trip-count. +/// +/// This method assumes that the loop unroll factor is total number +/// of loop bodes in the loop after unrolling. (Some folks refer +/// to the unroll factor as the number of *extra* copies added). +/// We assume also that the loop unroll factor is a power-of-two. So, after +/// unrolling the loop, the number of loop bodies executed is 2, +/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch +/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for +/// the switch instruction is generated. +/// +/// extraiters = tripcount % loopfactor +/// if (extraiters == 0) jump Loop: +/// if (extraiters == loopfactor) jump L1 +/// if (extraiters == loopfactor-1) jump L2 +/// ... +/// L1: LoopBody; +/// L2: LoopBody; +/// ... +/// if tripcount < loopfactor jump End +/// Loop: +/// ... +/// End: +/// +bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, + LPPassManager *LPM) { + // for now, only unroll loops that contain a single exit + if (!L->getExitingBlock()) + return false; + + // Make sure the loop is in canonical form, and there is a single + // exit block only. + if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0) + return false; + + // Use Scalar Evolution to compute the trip count. This allows more + // loops to be unrolled than relying on induction var simplification + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE == 0) + return false; + + // Only unroll loops with a computable trip count and the trip count needs + // to be an int value (allowing a pointer type is a TODO item) + const SCEV *BECount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) + return false; + + // Add 1 since the backedge count doesn't include the first loop iteration + const SCEV *TripCountSC = + SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); + if (isa<SCEVCouldNotCompute>(TripCountSC)) + return false; + + // We only handle cases when the unroll factor is a power of 2. + // Count is the loop unroll factor, the number of extra copies added + 1. + if ((Count & (Count-1)) != 0) + return false; + + // If this loop is nested, then the loop unroller changes the code in + // parent loop, so the Scalar Evolution pass needs to be run again + if (Loop *ParentLoop = L->getParentLoop()) + SE->forgetLoop(ParentLoop); + + BasicBlock *PH = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + // It helps to splits the original preheader twice, one for the end of the + // prolog code and one for a new loop preheader + BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); + BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); + BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); + + // Compute the number of extra iterations required, which is: + // extra iterations = run-time trip count % (loop unroll factor + 1) + SCEVExpander Expander(*SE, "loop-unroll"); + Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), + PreHeaderBR); + Type *CountTy = TripCount->getType(); + BinaryOperator *ModVal = + BinaryOperator::CreateURem(TripCount, + ConstantInt::get(CountTy, Count), + "xtraiter"); + ModVal->insertBefore(PreHeaderBR); + + // Check if for no extra iterations, then jump to unrolled loop + Value *BranchVal = new ICmpInst(PreHeaderBR, + ICmpInst::ICMP_NE, ModVal, + ConstantInt::get(CountTy, 0), "lcmp"); + // Branch to either the extra iterations or the unrolled loop + // We will fix up the true branch label when adding loop body copies + BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); + assert(PreHeaderBR->isUnconditional() && + PreHeaderBR->getSuccessor(0) == PEnd && + "CFG edges in Preheader are not correct"); + PreHeaderBR->eraseFromParent(); + + ValueToValueMapTy LVMap; + Function *F = Header->getParent(); + // These variables are used to update the CFG links in each iteration + BasicBlock *CompareBB = 0; + BasicBlock *LastLoopBB = PH; + // Get an ordered list of blocks in the loop to help with the ordering of the + // cloned blocks in the prolog code + LoopBlocksDFS LoopBlocks(L); + LoopBlocks.perform(LI); + + // + // For each extra loop iteration, create a copy of the loop's basic blocks + // and generate a condition that branches to the copy depending on the + // number of 'left over' iterations. + // + for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) { + std::vector<BasicBlock*> NewBlocks; + ValueToValueMapTy VMap; + + // Clone all the basic blocks in the loop, but we don't clone the loop + // This function adds the appropriate CFG connections. + CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks, + LoopBlocks, VMap, LVMap, LI); + LastLoopBB = cast<BasicBlock>(VMap[Latch]); + + // Insert the cloned blocks into function just before the original loop + F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), + NewBlocks[0], F->end()); + + // Generate the code for the comparison which determines if the loop + // prolog code needs to be executed. + if (leftOverIters == Count-1) { + // There is no compare block for the fall-thru case when for the last + // left over iteration + CompareBB = NewBlocks[0]; + } else { + // Create a new block for the comparison + BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp", + F, CompareBB); + if (Loop *ParentLoop = L->getParentLoop()) { + // Add the new block to the parent loop, if needed + ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } + + // The comparison w/ the extra iteration value and branch + Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, + ConstantInt::get(CountTy, leftOverIters), + "un.tmp"); + // Branch to either the extra iterations or the unrolled loop + BranchInst::Create(NewBlocks[0], CompareBB, + BranchVal, NewBB); + CompareBB = NewBB; + PH->getTerminator()->setSuccessor(0, NewBB); + VMap[NewPH] = CompareBB; + } + + // Rewrite the cloned instruction operands to use the values + // created when the clone is created. + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) { + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); I != E; ++I) { + RemapInstruction(I, VMap, + RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); + } + } + } + + // Connect the prolog code to the original loop and update the + // PHI functions. + ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap, + LPM->getAsPass()); + NumRuntimeUnrolled++; + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index 61ab3f6..c70ced1 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -1,3 +1,16 @@ +//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers the 'expect' intrinsic to LLVM metadata. +// +//===----------------------------------------------------------------------===// + #define DEBUG_TYPE "lower-expect-intrinsic" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -60,14 +73,17 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { LLVMContext &Context = CI->getContext(); Type *Int32Ty = Type::getInt32Ty(Context); - unsigned caseNo = SI->findCaseValue(ExpectedValue); + SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue); std::vector<Value *> Vec; unsigned n = SI->getNumCases(); - Vec.resize(n + 1); // +1 for MDString + Vec.resize(n + 1 + 1); // +1 for MDString and +1 for default case Vec[0] = MDString::get(Context, "branch_weights"); + Vec[1] = ConstantInt::get(Int32Ty, Case == SI->case_default() ? + LikelyBranchWeight : UnlikelyBranchWeight); for (unsigned i = 0; i < n; ++i) { - Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight); + Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == Case.getCaseIndex() ? + LikelyBranchWeight : UnlikelyBranchWeight); } MDNode *WeightsNode = llvm::MDNode::get(Context, Vec); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index c96c8fc..9305554 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -54,7 +54,6 @@ using namespace llvm; STATISTIC(NumInvokes, "Number of invokes replaced"); -STATISTIC(NumUnwinds, "Number of unwinds replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", @@ -193,20 +192,6 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { BB->getInstList().erase(II); ++NumInvokes; Changed = true; - } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - // Insert a call to abort() - CallInst::Create(AbortFn, "", UI)->setTailCall(); - - // Insert a return instruction. This really should be a "barrier", as it - // is unreachable. - ReturnInst::Create(F.getContext(), - F.getReturnType()->isVoidTy() ? - 0 : Constant::getNullValue(F.getReturnType()), UI); - - // Remove the unwind instruction now. - BB->getInstList().erase(UI); - - ++NumUnwinds; Changed = true; } return Changed; } @@ -404,7 +389,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { bool LowerInvoke::insertExpensiveEHSupport(Function &F) { SmallVector<ReturnInst*,16> Returns; - SmallVector<UnwindInst*,16> Unwinds; SmallVector<InvokeInst*,16> Invokes; UnreachableInst* UnreachablePlaceholder = 0; @@ -415,14 +399,11 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { Returns.push_back(RI); } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Invokes.push_back(II); - } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - Unwinds.push_back(UI); } - if (Unwinds.empty() && Invokes.empty()) return false; + if (Invokes.empty()) return false; NumInvokes += Invokes.size(); - NumUnwinds += Unwinds.size(); // TODO: This is not an optimal way to do this. In particular, this always // inserts setjmp calls into the entries of functions with invoke instructions @@ -572,13 +553,6 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { CallInst::Create(AbortFn, "", TermBlock->getTerminator())->setTailCall(); - - // Replace all unwinds with a branch to the unwind handler. - for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(UnwindHandler, Unwinds[i]); - Unwinds[i]->eraseFromParent(); - } - // Replace the inserted unreachable with a branch to the unwind handler. if (UnreachablePlaceholder) { BranchInst::Create(UnwindHandler, UnreachablePlaceholder); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 686178c..a16130d 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -237,10 +237,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { unsigned numCmps = 0; // Start with "simple" cases - for (unsigned i = 1; i < SI->getNumSuccessors(); ++i) - Cases.push_back(CaseRange(SI->getSuccessorValue(i), - SI->getSuccessorValue(i), - SI->getSuccessor(i))); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), + i.getCaseSuccessor())); + std::sort(Cases.begin(), Cases.end(), CaseCmp()); // Merge case into clusters @@ -281,7 +281,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { BasicBlock* Default = SI->getDefaultDest(); // If there is only the default destination, don't bother with the code below. - if (SI->getNumCases() == 1) { + if (!SI->getNumCases()) { BranchInst::Create(SI->getDefaultDest(), CurBlock); CurBlock->getInstList().erase(SI); return; diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp new file mode 100644 index 0000000..8491c55 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -0,0 +1,64 @@ +//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on Modules. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Support/IRBuilder.h" + +using namespace llvm; + +static void appendToGlobalArray(const char *Array, + Module &M, Function *F, int Priority) { + IRBuilder<> IRB(M.getContext()); + FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); + StructType *Ty = StructType::get( + IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL); + + Constant *RuntimeCtorInit = ConstantStruct::get( + Ty, IRB.getInt32(Priority), F, NULL); + + // Get the current set of static global constructors and add the new ctor + // to the list. + SmallVector<Constant *, 16> CurrentCtors; + if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) { + if (Constant *Init = GVCtor->getInitializer()) { + unsigned n = Init->getNumOperands(); + CurrentCtors.reserve(n + 1); + for (unsigned i = 0; i != n; ++i) + CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); + } + GVCtor->eraseFromParent(); + } + + CurrentCtors.push_back(RuntimeCtorInit); + + // Create a new initializer. + ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(), + CurrentCtors.size()); + Constant *NewInit = ConstantArray::get(AT, CurrentCtors); + + // Create the new global variable and replace all uses of + // the old global variable with the new one. + (void)new GlobalVariable(M, NewInit->getType(), false, + GlobalValue::AppendingLinkage, NewInit, Array); +} + +void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) { + appendToGlobalArray("llvm.global_ctors", M, F, Priority); +} + +void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) { + appendToGlobalArray("llvm.global_dtors", M, F, Priority); +} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index db3e942..2357d81 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -41,6 +41,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -66,7 +67,8 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); } static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { - return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2; + using llvm::hash_value; + return static_cast<unsigned>(hash_value(Val)); } static bool isEqual(const EltTy &LHS, const EltTy &RHS) { return LHS == RHS; @@ -423,7 +425,8 @@ void PromoteMem2Reg::run() { // Finally, after the scan, check to see if the store is all that is left. if (Info.UsingBlocks.empty()) { - // Record debuginfo for the store and remove the declaration's debuginfo. + // Record debuginfo for the store and remove the declaration's + // debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { if (!DIB) DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); @@ -590,7 +593,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, 0, &DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index fa8061c..e60a41b 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -518,3 +518,10 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { User->eraseFromParent(); } } + +bool +LoadAndStorePromoter::isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &Insts) + const { + return std::find(Insts.begin(), Insts.end(), I) != Insts.end(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b8c3ab4..66dd2c9 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -14,16 +14,20 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/Operator.h" #include "llvm/Type.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -63,9 +67,8 @@ class SimplifyCFGOpt { bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI, IRBuilder<> &Builder); - bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); - bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder); + bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); bool SimplifyIndirectBr(IndirectBrInst *IBI); @@ -205,6 +208,42 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, return BI->getCondition(); } +/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the +/// given instruction, which is assumed to be safe to speculate. 1 means +/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. +static unsigned ComputeSpeculationCost(const User *I) { + assert(isSafeToSpeculativelyExecute(I) && + "Instruction is not safe to speculatively execute!"); + switch (Operator::getOpcode(I)) { + default: + // In doubt, be conservative. + return UINT_MAX; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast<GEPOperator>(I)->hasAllConstantIndices()) + return UINT_MAX; + return 1; + case Instruction::Load: + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + return 1; // These are all cheap. + + case Instruction::Call: + case Instruction::Select: + return 2; + } +} + /// DominatesMergePoint - If we have a merge point of an "if condition" as /// accepted above, return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -257,46 +296,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!I->isSafeToSpeculativelyExecute()) + if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = 0; - - switch (I->getOpcode()) { - default: return false; // Cannot hoist this out safely. - case Instruction::Load: - // We have to check to make sure there are no instructions before the - // load in its basic block, as we are going to hoist the load out to its - // predecessor. - if (PBB->getFirstNonPHIOrDbg() != I) - return false; - Cost = 1; - break; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. - if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices()) - return false; - Cost = 1; - break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::ICmp: - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - Cost = 1; - break; // These are all cheap and non-trapping instructions. - - case Instruction::Select: - Cost = 2; - break; - } + unsigned Cost = ComputeSpeculationCost(I); if (Cost > CostRemaining) return false; @@ -373,9 +376,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Span = Span.inverse(); // If there are a ton of values, we don't want to make a ginormous switch. - if (Span.getSetSize().ugt(8) || Span.isEmptySet() || - // We don't handle wrapped sets yet. - Span.isWrappedSet()) + if (Span.getSetSize().ugt(8) || Span.isEmptySet()) return 0; for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) @@ -430,9 +431,9 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, return 0; } - + static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { - Instruction* Cond = 0; + Instruction *Cond = 0; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Cond = dyn_cast<Instruction>(SI->getCondition()); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { @@ -479,8 +480,9 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, BasicBlock*> > &Cases) { if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Cases.reserve(SI->getNumCases()); - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i))); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(std::make_pair(i.getCaseValue(), + i.getCaseSuccessor())); return SI->getDefaultDest(); } @@ -603,11 +605,13 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); - for (unsigned i = SI->getNumCases()-1; i != 0; --i) - if (DeadCases.count(SI->getCaseValue(i))) { - SI->getSuccessor(i)->removePredecessor(TI->getParent()); + for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { + --i; + if (DeadCases.count(i.getCaseValue())) { + i.getCaseSuccessor()->removePredecessor(TI->getParent()); SI->removeCase(i); } + } DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; @@ -951,6 +955,20 @@ HoistTerminator: /// and an BB2 and the only successor of BB1 is BB2, hoist simple code /// (for now, restricted to a single instruction that's side effect free) from /// the BB1 into the branch block to speculatively execute it. +/// +/// Turn +/// BB: +/// %t1 = icmp +/// br i1 %t1, label %BB1, label %BB2 +/// BB1: +/// %t3 = add %t2, c +/// br label BB2 +/// BB2: +/// => +/// BB: +/// %t1 = icmp +/// %t4 = add %t2, c +/// %t3 = select i1 %t1, %t2, %t3 static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Only speculatively execution a single instruction (not counting the // terminator) for now. @@ -967,8 +985,29 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { return false; HInst = I; } - if (!HInst) - return false; + + BasicBlock *BIParent = BI->getParent(); + + // Check the instruction to be hoisted, if there is one. + if (HInst) { + // Don't hoist the instruction if it's unsafe or expensive. + if (!isSafeToSpeculativelyExecute(HInst)) + return false; + if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold) + return false; + + // Do not hoist the instruction if any of its operands are defined but not + // used in this BB. The transformation will prevent the operand from + // being sunk into the use block. + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + i != e; ++i) { + Instruction *OpI = dyn_cast<Instruction>(*i); + if (OpI && OpI->getParent() == BIParent && + !OpI->mayHaveSideEffects() && + !OpI->isUsedInBasicBlock(BIParent)) + return false; + } + } // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); @@ -983,130 +1022,78 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { Invert = true; } - // Turn - // BB: - // %t1 = icmp - // br i1 %t1, label %BB1, label %BB2 - // BB1: - // %t3 = add %t2, c - // br label BB2 - // BB2: - // => - // BB: - // %t1 = icmp - // %t4 = add %t2, c - // %t3 = select i1 %t1, %t2, %t3 - switch (HInst->getOpcode()) { - default: return false; // Not safe / profitable to hoist. - case Instruction::Add: - case Instruction::Sub: - // Not worth doing for vector ops. - if (HInst->getType()->isVectorTy()) - return false; - break; - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - // Don't mess with vector operations. - if (HInst->getType()->isVectorTy()) - return false; - break; // These are all cheap and non-trapping instructions. - } - - // If the instruction is obviously dead, don't try to predicate it. - if (HInst->use_empty()) { - HInst->eraseFromParent(); - return true; + // Collect interesting PHIs, and scan for hazards. + SmallSetVector<std::pair<Value *, Value *>, 4> PHIs; + BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0); + for (BasicBlock::iterator I = BB2->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BIParentV = PN->getIncomingValueForBlock(BIParent); + + // Skip PHIs which are trivial. + if (BB1V == BIParentV) + continue; + + // Check for saftey. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) { + // An unfolded ConstantExpr could end up getting expanded into + // Instructions. Don't speculate this and another instruction at + // the same time. + if (HInst) + return false; + if (!isSafeToSpeculativelyExecute(CE)) + return false; + if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) + return false; + } + + // Ok, we may insert a select for this PHI. + PHIs.insert(std::make_pair(BB1V, BIParentV)); } - // Can we speculatively execute the instruction? And what is the value - // if the condition is false? Consider the phi uses, if the incoming value - // from the "if" block are all the same V, then V is the value of the - // select if the condition is false. - BasicBlock *BIParent = BI->getParent(); - SmallVector<PHINode*, 4> PHIUses; - Value *FalseV = NULL; + // If there are no PHIs to process, bail early. This helps ensure idempotence + // as well. + if (PHIs.empty()) + return false; - BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0); - for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end(); - UI != E; ++UI) { - // Ignore any user that is not a PHI node in BB2. These can only occur in - // unreachable blocks, because they would not be dominated by the instr. - PHINode *PN = dyn_cast<PHINode>(*UI); - if (!PN || PN->getParent() != BB2) - return false; - PHIUses.push_back(PN); - - Value *PHIV = PN->getIncomingValueForBlock(BIParent); - if (!FalseV) - FalseV = PHIV; - else if (FalseV != PHIV) - return false; // Inconsistent value when condition is false. - } - - assert(FalseV && "Must have at least one user, and it must be a PHI"); - - // Do not hoist the instruction if any of its operands are defined but not - // used in this BB. The transformation will prevent the operand from - // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); - i != e; ++i) { - Instruction *OpI = dyn_cast<Instruction>(*i); - if (OpI && OpI->getParent() == BIParent && - !OpI->isUsedInBasicBlock(BIParent)) - return false; - } + // If we get here, we can hoist the instruction and if-convert. + DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); - // If we get here, we can hoist the instruction. Try to place it - // before the icmp instruction preceding the conditional branch. - BasicBlock::iterator InsertPos = BI; - if (InsertPos != BIParent->begin()) - --InsertPos; - // Skip debug info between condition and branch. - while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos)) - --InsertPos; - if (InsertPos == BrCond && !isa<PHINode>(BrCond)) { - SmallPtrSet<Instruction *, 4> BB1Insns; - for(BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end(); - BB1I != BB1E; ++BB1I) - BB1Insns.insert(BB1I); - for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end(); - UI != UE; ++UI) { - Instruction *Use = cast<Instruction>(*UI); - if (!BB1Insns.count(Use)) continue; - - // If BrCond uses the instruction that place it just before - // branch instruction. - InsertPos = BI; - break; - } - } else - InsertPos = BI; - BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst); + // Hoist the instruction. + if (HInst) + BIParent->getInstList().splice(BI, BB1->getInstList(), HInst); - // Create a select whose true value is the speculatively executed value and - // false value is the previously determined FalseV. + // Insert selects and rewrite the PHI operands. IRBuilder<true, NoFolder> Builder(BI); - SelectInst *SI; - if (Invert) - SI = cast<SelectInst> - (Builder.CreateSelect(BrCond, FalseV, HInst, - FalseV->getName() + "." + HInst->getName())); - else - SI = cast<SelectInst> - (Builder.CreateSelect(BrCond, HInst, FalseV, - HInst->getName() + "." + FalseV->getName())); - - // Make the PHI node use the select for all incoming values for "then" and - // "if" blocks. - for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) { - PHINode *PN = PHIUses[i]; - for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j) - if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent) - PN->setIncomingValue(j, SI); + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + Value *TrueV = PHIs[i].first; + Value *FalseV = PHIs[i].second; + + // Create a select whose true value is the speculatively executed value and + // false value is the previously determined FalseV. + SelectInst *SI; + if (Invert) + SI = cast<SelectInst> + (Builder.CreateSelect(BrCond, FalseV, TrueV, + FalseV->getName() + "." + TrueV->getName())); + else + SI = cast<SelectInst> + (Builder.CreateSelect(BrCond, TrueV, FalseV, + TrueV->getName() + "." + FalseV->getName())); + + // Make the PHI node use the select for all incoming values for "then" and + // "if" blocks. + for (BasicBlock::iterator I = BB2->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned BB1I = PN->getBasicBlockIndex(BB1); + unsigned BIParentI = PN->getBasicBlockIndex(BIParent); + Value *BB1V = PN->getIncomingValue(BB1I); + Value *BIParentV = PN->getIncomingValue(BIParentI); + if (TrueV == BB1V && FalseV == BIParentV) { + PN->setIncomingValue(BB1I, SI); + PN->setIncomingValue(BIParentI, SI); + } + } } ++NumSpeculations; @@ -1461,6 +1448,49 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, return true; } +/// ExtractBranchMetadata - Given a conditional BranchInstruction, retrieve the +/// probabilities of the branch taking each edge. Fills in the two APInt +/// parameters and return true, or returns false if no or invalid metadata was +/// found. +static bool ExtractBranchMetadata(BranchInst *BI, + APInt &ProbTrue, APInt &ProbFalse) { + assert(BI->isConditional() && + "Looking for probabilities on unconditional branch?"); + MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof); + if (!ProfileData || ProfileData->getNumOperands() != 3) return false; + ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1)); + ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2)); + if (!CITrue || !CIFalse) return false; + ProbTrue = CITrue->getValue(); + ProbFalse = CIFalse->getValue(); + assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 && + "Branch probability metadata must be 32-bit integers"); + return true; +} + +/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In +/// the event of overflow, logically-shifts all four inputs right until the +/// multiply fits. +static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D, + unsigned &BitsLost) { + BitsLost = 0; + bool Overflow = false; + APInt Result = A.umul_ov(B, Overflow); + if (Overflow) { + APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A); + do { + B = B.lshr(1); + ++BitsLost; + } while (B.ugt(MaxB)); + A = A.lshr(BitsLost); + C = C.lshr(BitsLost); + D = D.lshr(BitsLost); + Result = A * B; + } + return Result; +} + + /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. @@ -1479,7 +1509,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; - + // Allow a single instruction to be hoisted in addition to the compare // that feeds the branch. We later ensure that any values that _it_ uses // were also live in the predecessor, so that we don't unnecessarily create @@ -1487,7 +1517,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *BonusInst = 0; if (&*FrontIt != Cond && FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond && - FrontIt->isSafeToSpeculativelyExecute()) { + isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; @@ -1557,7 +1587,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { SmallPtrSet<Value*, 4> UsedValues; for (Instruction::op_iterator OI = BonusInst->op_begin(), OE = BonusInst->op_end(); OI != OE; ++OI) { - Value* V = *OI; + Value *V = *OI; if (!isa<Constant>(V)) UsedValues.insert(V); } @@ -1602,10 +1632,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } PBI->setCondition(NewCond); - BasicBlock *OldTrue = PBI->getSuccessor(0); - BasicBlock *OldFalse = PBI->getSuccessor(1); - PBI->setSuccessor(0, OldFalse); - PBI->setSuccessor(1, OldTrue); + PBI->swapSuccessors(); } // If we have a bonus inst, clone it into the predecessor block. @@ -1638,6 +1665,94 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PBI->setSuccessor(1, FalseDest); } + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. + + // Merge probability data into PredBlock's branch. + APInt A, B, C, D; + if (ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) { + // Given IR which does: + // bbA: + // br i1 %x, label %bbB, label %bbC + // bbB: + // br i1 %y, label %bbD, label %bbC + // Let's call the probability that we take the edge from %bbA to %bbB + // 'a', from %bbA to %bbC, 'b', from %bbB to %bbD 'c' and from %bbB to + // %bbC probability 'd'. + // + // We transform the IR into: + // bbA: + // br i1 %z, label %bbD, label %bbC + // where the probability of going to %bbD is (a*c) and going to bbC is + // (b+a*d). + // + // Probabilities aren't stored as ratios directly. Using branch weights, + // we get: + // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D. + + // In the event of overflow, we want to drop the LSB of the input + // probabilities. + unsigned BitsLost; + + // Ignore overflow result on ProbTrue. + APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost); + + APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost); + if (BitsLost) { + ProbTrue = ProbTrue.lshr(BitsLost*2); + } + + APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost); + if (BitsLost) { + ProbTrue = ProbTrue.lshr(BitsLost*2); + Tmp1 = Tmp1.lshr(BitsLost*2); + } + + APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost); + if (BitsLost) { + ProbTrue = ProbTrue.lshr(BitsLost*2); + Tmp1 = Tmp1.lshr(BitsLost*2); + Tmp2 = Tmp2.lshr(BitsLost*2); + } + + bool Overflow1 = false, Overflow2 = false; + APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1); + APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2); + + if (Overflow1 || Overflow2) { + ProbTrue = ProbTrue.lshr(1); + Tmp1 = Tmp1.lshr(1); + Tmp2 = Tmp2.lshr(1); + Tmp3 = Tmp3.lshr(1); + Tmp4 = Tmp2 + Tmp3; + ProbFalse = Tmp4 + Tmp1; + } + + // The sum of branch weights must fit in 32-bits. + if (ProbTrue.isNegative() && ProbFalse.isNegative()) { + ProbTrue = ProbTrue.lshr(1); + ProbFalse = ProbFalse.lshr(1); + } + + if (ProbTrue != ProbFalse) { + // Normalize the result. + APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse); + ProbTrue = ProbTrue.udiv(GCD); + ProbFalse = ProbFalse.udiv(GCD); + + LLVMContext &Context = BI->getContext(); + Value *Ops[3]; + Ops[0] = BI->getMetadata(LLVMContext::MD_prof)->getOperand(0); + Ops[1] = ConstantInt::get(Context, ProbTrue); + Ops[2] = ConstantInt::get(Context, ProbFalse); + PBI->setMetadata(LLVMContext::MD_prof, MDNode::get(Context, Ops)); + } else { + PBI->setMetadata(LLVMContext::MD_prof, NULL); + } + } else { + PBI->setMetadata(LLVMContext::MD_prof, NULL); + } + // Copy any debug value intrinsics into the end of PredBlock. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa<DbgInfoIntrinsic>(*I)) @@ -1894,8 +2009,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { // Find the relevant condition and destinations. Value *Condition = Select->getCondition(); - BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal)); - BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal)); + BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); + BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); // Perform the actual simplification. return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); @@ -1979,7 +2094,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. - if (SI->findCaseValue(Cst) != 0) { + if (SI->findCaseValue(Cst) != SI->case_default()) { Value *V; if (ICI->getPredicate() == ICmpInst::ICMP_EQ) V = ConstantInt::getFalse(BB->getContext()); @@ -2235,52 +2350,6 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { return false; } -bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) { - // Check to see if the first instruction in this block is just an unwind. - // If so, replace any invoke instructions which use this as an exception - // destination with call instructions. - BasicBlock *BB = UI->getParent(); - if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - - bool Changed = false; - SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); - while (!Preds.empty()) { - BasicBlock *Pred = Preds.back(); - InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()); - if (II && II->getUnwindDest() == BB) { - // Insert a new branch instruction before the invoke, because this - // is now a fall through. - Builder.SetInsertPoint(II); - BranchInst *BI = Builder.CreateBr(II->getNormalDest()); - Pred->getInstList().remove(II); // Take out of symbol table - - // Insert the call now. - SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3); - Builder.SetInsertPoint(BI); - CallInst *CI = Builder.CreateCall(II->getCalledValue(), - Args, II->getName()); - CI->setCallingConv(II->getCallingConv()); - CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the Call now does instead. - II->replaceAllUsesWith(CI); - delete II; - Changed = true; - } - - Preds.pop_back(); - } - - // If this block is now dead (and isn't the entry block), remove it. - if (pred_begin(BB) == pred_end(BB) && - BB != &BB->getParent()->getEntryBlock()) { - // We know there are no successors, so just nuke the block. - BB->eraseFromParent(); - return true; - } - - return Changed; -} - bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); @@ -2352,8 +2421,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - if (SI->getSuccessor(i) == BB) { + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseSuccessor() == BB) { BB->removePredecessor(SI->getParent()); SI->removeCase(i); --i; --e; @@ -2361,14 +2431,15 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } // If the default value is unreachable, figure out the most popular // destination and make it the default. - if (SI->getSuccessor(0) == BB) { + if (SI->getDefaultDest() == BB) { std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity; - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { - std::pair<unsigned, unsigned>& entry = - Popularity[SI->getSuccessor(i)]; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + std::pair<unsigned, unsigned> &entry = + Popularity[i.getCaseSuccessor()]; if (entry.first == 0) { entry.first = 1; - entry.second = i; + entry.second = i.getCaseIndex(); } else { entry.first++; } @@ -2390,7 +2461,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { if (MaxBlock) { // Make this the new default, allowing us to delete any explicit // edges to it. - SI->setSuccessor(0, MaxBlock); + SI->setDefaultDest(MaxBlock); Changed = true; // If MaxBlock has phinodes in it, remove MaxPop-1 entries from @@ -2399,8 +2470,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { for (unsigned i = 0; i != MaxPop-1; ++i) MaxBlock->removePredecessor(SI->getParent()); - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - if (SI->getSuccessor(i) == MaxBlock) { + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseSuccessor() == MaxBlock) { SI->removeCase(i); --i; --e; } @@ -2442,17 +2514,19 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { /// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a /// integer range comparison into a sub, an icmp and a branch. static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { - assert(SI->getNumCases() > 2 && "Degenerate switch?"); + assert(SI->getNumCases() > 1 && "Degenerate switch?"); // Make sure all cases point to the same destination and gather the values. SmallVector<ConstantInt *, 16> Cases; - Cases.push_back(SI->getCaseValue(1)); - for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) { - if (SI->getSuccessor(I-1) != SI->getSuccessor(I)) + SwitchInst::CaseIt I = SI->case_begin(); + Cases.push_back(I.getCaseValue()); + SwitchInst::CaseIt PrevI = I++; + for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) { + if (PrevI.getCaseSuccessor() != I.getCaseSuccessor()) return false; - Cases.push_back(SI->getCaseValue(I)); + Cases.push_back(I.getCaseValue()); } - assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered"); + assert(Cases.size() == SI->getNumCases() && "Not all cases gathered"); // Sort the case values, then check if they form a range we can transform. array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); @@ -2462,18 +2536,19 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { } Constant *Offset = ConstantExpr::getNeg(Cases.back()); - Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1); + Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()); Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest()); + Builder.CreateCondBr( + Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); // Prune obsolete incoming values off the successor's PHI nodes. - for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin(); + for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin(); isa<PHINode>(BBI); ++BBI) { - for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I) + for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I) cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } SI->eraseFromParent(); @@ -2487,24 +2562,26 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne); + ComputeMaskedBits(Cond, KnownZero, KnownOne); // Gather dead cases. SmallVector<ConstantInt*, 8> DeadCases; - for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) { - if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 || - (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) { - DeadCases.push_back(SI->getCaseValue(I)); + for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) { + if ((I.getCaseValue()->getValue() & KnownZero) != 0 || + (I.getCaseValue()->getValue() & KnownOne) != KnownOne) { + DeadCases.push_back(I.getCaseValue()); DEBUG(dbgs() << "SimplifyCFG: switch case '" - << SI->getCaseValue(I)->getValue() << "' is dead.\n"); + << I.getCaseValue() << "' is dead.\n"); } } // Remove dead cases from the switch. for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { - unsigned Case = SI->findCaseValue(DeadCases[I]); + SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]); + assert(Case != SI->case_default() && + "Case was not found. Probably mistake in DeadCases forming."); // Prune unused values from PHI nodes. - SI->getSuccessor(Case)->removePredecessor(SI->getParent()); + Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } @@ -2553,9 +2630,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap; ForwardingNodesMap ForwardingNodes; - for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case. - ConstantInt *CaseValue = SI->getCaseValue(I); - BasicBlock *CaseDest = SI->getSuccessor(I); + for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) { + ConstantInt *CaseValue = I.getCaseValue(); + BasicBlock *CaseDest = I.getCaseSuccessor(); int PhiIndex; PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest, @@ -2676,8 +2753,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; - if (I->isTerminator() - && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) + if (I->isTerminator() && + TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) return true; } @@ -2720,6 +2797,12 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SimplifyBranchOnICmpChain(BI, TD, Builder)) return true; + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and one of our successors, fold the comparison into the + // predecessor and use logical operations to pick the right destination. + if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB) | true; + // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we @@ -2754,12 +2837,6 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (FoldCondBranchOnPHI(BI, TD)) return SimplifyCFG(BB) | true; - // If this basic block is ONLY a setcc and a branch, and if a predecessor - // branches to us and one of our successors, fold the setcc into the - // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB) | true; - // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) @@ -2809,7 +2886,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { } /// If BB has an incoming value that will always trigger undefined behavior -/// (eg. null pointer derefence), remove the branch leading here. +/// (eg. null pointer dereference), remove the branch leading here. static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { for (BasicBlock::iterator i = BB->begin(); PHINode *PHI = dyn_cast<PHINode>(i); ++i) @@ -2883,17 +2960,15 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { } else { if (SimplifyCondBranch(BI, Builder)) return true; } - } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { - if (SimplifyResume(RI, Builder)) return true; } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { if (SimplifyReturn(RI, Builder)) return true; + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + if (SimplifyResume(RI, Builder)) return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { if (SimplifySwitch(SI, Builder)) return true; } else if (UnreachableInst *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) { if (SimplifyUnreachable(UI)) return true; - } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - if (SimplifyUnwind(UI, Builder)) return true; } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) { if (SimplifyIndirectBr(IBI)) return true; diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 76289c0..4030bef 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -46,7 +46,6 @@ namespace { LoopInfo *LI; DominatorTree *DT; ScalarEvolution *SE; - IVUsers *IU; // NULL for DisableIVRewrite const TargetData *TD; // May be NULL SmallVectorImpl<WeakVH> &DeadInsts; @@ -59,7 +58,6 @@ namespace { L(Loop), LI(LPM->getAnalysisIfAvailable<LoopInfo>()), SE(SE), - IU(IVU), TD(LPM->getAnalysisIfAvailable<TargetData>()), DeadInsts(Dead), Changed(false) { @@ -107,8 +105,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) // Attempt to fold a binary operator with constant operand. // e.g. ((I + 1) >> 2) => I >> 2 - if (IVOperand->getNumOperands() != 2 || - !isa<ConstantInt>(IVOperand->getOperand(1))) + if (!isa<BinaryOperator>(IVOperand) + || !isa<ConstantInt>(IVOperand->getOperand(1))) return 0; IVSrc = IVOperand->getOperand(0); @@ -229,11 +227,6 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, Rem->replaceAllUsesWith(Sel); } - // Inform IVUsers about the new users. - if (IU) { - if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) - IU->AddUsersIfInteresting(I); - } DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); ++NumElimRem; Changed = true; @@ -375,6 +368,8 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { namespace llvm { +void IVVisitor::anchor() { } + /// simplifyUsersOfIV - Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, @@ -397,36 +392,4 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, return Changed; } -/// simplifyIVUsers - Perform simplification on instructions recorded by the -/// IVUsers pass. -/// -/// This is the old approach to IV simplification to be replaced by -/// SimplifyLoopIVs. -bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead) { - SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead); - - // Each round of simplification involves a round of eliminating operations - // followed by a round of widening IVs. A single IVUsers worklist is used - // across all rounds. The inner loop advances the user. If widening exposes - // more uses, then another pass through the outer loop is triggered. - for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { - Instruction *UseInst = I->getUser(); - Value *IVOperand = I->getOperandValToReplace(); - - if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { - SIV.eliminateIVComparison(ICmp, IVOperand); - continue; - } - if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSigned = Rem->getOpcode() == Instruction::SRem; - if (IsSigned || Rem->getOpcode() == Instruction::URem) { - SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned); - continue; - } - } - } - return SIV.hasChanged(); -} - } // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp index ac005f9..81eb9e0 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -39,12 +40,14 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } /// runOnFunction - Remove instructions that simplify. bool runOnFunction(Function &F) { const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -60,7 +63,7 @@ namespace { continue; // Don't waste time simplifying unused instructions. if (!I->use_empty()) - if (Value *V = SimplifyInstruction(I, TD, DT)) { + if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { // Mark all uses for resimplification next time round the loop. for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) @@ -84,8 +87,11 @@ namespace { } char InstSimplifier::ID = 0; -INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions", - false, false) +INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) char &llvm::InstructionSimplifierID = InstSimplifier::ID; // Public interface to the simplify instructions pass. diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 46d4ada..b1cad06 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -50,33 +50,13 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // return. // std::vector<BasicBlock*> ReturningBlocks; - std::vector<BasicBlock*> UnwindingBlocks; std::vector<BasicBlock*> UnreachableBlocks; for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) if (isa<ReturnInst>(I->getTerminator())) ReturningBlocks.push_back(I); - else if (isa<UnwindInst>(I->getTerminator())) - UnwindingBlocks.push_back(I); else if (isa<UnreachableInst>(I->getTerminator())) UnreachableBlocks.push_back(I); - // Handle unwinding blocks first. - if (UnwindingBlocks.empty()) { - UnwindBlock = 0; - } else if (UnwindingBlocks.size() == 1) { - UnwindBlock = UnwindingBlocks.front(); - } else { - UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F); - new UnwindInst(F.getContext(), UnwindBlock); - - for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(), - E = UnwindingBlocks.end(); I != E; ++I) { - BasicBlock *BB = *I; - BB->getInstList().pop_back(); // Remove the unwind insn - BranchInst::Create(UnwindBlock, BB); - } - } - // Then unreachable blocks. if (UnreachableBlocks.empty()) { UnreachableBlock = 0; |