| author    | dim <dim@FreeBSD.org> | 2011-07-17 15:36:56 +0000 |
|-----------|-----------------------|---------------------------|
| committer | dim <dim@FreeBSD.org> | 2011-07-17 15:36:56 +0000 |
| commit    | 1176aa52646fe641a4243a246aa7f960c708a274 (patch) | |
| tree      | c8086addb211fa670a9d2b1038d8c2e453229755 /lib/Transforms | |
| parent    | ece02cd5829cea836e9365b0845a8ef042d17b0a (diff) | |
| download  | FreeBSD-src-1176aa52646fe641a4243a246aa7f960c708a274.zip FreeBSD-src-1176aa52646fe641a4243a246aa7f960c708a274.tar.gz | |
Vendor import of llvm trunk r135360:
http://llvm.org/svn/llvm-project/llvm/trunk@135360
Diffstat (limited to 'lib/Transforms')
55 files changed, 5785 insertions, 1612 deletions
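Most of the churn in this import tracks LLVM's move from iterator-pair call-creation APIs to ArrayRef-based overloads (visible below in the `CallInst::Create`, `InvokeInst::Create`, `Builder.CreateCall`, `Intrinsic::getDeclaration`, and `ConstantStruct::get` call sites), plus the removal of the old named-type machinery (`TypeSymbolTable`, the DeadTypeElimination pass). A minimal sketch of the call-site pattern, assuming LLVM 3.0-era headers; `emitCall` is a hypothetical helper for illustration, not part of this commit:

```cpp
// Sketch only: the iterator-pair -> ArrayRef migration seen throughout
// this diff. Assumes LLVM 3.0-era headers; emitCall is illustrative.
#include "llvm/Instructions.h"
#include "llvm/ADT/ArrayRef.h"
using namespace llvm;

static CallInst *emitCall(Value *Callee, ArrayRef<Value*> Args,
                          Instruction *InsertBefore) {
  // Old API took an iterator pair:
  //   CallInst::Create(Callee, Args.begin(), Args.end(), "", InsertBefore);
  // SmallVector/std::vector arguments now convert implicitly to
  // ArrayRef<Value*>, collapsing the pair into a single parameter:
  return CallInst::Create(Callee, Args, "", InsertBefore);
}
```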
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 54a7f67..fa007cf 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -493,7 +493,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but has modified arguments.
   const FunctionType *FTy = F->getFunctionType();
-  std::vector<const Type*> Params;
+  std::vector<Type*> Params;
 
   typedef std::set<IndicesVector> ScalarizeTable;
 
@@ -733,12 +733,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                             AttributesVec.end()));
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                           AttributesVec.end()));
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 179b150..3de7bfc 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_library(LLVMipo
   ArgumentPromotion.cpp
   ConstantMerge.cpp
   DeadArgumentElimination.cpp
-  DeadTypeElimination.cpp
   ExtractGV.cpp
   FunctionAttrs.cpp
   GlobalDCE.cpp
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d4eaf0c..1517765 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -208,7 +208,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   // the old function, but doesn't have isVarArg set.
   const FunctionType *FTy = Fn.getFunctionType();
 
-  std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
+  std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
   FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
                                          Params, false);
   unsigned NumArgs = Params.size();
@@ -244,11 +244,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(PAL);
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(PAL);
       if (cast<CallInst>(Call)->isTailCall())
@@ -647,7 +647,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but has fewer arguments and a different return type.
   const FunctionType *FTy = F->getFunctionType();
-  std::vector<const Type*> Params;
+  std::vector<Type*> Params;
 
   // Set up to build a new list of parameter attributes.
   SmallVector<AttributeWithIndex, 8> AttributesVec;
@@ -659,13 +659,13 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
 
   // Find out the new return value.
-  const Type *RetTy = FTy->getReturnType();
+  Type *RetTy = FTy->getReturnType();
   const Type *NRetTy = NULL;
   unsigned RetCount = NumRetVals(F);
 
   // -1 means unused, other numbers are the new index
   SmallVector<int, 5> NewRetIdxs(RetCount, -1);
 
-  std::vector<const Type*> RetTypes;
+  std::vector<Type*> RetTypes;
   if (RetTy->isVoidTy()) {
     NRetTy = RetTy;
   } else {
@@ -822,11 +822,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(NewCallPAL);
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(NewCallPAL);
       if (cast<CallInst>(Call)->isTailCall())
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
deleted file mode 100644
index d3d4963..0000000
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to cleanup the output of GCC. It eliminate names for types
-// that are unused in the entire translation unit, using the FindUsedTypes pass.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "deadtypeelim"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
-
-namespace {
-  struct DTE : public ModulePass {
-    static char ID; // Pass identification, replacement for typeid
-    DTE() : ModulePass(ID) {
-      initializeDTEPass(*PassRegistry::getPassRegistry());
-    }
-
-    // doPassInitialization - For this pass, it removes global symbol table
-    // entries for primitive types. These are never used for linking in GCC and
-    // they make the output uglier to look at, so we nuke them.
-    //
-    // Also, initialize instance variables.
-    //
-    bool runOnModule(Module &M);
-
-    // getAnalysisUsage - This function needs FindUsedTypes to do its job...
-    //
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<FindUsedTypes>();
-    }
-  };
-}
-
-char DTE::ID = 0;
-INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
-                false, false)
-INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
-INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
-
-ModulePass *llvm::createDeadTypeEliminationPass() {
-  return new DTE();
-}
-
-
-// ShouldNukeSymtabEntry - Return true if this module level symbol table entry
-// should be eliminated.
-//
-static inline bool ShouldNukeSymtabEntry(const Type *Ty){
-  // Nuke all names for primitive types!
-  if (Ty->isPrimitiveType() || Ty->isIntegerTy())
-    return true;
-
-  // Nuke all pointers to primitive types as well...
-  if (const PointerType *PT = dyn_cast<PointerType>(Ty))
-    if (PT->getElementType()->isPrimitiveType() ||
-        PT->getElementType()->isIntegerTy())
-      return true;
-
-  return false;
-}
-
-// run - For this pass, it removes global symbol table entries for primitive
-// types. These are never used for linking in GCC and they make the output
-// uglier to look at, so we nuke them. Also eliminate types that are never used
-// in the entire program as indicated by FindUsedTypes.
-//
-bool DTE::runOnModule(Module &M) {
-  bool Changed = false;
-
-  TypeSymbolTable &ST = M.getTypeSymbolTable();
-  const SetVector<const Type*> &T = getAnalysis<FindUsedTypes>().getTypes();
-  std::set<const Type*> UsedTypes(T.begin(), T.end());
-
-  // Check the symbol table for superfluous type entries...
-  //
-  // Grab the 'type' plane of the module symbol...
-  TypeSymbolTable::iterator TI = ST.begin();
-  TypeSymbolTable::iterator TE = ST.end();
-  while ( TI != TE ) {
-    // If this entry should be unconditionally removed, or if we detect that
-    // the type is not used, remove it.
-    const Type *RHS = TI->second;
-    if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) {
-      ST.remove(TI++);
-      ++NumKilled;
-      Changed = true;
-    } else {
-      ++TI;
-      // We only need to leave one name for each type.
-      UsedTypes.erase(RHS);
-    }
-  }
-
-  return Changed;
-}
-
-// vim: sw=2
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index cdf7b76..4ac721d 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1999,9 +1999,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
 static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
                                           const std::vector<Function*> &Ctors) {
   // If we made a change, reassemble the initializer list.
-  std::vector<Constant*> CSVals;
-  CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
-  CSVals.push_back(0);
+  Constant *CSVals[2];
+  CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
+  CSVals[1] = 0;
+
+  const StructType *StructTy =
+    cast<StructType>(
+        cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
 
   // Create the new init list.
   std::vector<Constant*> CAList;
@@ -2016,12 +2020,10 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
       CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
                                    0x7fffffff);
     }
-    CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
+    CAList.push_back(ConstantStruct::get(StructTy, CSVals));
   }
 
   // Create the array initializer.
-  const Type *StructTy =
-    cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
   Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
                                                    CAList.size()), CAList);
@@ -2218,42 +2220,40 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
     Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
 
     // Return the modified struct.
-    return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
-                               STy->isPacked());
-  } else {
-    ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
-    const SequentialType *InitTy = cast<SequentialType>(Init->getType());
-
-    uint64_t NumElts;
-    if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
-      NumElts = ATy->getNumElements();
-    else
-      NumElts = cast<VectorType>(InitTy)->getNumElements();
-
+    return ConstantStruct::get(STy, Elts);
+  }
+
+  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+  const SequentialType *InitTy = cast<SequentialType>(Init->getType());
 
-    // Break up the array into elements.
-    if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
-      for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
-        Elts.push_back(cast<Constant>(*i));
-    } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
-      for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
-        Elts.push_back(cast<Constant>(*i));
-    } else if (isa<ConstantAggregateZero>(Init)) {
-      Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
-    } else {
-      assert(isa<UndefValue>(Init) && "This code is out of sync with "
-             " ConstantFoldLoadThroughGEPConstantExpr");
-      Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
-    }
+  uint64_t NumElts;
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+    NumElts = ATy->getNumElements();
+  else
+    NumElts = cast<VectorType>(InitTy)->getNumElements();
+
+  // Break up the array into elements.
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+    for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+      Elts.push_back(cast<Constant>(*i));
+  } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
+    for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
+      Elts.push_back(cast<Constant>(*i));
+  } else if (isa<ConstantAggregateZero>(Init)) {
+    Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
+  } else {
+    assert(isa<UndefValue>(Init) && "This code is out of sync with "
+           " ConstantFoldLoadThroughGEPConstantExpr");
+    Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
+  }
 
-    assert(CI->getZExtValue() < NumElts);
-    Elts[CI->getZExtValue()] =
-      EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+  assert(CI->getZExtValue() < NumElts);
+  Elts[CI->getZExtValue()] =
+    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
 
-    if (Init->getType()->isArrayTy())
-      return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
-    return ConstantVector::get(Elts);
-  }
+  if (Init->getType()->isArrayTy())
+    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+  return ConstantVector::get(Elts);
 }
 
 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 21dcb51..31ce95f 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -25,7 +25,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeConstantMergePass(Registry);
   initializeDAEPass(Registry);
   initializeDAHPass(Registry);
-  initializeDTEPass(Registry);
   initializeFunctionAttrsPass(Registry);
   initializeGlobalDCEPass(Registry);
   initializeGlobalOptPass(Registry);
@@ -63,10 +62,6 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createDeadArgEliminationPass());
 }
 
-void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createDeadTypeEliminationPass());
-}
-
 void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createFunctionAttrsPass());
 }
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 52ecf17..659476b 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -267,7 +267,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
   CastInst* CI =
     new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
   Value *Args[] = { CI, Inst->getArgOperand(1) };
-  CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
+  CallInst::Create(ThrowLongJmp, Args, "", Inst);
 
   SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
 
@@ -386,7 +386,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
     GetSetJmpMap(Func), BufPtr,
     ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
   };
-  CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst);
+  CallInst::Create(AddSJToMap, Args, "", Inst);
 
   // We are guaranteed that there are no values live across basic blocks
   // (because we are "not in SSA form" yet), but there can still be values live
@@ -482,7 +482,7 @@ void LowerSetJmp::visitCallInst(CallInst& CI)
   std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
   InvokeInst* II =
     InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
-                       Params.begin(), Params.end(), CI.getName(), Term);
+                       Params, CI.getName(), Term);
   II->setCallingConv(CI.getCallingConv());
   II->setAttributes(CI.getAttributes());
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index f741443..7796d05 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -218,7 +218,6 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
     llvm_unreachable("Unknown type!");
     // Fall through in Release mode.
   case Type::IntegerTyID:
-  case Type::OpaqueTyID:
  case Type::VectorTyID:
     // Ty1 == Ty2 would have returned true earlier.
     return false;
@@ -733,7 +732,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
     ++i;
   }
 
-  CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
+  CallInst *CI = Builder.CreateCall(F, Args);
   CI->setTailCall();
   CI->setCallingConv(F->getCallingConv());
   if (NewG->getReturnType()->isVoidTy()) {
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 2f3baeb..b7e63dc 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -175,8 +175,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
       if (II->doesNotThrow()) {
         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
         // Insert a call instruction before the invoke.
-        CallInst *Call = CallInst::Create(II->getCalledValue(),
-                                          Args.begin(), Args.end(), "", II);
+        CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
         Call->takeName(II);
         Call->setCallingConv(II->getCallingConv());
         Call->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index a690765..0fbaff1 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -28,8 +28,8 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ValueSymbolTable.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
@@ -143,8 +143,7 @@ static void RemoveDeadConstant(Constant *C) {
   assert(C->use_empty() && "Constant is not dead!");
   SmallPtrSet<Constant*, 4> Operands;
   for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
-    if (isa<DerivedType>(C->getOperand(i)->getType()) &&
-        OnlyUsedBy(C->getOperand(i), C))
+    if (OnlyUsedBy(C->getOperand(i), C))
       Operands.insert(cast<Constant>(C->getOperand(i)));
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
     if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
@@ -174,13 +173,19 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
   }
 }
 
-// Strip the symbol table of its names.
-static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {
-  for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) {
-    if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg"))
-      ++TI;
-    else
-      ST.remove(TI++);
+// Strip any named types of their names.
+static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
+  std::vector<StructType*> StructTypes;
+  M.findUsedStructTypes(StructTypes);
+
+  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+    StructType *STy = StructTypes[i];
+    if (STy->isAnonymous() || STy->getName().empty()) continue;
+
+    if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+      continue;
+
+    STy->setName("");
   }
 }
 
@@ -221,7 +226,7 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
   }
 
   // Remove all names from types.
-  StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo);
+  StripTypeNames(M, PreserveDbgInfo);
 
   return true;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a08446e..64ea36f 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1400,7 +1400,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
 /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
 /// If so, insert the new bswap intrinsic and return it.
 Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
-  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+  IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
   if (!ITy || ITy->getBitWidth() % 16 ||
       // ByteMask only allows up to 32-byte values.
       ITy->getBitWidth() > 32*8)
@@ -1424,9 +1424,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
   for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
     if (ByteValues[i] != V)
       return 0;
-  const Type *Tys[] = { ITy };
   Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
   return CallInst::Create(F, V);
 }
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ef67701..537f2b3 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -217,10 +217,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         if (GVSrc->isConstant()) {
           Module *M = CI.getParent()->getParent()->getParent();
           Intrinsic::ID MemCpyID = Intrinsic::memcpy;
-          const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
-                                 CI.getArgOperand(1)->getType(),
-                                 CI.getArgOperand(2)->getType() };
-          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
+          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+                           CI.getArgOperand(1)->getType(),
+                           CI.getArgOperand(2)->getType() };
+          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
           Changed = true;
         }
       }
@@ -355,7 +355,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::cttz: {
     // If all bits below the first known one are known zero,
     // this value is constant.
-    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+    const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+    // FIXME: Try to simplify vectors of integers.
+    if (!IT) break;
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
@@ -372,7 +374,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ctlz: {
     // If all bits above the first known one are known zero,
     // this value is constant.
-    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+    const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+    // FIXME: Try to simplify vectors of integers.
+    if (!IT) break;
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
@@ -412,7 +416,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(LHS->getType()),
           ConstantInt::getTrue(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        const StructType *ST = cast<StructType>(II->getType());
+        Constant *Struct = ConstantStruct::get(ST, V);
         return InsertValueInst::Create(Struct, Add, 0);
       }
@@ -425,7 +430,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(LHS->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        const StructType *ST = cast<StructType>(II->getType());
+        Constant *Struct = ConstantStruct::get(ST, V);
         return InsertValueInst::Create(Struct, Add, 0);
       }
     }
@@ -452,7 +458,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct =
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
@@ -472,7 +479,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct =
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
@@ -503,7 +511,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(LHS->getType()),
           Builder->getFalse()
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
         return InsertValueInst::Create(Struct, Mul, 0);
       }
     } // FALL THROUGH
@@ -532,7 +540,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct =
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
@@ -1109,13 +1118,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
     NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
-                               II->getUnwindDest(), Args.begin(), Args.end());
+                               II->getUnwindDest(), Args);
     NC->takeName(II);
     cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
     cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
   } else {
     CallInst *CI = cast<CallInst>(Caller);
-    NC = Builder->CreateCall(Callee, Args.begin(), Args.end());
+    NC = Builder->CreateCall(Callee, Args);
     NC->takeName(CI);
     if (CI->isTailCall())
       cast<CallInst>(NC)->setTailCall();
@@ -1178,7 +1187,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
   const AttrListPtr &NestAttrs = NestF->getAttributes();
   if (!NestAttrs.isEmpty()) {
     unsigned NestIdx = 1;
-    const Type *NestTy = 0;
+    Type *NestTy = 0;
     Attributes NestAttr = Attribute::None;
 
     // Look for a parameter marked with the 'nest' attribute.
@@ -1240,7 +1249,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
 
       // Handle this by synthesizing a new function type, equal to FTy
       // with the chain parameter inserted.
-      std::vector<const Type*> NewTypes;
+      std::vector<Type*> NewTypes;
       NewTypes.reserve(FTy->getNumParams()+1);
 
       // Insert the chain's type into the list of parameter types, which may
@@ -1280,11 +1289,11 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
         NewCaller = InvokeInst::Create(NewCallee,
                                        II->getNormalDest(), II->getUnwindDest(),
-                                       NewArgs.begin(), NewArgs.end());
+                                       NewArgs);
         cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
         cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
       } else {
-        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end());
+        NewCaller = CallInst::Create(NewCallee, NewArgs);
         if (cast<CallInst>(Caller)->isTailCall())
           cast<CallInst>(NewCaller)->setTailCall();
         cast<CallInst>(NewCaller)->
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 199902a..82c734e 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -30,6 +30,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
   }
 
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+    // Cannot look past anything that might overflow.
+    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+    if (OBI && !OBI->hasNoUnsignedWrap()) {
+      Scale = 1;
+      Offset = 0;
+      return Val;
+    }
+
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
       if (I->getOpcode() == Instruction::Shl) {
         // This is a value scaled by '1 << the shift amt'.
@@ -1208,7 +1216,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
   CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
   if (Call && Call->getCalledFunction() &&
       Call->getCalledFunction()->getName() == "sqrt" &&
-      Call->getNumArgOperands() == 1) {
+      Call->getNumArgOperands() == 1 &&
+      Call->hasOneUse()) {
     CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
     if (Arg && Arg->getOpcode() == Instruction::FPExt &&
         CI.getType()->isFloatTy() &&
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c7ed098..c78760b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -42,13 +42,12 @@ static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
 
 static bool HasAddOverflow(ConstantInt *Result,
                            ConstantInt *In1, ConstantInt *In2,
                            bool IsSigned) {
-  if (IsSigned)
-    if (In2->getValue().isNegative())
-      return Result->getValue().sgt(In1->getValue());
-    else
-      return Result->getValue().slt(In1->getValue());
-  else
+  if (!IsSigned)
     return Result->getValue().ult(In1->getValue());
+
+  if (In2->isNegative())
+    return Result->getValue().sgt(In1->getValue());
+  return Result->getValue().slt(In1->getValue());
 }
 
 /// AddWithOverflow - Compute Result = In1+In2, returning true if the result
@@ -77,13 +76,13 @@ static bool HasSubOverflow(ConstantInt *Result,
                            ConstantInt *In1, ConstantInt *In2,
                            bool IsSigned) {
-  if (IsSigned)
-    if (In2->getValue().isNegative())
-      return Result->getValue().slt(In1->getValue());
-    else
-      return Result->getValue().sgt(In1->getValue());
-  else
+  if (!IsSigned)
     return Result->getValue().ugt(In1->getValue());
+
+  if (In2->isNegative())
+    return Result->getValue().slt(In1->getValue());
+
+  return Result->getValue().sgt(In1->getValue());
 }
 
 /// SubWithOverflow - Compute Result = In1-In2, returning true if the result
@@ -128,8 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
   case ICmpInst::ICMP_UGT:
     // True if LHS u> RHS and RHS == high-bit-mask - 1
     TrueIfSigned = true;
-    return RHS->getValue() ==
-      APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+    return RHS->isMaxValue(true);
   case ICmpInst::ICMP_UGE:
     // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
     TrueIfSigned = true;
@@ -278,8 +276,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
 
     // If this is indexing an array of structures, get the structure element.
     if (!LaterIndices.empty())
-      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(),
-                                          LaterIndices.size());
+      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
 
     // If the element is masked, handle it.
     if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
@@ -828,7 +825,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
         LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
       }
     }
-  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+  } else if (DivRHS->isNegative()) { // Divisor is < 0.
     if (DivI->isExact())
       RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
     if (CmpRHSV == 0) { // (X / neg) op 0
@@ -1028,7 +1025,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
 
       // If the sign bit of the XorCST is not set, there is no change to
       // the operation, just stop using the Xor.
-      if (!XorCST->getValue().isNegative()) {
+      if (!XorCST->isNegative()) {
         ICI.setOperand(0, CompareVal);
         Worklist.Add(LHSI);
         return &ICI;
       }
@@ -1061,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       }
 
       // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
-      if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+      if (!ICI.isEquality() && XorCST->isMaxValue(true)) {
         const APInt &NotSignBit = XorCST->getValue();
         ICmpInst::Predicate Pred = ICI.isSigned()
                                        ? ICI.getUnsignedPredicate()
@@ -1087,22 +1084,33 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       // have its sign bit set or if it is an equality comparison.
      // Extending a relational comparison when we're checking the sign
       // bit would not work.
-      if (Cast->hasOneUse() &&
-          (ICI.isEquality() ||
-           (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
-        uint32_t BitWidth =
-          cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
-        APInt NewCST = AndCST->getValue().zext(BitWidth);
-        APInt NewCI = RHSV.zext(BitWidth);
-        Value *NewAnd =
+      if (ICI.isEquality() ||
+          (!AndCST->isNegative() && RHSV.isNonNegative())) {
+        Value *NewAnd =
           Builder->CreateAnd(Cast->getOperand(0),
-                             ConstantInt::get(ICI.getContext(), NewCST),
-                             LHSI->getName());
+                             ConstantExpr::getZExt(AndCST, Cast->getSrcTy()));
+        NewAnd->takeName(LHSI);
         return new ICmpInst(ICI.getPredicate(), NewAnd,
-                            ConstantInt::get(ICI.getContext(), NewCI));
+                            ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
       }
     }
-
+
+    // If the LHS is an AND of a zext, and we have an equality compare, we can
+    // shrink the and/compare to the smaller type, eliminating the cast.
+    if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
+      const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
+      // Make sure we don't compare the upper bits, SimplifyDemandedBits
+      // should fold the icmp to true/false in that case.
+      if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
+        Value *NewAnd =
+          Builder->CreateAnd(Cast->getOperand(0),
+                             ConstantExpr::getTrunc(AndCST, Ty));
+        NewAnd->takeName(LHSI);
+        return new ICmpInst(ICI.getPredicate(), NewAnd,
+                            ConstantExpr::getTrunc(RHS, Ty));
+      }
+    }
+
     // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
     // could exist), turn it into (X & (C2 << C1)) != (C3 << C1).  This
     // happens a LOT in code produced by the C front-end, for bitfield
@@ -1396,18 +1404,27 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     case Instruction::Xor:
       // For the xor case, we can xor two constants together, eliminating
       // the explicit xor.
-      if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
-        return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+      if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+        return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
                             ConstantExpr::getXor(RHS, BOC));
-
-      // FALLTHROUGH
+      } else if (RHSV == 0) {
+        // Replace ((xor A, B) != 0) with (A != B)
+        return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                            BO->getOperand(1));
+      }
+      break;
     case Instruction::Sub:
-      // Replace (([sub|xor] A, B) != 0) with (A != B)
-      if (RHSV == 0)
+      // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
+      if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
+        if (BO->hasOneUse())
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
+                              ConstantExpr::getSub(BOp0C, RHS));
+      } else if (RHSV == 0) {
+        // Replace ((sub A, B) != 0) with (A != B)
         return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
                             BO->getOperand(1));
+      }
       break;
-
     case Instruction::Or:
       // If bits are being or'd in that are not present in the constant we
       // are comparing against, then the comparison could never succeed!
@@ -1434,7 +1451,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
                               LHSI, Constant::getNullValue(RHS->getType()));
-
+
+        // Don't perform the following transforms if the AND has multiple uses
+        if (!BO->hasOneUse())
+          break;
+
         // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
         if (BOC->getValue().isSignBit()) {
           Value *X = BO->getOperand(0);
@@ -1659,9 +1680,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   // result and the overflow bit.
   Module *M = I.getParent()->getParent()->getParent();
 
-  const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+  Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
   Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
-                                       &NewType, 1);
+                                       NewType);
 
   InstCombiner::BuilderTy *Builder = IC.Builder;
 
@@ -1701,8 +1722,8 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
   Builder->SetInsertPoint(OrigAdd);
 
   Module *M = I.getParent()->getParent()->getParent();
-  const Type *Ty = LHS->getType();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1);
+  Type *Ty = LHS->getType();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
   CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
   Value *Add = Builder->CreateExtractValue(Call, 0);
 
@@ -2364,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
                             BO1->getOperand(0));
       }
 
-      if (CI->getValue().isMaxSignedValue()) {
+      if (CI->isMaxValue(true)) {
         ICmpInst::Predicate Pred = I.isSigned() ? I.getUnsignedPredicate()
                                                 : I.getSignedPredicate();
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2d29403..630a6fe 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -691,14 +691,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
     bool hasNegative = false;
     for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
       if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
-        if (RHS->getValue().isNegative())
+        if (RHS->isNegative())
           hasNegative = true;
 
     if (hasNegative) {
       std::vector<Constant *> Elts(VWidth);
       for (unsigned i = 0; i != VWidth; ++i) {
         if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
-          if (RHS->getValue().isNegative())
+          if (RHS->isNegative())
             Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
           else
             Elts[i] = RHS;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aeb3c3e..5733c20 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -796,7 +796,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
         // So at this point we know we have (Y -> OtherAddOp):
         //        select C, (add X, Y), (sub X, Z)
         Value *NegVal;  // Compute -Z
-        if (SI.getType()->isFloatingPointTy()) {
+        if (SI.getType()->isFPOrFPVectorTy()) {
           NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
         } else {
           NegVal = Builder->CreateNeg(SubOp->getOperand(1));
@@ -810,7 +810,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
           Builder->CreateSelect(CondVal, NewTrueOp,
                                 NewFalseOp, SI.getName() + ".p");
 
-        if (SI.getType()->isFloatingPointTy())
+        if (SI.getType()->isFPOrFPVectorTy())
           return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
         else
           return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 92c10f5..ab98ef9 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -785,6 +785,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // getelementptr instructions into a single instruction.
   //
   if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+
+    // If this GEP has only 0 indices, it is the same pointer as
+    // Src. If Src is not a trivial GEP too, don't combine
+    // the indices.
+    if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
+        !Src->hasOneUse())
+      return 0;
+
     // Note that if our source is a gep chain itself that we wait for that
     // chain to be resolved before we perform this transformation.  This
     // avoids us creating a TON of code in some cases.
@@ -1191,7 +1199,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       if (EV.getNumIndices() > 1)
         // Extract the remaining indices out of the constant indexed by the
         // first index
-        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+        return ExtractValueInst::Create(V, EV.getIndices().slice(1));
       else
         return ReplaceInstUsesWith(EV, V);
     }
@@ -1214,7 +1222,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // with
       // %E = extractvalue { i32, { i32 } } %A, 0
       return ExtractValueInst::Create(IV->getAggregateOperand(),
-                                      EV.idx_begin(), EV.idx_end());
+                                      EV.getIndices());
    }
     if (exti == exte && insi == inse)
       // Both iterators are at the end: Index lists are identical. Replace
@@ -1232,9 +1240,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // by switching the order of the insert and extract (though the
       // insertvalue should be left in, since it may have other uses).
       Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
-                                                 EV.idx_begin(), EV.idx_end());
+                                                 EV.getIndices());
       return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
-                                     insi, inse);
+                                     ArrayRef<unsigned>(insi, inse));
     }
     if (insi == inse)
       // The insert list is a prefix of the extract list
@@ -1246,7 +1254,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // with
       // %E extractvalue { i32 } { i32 42 }, 0
       return ExtractValueInst::Create(IV->getInsertedValueOperand(),
-                                      exti, exte);
+                                      ArrayRef<unsigned>(exti, exte));
   }
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
     // We're extracting from an intrinsic, see if we're the only user, which
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index b902213..3f2c412 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -561,25 +561,24 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
     Edge += Successors;
   }
 
+  ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size());
   GlobalVariable *EdgeTableGV =
       new GlobalVariable(
           *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
-          ConstantArray::get(EdgeTableTy,
-                             &EdgeTable[0], Succs.size() * Preds.size()),
+          ConstantArray::get(EdgeTableTy, V),
           "__llvm_gcda_edge_table");
   EdgeTableGV->setUnnamedAddr(true);
   return EdgeTableGV;
 }
 
 Constant *GCOVProfiler::getStartFileFunc() {
-  const Type *Args[] = { Type::getInt8PtrTy(*Ctx) };
   const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
-                                              Args, false);
+                                              Type::getInt8PtrTy(*Ctx), false);
   return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
 }
 
 Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
-  const Type *Args[] = {
+  Type *Args[] = {
     Type::getInt32PtrTy(*Ctx),                  // uint32_t *predecessor
     Type::getInt64PtrTy(*Ctx)->getPointerTo(),  // uint64_t **state_table_row
   };
@@ -589,7 +588,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
 }
 
 Constant *GCOVProfiler::getEmitFunctionFunc() {
-  const Type *Args[2] = {
+  Type *Args[2] = {
     Type::getInt32Ty(*Ctx),    // uint32_t ident
     Type::getInt8PtrTy(*Ctx),  // const char *function_name
   };
@@ -599,7 +598,7 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
-  const Type *Args[] = {
+  Type *Args[] = {
     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
   };
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 182a43d..7541663 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -376,7 +376,7 @@ namespace llvm {
   public:
     static const StructType *get(LLVMContext& C) {
       return( StructType::get(
-                C, TypeBuilder<types::i<32>, xcompile>::get(C), // type
+                TypeBuilder<types::i<32>, xcompile>::get(C), // type
                 TypeBuilder<types::i<32>, xcompile>::get(C), // array size
                 TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
                 NULL));
@@ -1062,7 +1062,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue,
       CallInst::Create(
           increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
-          args.begin(), args.end(), "", insertPoint);
+          args, "", insertPoint);
     }
   }
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 7435bc3..445a5b6 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -62,8 +62,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
   }
   Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
 
-  CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
-                                        "newargc", InsertPos);
+  CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
 
   // If argc or argv are not available in main, just pass null values in.
   Function::arg_iterator AI;
@@ -134,7 +133,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
 void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
   // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
   // types.
-  const Type *GlobalDtorElems[2] = {
+  Type *GlobalDtorElems[2] = {
     Type::getInt32Ty(Mod->getContext()),
     FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
   };
@@ -164,7 +163,8 @@ void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
     GlobalVariable *GlobalDtors = new GlobalVariable(
         *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
         GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
-  dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false));
+
+  dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
   GlobalDtors->setInitializer(ConstantArray::get(
       cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
 }
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index fcf914f..c223da6 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMScalarOpts
   LoopUnswitch.cpp
   LowerAtomic.cpp
   MemCpyOptimizer.cpp
+  ObjCARC.cpp
   Reassociate.cpp
   Reg2Mem.cpp
   SCCP.cpp
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 53e4640..cb9b5be 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -437,12 +437,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     MemDepResult InstDep = MD->getDependency(Inst);
 
-    // Ignore non-local store liveness.
+    // Ignore any store where we can't find a local dependence.
     // FIXME: cross-block DSE would be fun. :)
-    if (InstDep.isNonLocal() ||
-        // Ignore self dependence, which happens in the entry block of the
-        // function.
-        InstDep.getInst() == Inst)
+    if (InstDep.isNonLocal() || InstDep.isUnknown())
       continue;
 
     // If we're storing the same value back to a pointer that we just
@@ -478,7 +475,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     if (Loc.Ptr == 0)
       continue;
 
-    while (!InstDep.isNonLocal()) {
+    while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
       // Get the memory clobbered by the instruction we depend on.  MemDep will
       // skip any instructions that 'Loc' clearly doesn't interact with.  If we
       // end up depending on a may- or must-aliased load, then we can't optimize
@@ -542,24 +539,26 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
 /// HandleFree - Handle frees of entire structures whose dependency is a store
 /// to a field of that structure.
 bool DSE::HandleFree(CallInst *F) {
+  bool MadeChange = false;
+
   MemDepResult Dep = MD->getDependency(F);
-  do {
-    if (Dep.isNonLocal()) return false;
-
+
+  while (!Dep.isNonLocal() && !Dep.isUnknown()) {
     Instruction *Dependency = Dep.getInst();
     if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
-      return false;
+      return MadeChange;
 
     Value *DepPointer =
       GetUnderlyingObject(getStoredPointerOperand(Dependency));
 
     // Check for aliasing.
     if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
-      return false;
+      return MadeChange;
 
     // DCE instructions only used to calculate that store
     DeleteDeadInstruction(Dependency, *MD);
     ++NumFastStores;
+    MadeChange = true;
 
     // Inst's old Dependency is now deleted. Compute the next dependency,
     // which may also be dead, as in
@@ -567,9 +566,9 @@ bool DSE::HandleFree(CallInst *F) {
     //    s[1] = 0; // This has just been deleted.
     //    free(s);
     Dep = MD->getDependency(F);
-  } while (!Dep.isNonLocal());
+  };
 
-  return true;
+  return MadeChange;
 }
 
 /// handleEndBlock - Remove dead stores to stack-allocated locations in the
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 2515fd1..87b7317 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -91,6 +91,7 @@ namespace {
     uint32_t nextValueNumber;
 
     Expression create_expression(Instruction* I);
+    Expression create_extractvalue_expression(ExtractValueInst* EI);
     uint32_t lookup_or_add_call(CallInst* C);
   public:
     ValueTable() : nextValueNumber(1) { }
@@ -141,7 +142,6 @@ template <> struct DenseMapInfo<Expression> {
 //                     ValueTable Internal Functions
 //===----------------------------------------------------------------------===//
 
-
 Expression ValueTable::create_expression(Instruction *I) {
   Expression e;
   e.type = I->getType();
@@ -150,12 +150,8 @@ Expression ValueTable::create_expression(Instruction *I) {
        OI != OE; ++OI)
     e.varargs.push_back(lookup_or_add(*OI));
 
-  if (CmpInst *C = dyn_cast<CmpInst>(I))
+  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
     e.opcode = (C->getOpcode() << 8) | C->getPredicate();
-  else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
-    for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
-         II != IE; ++II)
-      e.varargs.push_back(*II);
   } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
     for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
          II != IE; ++II)
@@ -165,6 +161,58 @@ Expression ValueTable::create_expression(Instruction *I) {
   return e;
 }
 
+Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+  assert(EI != 0 && "Not an ExtractValueInst?");
+  Expression e;
+  e.type = EI->getType();
+  e.opcode = 0;
+
+  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+  if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+    // EI might be an extract from one of our recognised intrinsics. If it
+    // is we'll synthesize a semantically equivalent expression instead on
+    // an extract value expression.
+    switch (I->getIntrinsicID()) {
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::uadd_with_overflow:
+        e.opcode = Instruction::Add;
+        break;
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::usub_with_overflow:
+        e.opcode = Instruction::Sub;
+        break;
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::umul_with_overflow:
+        e.opcode = Instruction::Mul;
+        break;
+      default:
+        break;
+    }
+
+    if (e.opcode != 0) {
+      // Intrinsic recognized. Grab its args to finish building the expression.
+      assert(I->getNumArgOperands() == 2 &&
+             "Expect two args for recognised intrinsics.");
+      e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
+      e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+      return e;
+    }
+  }
+
+  // Not a recognised intrinsic. Fall back to producing an extract value
+  // expression.
+  e.opcode = EI->getOpcode();
+  for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
+       OI != OE; ++OI)
+    e.varargs.push_back(lookup_or_add(*OI));
+
+  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
+       II != IE; ++II)
+    e.varargs.push_back(*II);
+
+  return e;
+}
+
 //===----------------------------------------------------------------------===//
 //                     ValueTable External Functions
 //===----------------------------------------------------------------------===//
@@ -227,21 +275,19 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
     // Non-local case.
     const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
       MD->getNonLocalCallDependency(CallSite(C));
-    // FIXME: call/call dependencies for readonly calls should return def, not
-    // clobber!  Move the checking logic to MemDep!
+    // FIXME: Move the checking logic to MemDep!
     CallInst* cdep = 0;
 
     // Check to see if we have a single dominating call instruction that is
     // identical to C.
     for (unsigned i = 0, e = deps.size(); i != e; ++i) {
       const NonLocalDepEntry *I = &deps[i];
-      // Ignore non-local dependencies.
       if (I->getResult().isNonLocal())
         continue;
 
-      // We don't handle non-depedencies.  If we already have a call, reject
+      // We don't handle non-definitions.  If we already have a call, reject
       // instruction dependencies.
-      if (I->getResult().isClobber() || cdep != 0) {
+      if (!I->getResult().isDef() || cdep != 0) {
         cdep = 0;
         break;
       }
@@ -338,11 +384,13 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
     case Instruction::ExtractElement:
     case Instruction::InsertElement:
     case Instruction::ShuffleVector:
-    case Instruction::ExtractValue:
     case Instruction::InsertValue:
     case Instruction::GetElementPtr:
       exp = create_expression(I);
       break;
+    case Instruction::ExtractValue:
+      exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+      break;
     default:
       valueNumbering[V] = nextValueNumber;
       return nextValueNumber++;
@@ -1192,8 +1240,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
     // escaping uses to any values that are operands to these PHIs.
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
       PHINode *P = NewPHIs[i];
-      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
-        AA->addEscapingUse(P->getOperandUse(2*ii));
+      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
+        unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+        AA->addEscapingUse(P->getOperandUse(jj));
+      }
     }
   }
@@ -1224,12 +1274,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
 
   // If we had a phi translation failure, we'll have a single entry which is a
   // clobber in the current block.  Reject this early.
-  if (Deps.size() == 1 && Deps[0].getResult().isClobber() &&
-      Deps[0].getResult().getInst()->getParent() == LI->getParent()) {
+  if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
     DEBUG(
       dbgs() << "GVN: non-local load ";
       WriteAsOperand(dbgs(), LI);
-      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
+      dbgs() << " has unknown dependencies\n";
     );
     return false;
   }
@@ -1245,6 +1294,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     BasicBlock *DepBB = Deps[i].getBB();
     MemDepResult DepInfo = Deps[i].getResult();
 
+    if (DepInfo.isUnknown()) {
+      UnavailableBlocks.push_back(DepBB);
+      continue;
+    }
+
     if (DepInfo.isClobber()) {
       // The address being loaded in this non-local block may not be the same as
       // the pointer operand of the load if PHI translation occurs.  Make sure
@@ -1305,6 +1359,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
       continue;
     }
 
+    assert(DepInfo.isDef() && "Expecting def here");
+
     Instruction *DepInst = DepInfo.getInst();
 
     // Loading the allocation -> undef.
@@ -1691,10 +1747,22 @@ bool GVN::processLoad(LoadInst *L) {
     return false;
   }
 
+  if (Dep.isUnknown()) {
+    DEBUG(
+      // fast print dep, using operator<< on instruction is too slow.
+      dbgs() << "GVN: load ";
+      WriteAsOperand(dbgs(), L);
+      dbgs() << " has unknown dependence\n";
+    );
+    return false;
+  }
+
   // If it is defined in another block, try harder.
   if (Dep.isNonLocal())
     return processNonLocalLoad(L);
 
+  assert(Dep.isDef() && "Expecting def here");
+
   Instruction *DepInst = Dep.getInst();
   if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
     Value *StoredVal = DepSI->getValueOperand();
@@ -2133,8 +2201,11 @@ bool GVN::performPRE(Function &F) {
     // Because we have added a PHI-use of the pointer value, it has now
     // "escaped" from alias analysis' perspective.  We need to inform
     // AA of this.
-    for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
-      VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+    for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+         ++ii) {
+      unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+      VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+    }
 
     if (MD)
       MD->invalidateCachedPointerInfo(Phi);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 04ee7c8..dee3d38 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -52,30 +52,32 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
-STATISTIC(NumRemoved , "Number of aux indvars removed");
-STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted, "Number of canonical indvars added");
-STATISTIC(NumReplaced, "Number of exit values replaced");
-STATISTIC(NumLFTR    , "Number of loop exit tests replaced");
-STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
-
-// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
-// and referenced here.
-namespace llvm {
-  extern bool DisableIVRewrite;
-}
+STATISTIC(NumRemoved     , "Number of aux indvars removed");
+STATISTIC(NumWidened     , "Number of indvars widened");
+STATISTIC(NumInserted    , "Number of canonical indvars added");
+STATISTIC(NumReplaced    , "Number of exit values replaced");
+STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem     , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp     , "Number of IV comparisons eliminated");
+STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
+
+static cl::opt<bool> DisableIVRewrite(
+  "disable-iv-rewrite", cl::Hidden,
+  cl::desc("Disable canonical induction variable rewriting"));
 
 namespace {
   class IndVarSimplify : public LoopPass {
@@ -84,12 +86,14 @@ namespace {
     ScalarEvolution *SE;
     DominatorTree   *DT;
     TargetData      *TD;
+
     SmallVector<WeakVH, 16> DeadInsts;
     bool Changed;
   public:
 
     static char ID; // Pass identification, replacement for typeid
-    IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) {
+    IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+                       Changed(false) {
       initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
     }
 
@@ -101,36 +105,46 @@ namespace {
       AU.addRequired<ScalarEvolution>();
       AU.addRequiredID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
-      AU.addRequired<IVUsers>();
+      if (!DisableIVRewrite)
+        AU.addRequired<IVUsers>();
       AU.addPreserved<ScalarEvolution>();
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreservedID(LCSSAID);
-      AU.addPreserved<IVUsers>();
+      if (!DisableIVRewrite)
+        AU.addPreserved<IVUsers>();
       AU.setPreservesCFG();
     }
 
   private:
+    virtual void releaseMemory() {
+      DeadInsts.clear();
+    }
+
     bool isValidRewrite(Value *FromVal, Value *ToVal);
+
+    void HandleFloatingPointIV(Loop *L, PHINode *PH);
+    void RewriteNonIntegerIVs(Loop *L);
+
+    void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
+    void SimplifyIVUsers(SCEVExpander &Rewriter);
+    void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+    bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
     void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
     void EliminateIVRemainder(BinaryOperator *Rem,
                               Value *IVOperand,
-                              bool IsSigned,
-                              PHINode *IVPhi);
-    void RewriteNonIntegerIVs(Loop *L);
+                              bool IsSigned);
+
+    void SimplifyCongruentIVs(Loop *L);
+
+    void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
 
     ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                         PHINode *IndVar,
                                         SCEVExpander &Rewriter);
 
-    void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
-    void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
     void SinkUnusedInvariants(Loop *L);
-
-    void HandleFloatingPointIV(Loop *L, PHINode *PH);
   };
 }
@@ -197,156 +211,262 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
   return true;
 }
 
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
-  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
-  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
-      BackedgeTakenCount->isZero())
-    return false;
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
 
-  if (!L->getExitingBlock())
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+  bool isExact = false;
+  if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
     return false;
-
-  // Can't rewrite non-branch yet.
-  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
-  if (!BI)
+  // See if we can convert this to an int64_t
+  uint64_t UIntVal;
+  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+                           &isExact) != APFloat::opOK || !isExact)
    return false;
-
-  // Special case: If the backedge-taken count is a UDiv, it's very likely a
-  // UDiv that ScalarEvolution produced in order to compute a precise
-  // expression, rather than a UDiv from the user's code. If we can't find a
-  // UDiv in the code with some simple searching, assume the former and forego
-  // rewriting the loop.
-  if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
-    ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
-    if (!OrigCond) return false;
-    const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
-    R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
-    if (R != BackedgeTakenCount) {
-      const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
-      L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
-      if (L != BackedgeTakenCount)
-        return false;
-    }
-  }
+  IntVal = UIntVal;
   return true;
 }
 
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
+/// HandleFloatingPointIV - If the loop has floating induction variable
+/// then insert corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+///   bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+///   bar((double)i);
 ///
-/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
-static const Type *getBackedgeIVType(Loop *L) {
-  if (!L->getExitingBlock())
-    return 0;
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+  unsigned BackEdge     = IncomingEdge^1;
 
-  // Can't rewrite non-branch yet.
-  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
-  if (!BI)
-    return 0;
+  // Check incoming value.
+  ConstantFP *InitValueVal =
+    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
 
-  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
-  if (!Cond)
-    return 0;
+  int64_t InitValue;
+  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+    return;
 
-  const Type *Ty = 0;
-  for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
-      OI != OE; ++OI) {
-    assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
-    TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
-    if (!Trunc)
-      continue;
+  // Check IV increment. Reject this PN if increment operation is not
+  // an add or increment value can not be represented by an integer.
+  BinaryOperator *Incr =
+    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
 
-    return Trunc->getSrcTy();
+  // If this is not an add of the PHI with a constantfp, or if the constant fp
+  // is not an integer, bail out.
+  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+  int64_t IncValue;
+  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+    return;
+
+  // Check Incr uses. One user is PN and the other user is an exit condition
+  // used by the conditional terminator.
+  Value::use_iterator IncrUse = Incr->use_begin();
+  Instruction *U1 = cast<Instruction>(*IncrUse++);
+  if (IncrUse == Incr->use_end()) return;
+  Instruction *U2 = cast<Instruction>(*IncrUse++);
+  if (IncrUse != Incr->use_end()) return;
+
+  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
+  // only used by a branch, we can't transform it.
+  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+  if (!Compare)
+    Compare = dyn_cast<FCmpInst>(U2);
+  if (Compare == 0 || !Compare->hasOneUse() ||
+      !isa<BranchInst>(Compare->use_back()))
+    return;
+
+  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+  // We need to verify that the branch actually controls the iteration count
+  // of the loop.  If not, the new IV can overflow and no one will notice.
+  // The branch block must be in the loop and one of the successors must be out
+  // of the loop.
+  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+  if (!L->contains(TheBr->getParent()) ||
+      (L->contains(TheBr->getSuccessor(0)) &&
+       L->contains(TheBr->getSuccessor(1))))
+    return;
+
+
+  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+  // transform it.
+  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+  int64_t ExitValue;
+  if (ExitValueVal == 0 ||
+      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+    return;
+
+  // Find new predicate for integer comparison.
+  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+  switch (Compare->getPredicate()) {
+  default: return;  // Unknown comparison.
+ case CmpInst::FCMP_OEQ: + case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } - return Ty; -} -/// LinearFunctionTestReplace - This method rewrites the exit condition of the -/// loop to be a canonical != comparison against the incremented loop induction -/// variable. This pass is able to rewrite the exit tests of any loop where the -/// SCEV analysis can determine a loop-invariant trip count of the loop, which -/// is actually a much broader range than just linear tests. -ICmpInst *IndVarSimplify:: -LinearFunctionTestReplace(Loop *L, - const SCEV *BackedgeTakenCount, - PHINode *IndVar, - SCEVExpander &Rewriter) { - assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); - BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + // We convert the floating point induction variable to a signed i32 value if + // we can. This is only safe if the comparison will not overflow in a way + // that won't be trapped by the integer equivalent operations. Check for this + // now. + // TODO: We could use i64 if it is native and the range requires it. - // If the exiting block is not the same as the backedge block, we must compare - // against the preincremented value, otherwise we prefer to compare against - // the post-incremented value. - Value *CmpIndVar; - const SCEV *RHS = BackedgeTakenCount; - if (L->getExitingBlock() == L->getLoopLatch()) { - // Add one to the "backedge-taken" count to get the trip count. - // If this addition may overflow, we have to be more pessimistic and - // cast the induction variable before doing the add. - const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); - const SCEV *N = - SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); - if ((isa<SCEVConstant>(N) && !N->isZero()) || - SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { - // No overflow. Cast the sum. - RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); - } else { - // Potential overflow. Cast before doing the add. - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - RHS = SE->getAddExpr(RHS, - SE->getConstant(IndVar->getType(), 1)); + // The start/stride/exit values must all fit in signed i32. + if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) + return; + + // If not actually striding (add x, 0.0), avoid touching the code. + if (IncValue == 0) + return; + + // Positive and negative strides have different safety conditions. + if (IncValue > 0) { + // If we have a positive stride, we require the init to be less than the + // exit value and an equality or less than comparison. + if (InitValue >= ExitValue || + NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) + return; + + uint32_t Range = uint32_t(ExitValue-InitValue); + if (NewPred == CmpInst::ICMP_SLE) { + // Normalize SLE -> SLT, check for infinite loop. + if (++Range == 0) return; // Range overflows. } - // The BackedgeTaken expression contains the number of times that the - // backedge branches to the loop header. 
This is one less than the - // number of times the loop executes, so use the incremented indvar. - CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + unsigned Leftover = Range % uint32_t(IncValue); + + // If this is an equality comparison, we require that the strided value + // exactly land on the exit value, otherwise the IV condition will wrap + // around and do things the fp IV wouldn't. + if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && + Leftover != 0) + return; + + // If the stride would wrap around the i32 before exiting, we can't + // transform the IV. + if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) + return; + } else { - // We have to use the preincremented value... - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - CmpIndVar = IndVar; + // If we have a negative stride, we require the init to be greater than the + // exit value and an equality or greater than comparison. + if (InitValue >= ExitValue || + NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) + return; + + uint32_t Range = uint32_t(InitValue-ExitValue); + if (NewPred == CmpInst::ICMP_SGE) { + // Normalize SGE -> SGT, check for infinite loop. + if (++Range == 0) return; // Range overflows. + } + + unsigned Leftover = Range % uint32_t(-IncValue); + + // If this is an equality comparison, we require that the strided value + // exactly land on the exit value, otherwise the IV condition will wrap + // around and do things the fp IV wouldn't. + if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && + Leftover != 0) + return; + + // If the stride would wrap around the i32 before exiting, we can't + // transform the IV. + if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) + return; } - // Expand the code for the iteration count. - assert(SE->isLoopInvariant(RHS, L) && - "Computed iteration count is not loop invariant!"); - Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); - // Insert a new icmp_ne or icmp_eq instruction before the branch. - ICmpInst::Predicate Opcode; - if (L->contains(BI->getSuccessor(0))) - Opcode = ICmpInst::ICMP_NE; - else - Opcode = ICmpInst::ICMP_EQ; + // Insert new integer induction variable. + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); + NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), + PN->getIncomingBlock(IncomingEdge)); - DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" - << " LHS:" << *CmpIndVar << '\n' - << " op:\t" - << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" - << " RHS:\t" << *RHS << "\n"); + Value *NewAdd = + BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), + Incr->getName()+".int", Incr); + NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); - ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); + ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, + ConstantInt::get(Int32Ty, ExitValue), + Compare->getName()); - Value *OrigCond = BI->getCondition(); - // It's tempting to use replaceAllUsesWith here to fully replace the old - // comparison, but that's not immediately safe, since users of the old - // comparison may not be dominated by the new comparison. Instead, just - // update the branch to use the new comparison; in the common case this - // will make old comparison dead. 
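The stride-legality arithmetic added above (Range, Leftover, and the wrap test) is self-contained integer math. Here is the positive-stride case as a standalone function; the signature and flag names are mine, but each check mirrors one in HandleFloatingPointIV:

#include <cstdint>
#include <iostream>

// Positive-stride safety for replacing a float IV with an i32 IV: the
// init, increment, and exit values are assumed to already fit in signed
// i32. Reject the transform if an ==/!= exit could be stepped over, or if
// the final increment would wrap the 32-bit replacement IV. The int32_t
// truncation below wraps on two's-complement targets, as the patch assumes.
static bool positiveStrideSafe(int64_t Init, int64_t Inc, int64_t Exit,
                               bool EqualityExit, bool InclusiveExit) {
  if (Inc <= 0 || Init >= Exit)
    return false;
  uint32_t Range = uint32_t(Exit - Init);
  if (InclusiveExit && ++Range == 0)   // normalize <= to <; reject overflow
    return false;
  uint32_t Leftover = Range % uint32_t(Inc);
  if (EqualityExit && Leftover != 0)   // exit value never hit exactly
    return false;
  if (Leftover != 0 && int32_t(Exit + Inc) < Exit)
    return false;                      // last bump wraps around i32
  return true;
}

int main() {
  std::cout << positiveStrideSafe(0, 1, 10000, true, false) << '\n'; // 1
  std::cout << positiveStrideSafe(0, 3, 10, true, false) << '\n';    // 0
  return 0;
}

The negative-stride case in the patch is the same computation with Range = Init - Exit and the wrap test pointing the other way.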
- BI->setCondition(Cond); - DeadInsts.push_back(OrigCond); + // In the following deletions, PN may become dead and may be deleted. + // Use a WeakVH to observe whether this happens. + WeakVH WeakPH = PN; - ++NumLFTR; - Changed = true; - return Cond; + // Delete the old floating point exit comparison. The branch starts using the + // new comparison. + NewCompare->takeName(Compare); + Compare->replaceAllUsesWith(NewCompare); + RecursivelyDeleteTriviallyDeadInstructions(Compare); + + // Delete the old floating point increment. + Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); + RecursivelyDeleteTriviallyDeadInstructions(Incr); + + // If the FP induction variable still has uses, this is because something else + // in the loop uses its value. In order to canonicalize the induction + // variable, we chose to eliminate the IV and rewrite it in terms of an + // int->fp cast. + // + // We give preference to sitofp over uitofp because it is faster on most + // platforms. + if (WeakPH) { + Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", + PN->getParent()->getFirstNonPHI()); + PN->replaceAllUsesWith(Conv); + RecursivelyDeleteTriviallyDeadInstructions(PN); + } + + // Add a new IVUsers entry for the newly-created integer PHI. + if (IU) + IU->AddUsersIfInteresting(NewPHI); } +void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { + // First step. Check to see if there are any floating-point recurrences. + // If there are, change them into integer recurrences, permitting analysis by + // the SCEV routines. + // + BasicBlock *Header = L->getHeader(); + + SmallVector<WeakVH, 8> PHIs; + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHIs.push_back(PN); + + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) + HandleFloatingPointIV(L, PN); + + // If the loop previously had floating-point IV, ScalarEvolution + // may not have been able to compute a trip count. Now that we've done some + // re-writing, the trip count may be computable. + if (Changed) + SE->forgetLoop(L); +} + +//===----------------------------------------------------------------------===// +// RewriteLoopExitValues - Optimize IV users outside the loop. +// As a side effect, reduces the amount of IV processing within the loop. +//===----------------------------------------------------------------------===// + /// RewriteLoopExitValues - Check to see if this loop has a computable /// loop-invariant execution count. If so, this means that we can compute the /// final value of any expressions that are recurrent in the loop, and @@ -460,29 +580,168 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { Rewriter.clearInsertPoint(); } -void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { - // First step. Check to see if there are any floating-point recurrences. - // If there are, change them into integer recurrences, permitting analysis by - // the SCEV routines. +//===----------------------------------------------------------------------===// +// Rewrite IV users based on a canonical IV. +// To be replaced by -disable-iv-rewrite. +//===----------------------------------------------------------------------===// + +/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this +/// loop. IVUsers is treated as a worklist. Each successive simplification may +/// push more users which may themselves be candidates for simplification. 
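The worklist discipline described in that comment, where one simplification may expose further candidates, looks like this in miniature (integers stand in for IV users, and simplifyOne for the comparison/remainder eliminations; none of this is IVUsers API):

#include <iostream>
#include <set>
#include <vector>

// Each node, when simplified, may expose new candidate nodes. A visited
// set keeps every candidate from being pushed more than once, so the loop
// terminates even if exposure relations form cycles.
static std::vector<int> simplifyOne(int N) {
  if (N % 2 == 0) return {N / 2}; // stand-in rewrite exposing one new user
  return {};
}

int main() {
  std::vector<int> Worklist = {24, 7};
  std::set<int> Visited(Worklist.begin(), Worklist.end());
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    std::cout << "simplify " << N << '\n';
    for (int User : simplifyOne(N))
      if (Visited.insert(User).second) // push each candidate only once
        Worklist.push_back(User);
  }
  return 0;
}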
+/// +/// This is the old approach to IV simplification to be replaced by +/// SimplifyIVUsersNoRewrite. +/// +void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { + // Each round of simplification involves a round of eliminating operations + // followed by a round of widening IVs. A single IVUsers worklist is used + // across all rounds. The inner loop advances the user. If widening exposes + // more uses, then another pass through the outer loop is triggered. + for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { + Instruction *UseInst = I->getUser(); + Value *IVOperand = I->getOperandValToReplace(); + + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + EliminateIVComparison(ICmp, IVOperand); + continue; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + EliminateIVRemainder(Rem, IVOperand, IsSigned); + continue; + } + } + } +} + +// FIXME: It is an extremely bad idea to indvar substitute anything more +// complex than affine induction variables. Doing so will put expensive +// polynomial evaluations inside of the loop, and the str reduction pass +// currently can only reduce affine polynomials. For now just disable +// indvar subst on anything more complex than an affine addrec, unless +// it can be expanded to a trivial value. +static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { + // Loop-invariant values are safe. + if (SE->isLoopInvariant(S, L)) return true; + + // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how + // to transform them into efficient code. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + return AR->isAffine(); + + // An add is safe it all its operands are safe. + if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { + for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), + E = Commutative->op_end(); I != E; ++I) + if (!isSafe(*I, L, SE)) return false; + return true; + } + + // A cast is safe if its operand is. + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return isSafe(C->getOperand(), L, SE); + + // A udiv is safe if its operands are. + if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) + return isSafe(UD->getLHS(), L, SE) && + isSafe(UD->getRHS(), L, SE); + + // SCEVUnknown is always safe. + if (isa<SCEVUnknown>(S)) + return true; + + // Nothing else is safe. + return false; +} + +void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { + // Rewrite all induction variable expressions in terms of the canonical + // induction variable. // - BasicBlock *Header = L->getHeader(); + // If there were induction variables of other sizes or offsets, manually + // add the offsets to the primary induction variable and cast, avoiding + // the need for the code evaluation methods to insert induction variables + // of different sizes. + for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { + Value *Op = UI->getOperandValToReplace(); + const Type *UseTy = Op->getType(); + Instruction *User = UI->getUser(); - SmallVector<WeakVH, 8> PHIs; - for (BasicBlock::iterator I = Header->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) - PHIs.push_back(PN); + // Compute the final addrec to expand into code. 
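The isSafe predicate defined above (and re-flagged in the FIXME just below) is a structural recursion over SCEV. The same shape over an invented Expr tree, keeping the patch's rules: invariants, affine recurrences, and unknowns pass; adds, casts, and udivs pass when all operands do; non-affine recurrences fail:

#include <vector>

struct Expr {
  enum Kind { Invariant, AffineRec, NonAffineRec, Add, Cast, UDiv, Unknown };
  Kind K;
  std::vector<const Expr *> Ops;
};

// Mirrors the recursion in isSafe(): reject anything LSR could not later
// strength-reduce, i.e. any non-affine recurrence anywhere in the tree.
static bool isSafeExpr(const Expr *E) {
  switch (E->K) {
  case Expr::Invariant:
  case Expr::AffineRec:
  case Expr::Unknown: // SCEVUnknown is always safe in the patch
    return true;
  case Expr::Add:
  case Expr::Cast:
  case Expr::UDiv:
    for (const Expr *Op : E->Ops)
      if (!isSafeExpr(Op))
        return false;
    return true;
  default: // NonAffineRec
    return false;
  }
}

int main() {
  Expr Inv{Expr::Invariant, {}};
  Expr Rec{Expr::AffineRec, {}};
  Expr Sum{Expr::Add, {&Inv, &Rec}};  // safe: affine plus invariant
  Expr Bad{Expr::NonAffineRec, {}};
  Expr Mix{Expr::Add, {&Sum, &Bad}};  // one unsafe operand poisons the add
  return (isSafeExpr(&Sum) && !isSafeExpr(&Mix)) ? 0 : 1;
}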
+ const SCEV *AR = IU->getReplacementExpr(*UI); - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) - if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) - HandleFloatingPointIV(L, PN); + // Evaluate the expression out of the loop, if possible. + if (!L->contains(UI->getUser())) { + const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); + if (SE->isLoopInvariant(ExitVal, L)) + AR = ExitVal; + } - // If the loop previously had floating-point IV, ScalarEvolution - // may not have been able to compute a trip count. Now that we've done some - // re-writing, the trip count may be computable. - if (Changed) - SE->forgetLoop(L); + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!isSafe(AR, L, SE)) + continue; + + // Determine the insertion point for this user. By default, insert + // immediately before the user. The SCEVExpander class will automatically + // hoist loop invariants out of the loop. For PHI nodes, there may be + // multiple uses, so compute the nearest common dominator for the + // incoming blocks. + Instruction *InsertPt = User; + if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingValue(i) == Op) { + if (InsertPt == User) + InsertPt = PHI->getIncomingBlock(i)->getTerminator(); + else + InsertPt = + DT->findNearestCommonDominator(InsertPt->getParent(), + PHI->getIncomingBlock(i)) + ->getTerminator(); + } + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } + // Inform ScalarEvolution that this value is changing. The change doesn't + // affect its value, but it does potentially affect which use lists the + // value will be on after the replacement, which affects ScalarEvolution's + // ability to walk use lists and drop dangling pointers when a value is + // deleted. + SE->forgetValue(User); + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + if (Instruction *NewValI = dyn_cast<Instruction>(NewVal)) + NewValI->setDebugLoc(User->getDebugLoc()); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); + } } +//===----------------------------------------------------------------------===// +// IV Widening - Extend the width of an IV to cover its widest uses. +//===----------------------------------------------------------------------===// + namespace { // Collect information about induction variables that are used by sign/zero // extend operations. This information is recorded by CollectExtend and @@ -493,33 +752,30 @@ namespace { WideIVInfo() : WidestNativeType(0), IsSigned(false) {} }; - typedef std::map<PHINode *, WideIVInfo> WideIVMap; } /// CollectExtend - Update information about the induction variable that is /// extended by this sign or zero extend operation. 
This is used to determine /// the final width of the IV before actually widening it. -static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned, - WideIVMap &IVMap, ScalarEvolution *SE, - const TargetData *TD) { +static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI, + ScalarEvolution *SE, const TargetData *TD) { const Type *Ty = Cast->getType(); uint64_t Width = SE->getTypeSizeInBits(Ty); if (TD && !TD->isLegalInteger(Width)) return; - WideIVInfo &IVInfo = IVMap[Phi]; - if (!IVInfo.WidestNativeType) { - IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty); - IVInfo.IsSigned = IsSigned; + if (!WI.WidestNativeType) { + WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); + WI.IsSigned = IsSigned; return; } // We extend the IV to satisfy the sign of its first user, arbitrarily. - if (IVInfo.IsSigned != IsSigned) + if (WI.IsSigned != IsSigned) return; - if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType)) - IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty); + if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) + WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); } namespace { @@ -529,43 +785,45 @@ namespace { /// inserting truncs whenever we stop propagating the type. /// class WidenIV { + // Parameters PHINode *OrigPhi; const Type *WideType; bool IsSigned; - IVUsers *IU; - LoopInfo *LI; - Loop *L; + // Context + LoopInfo *LI; + Loop *L; ScalarEvolution *SE; - DominatorTree *DT; - SmallVectorImpl<WeakVH> &DeadInsts; + DominatorTree *DT; + // Result PHINode *WidePhi; Instruction *WideInc; const SCEV *WideIncExpr; + SmallVectorImpl<WeakVH> &DeadInsts; - SmallPtrSet<Instruction*,16> Processed; + SmallPtrSet<Instruction*,16> Widened; + SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; public: - WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers, - LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, + WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo, + ScalarEvolution *SEv, DominatorTree *DTree, SmallVectorImpl<WeakVH> &DI) : OrigPhi(PN), - WideType(IVInfo.WidestNativeType), - IsSigned(IVInfo.IsSigned), - IU(IUsers), + WideType(WI.WidestNativeType), + IsSigned(WI.IsSigned), LI(LInfo), L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - DeadInsts(DI), WidePhi(0), WideInc(0), - WideIncExpr(0) { + WideIncExpr(0), + DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); } - bool CreateWideIV(SCEVExpander &Rewriter); + PHINode *CreateWideIV(SCEVExpander &Rewriter); protected: Instruction *CloneIVUser(Instruction *NarrowUse, @@ -574,58 +832,13 @@ protected: const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); - Instruction *WidenIVUse(Instruction *NarrowUse, - Instruction *NarrowDef, + Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, Instruction *WideDef); + + void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; } // anonymous namespace -/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this -/// loop. IVUsers is treated as a worklist. Each successive simplification may -/// push more users which may themselves be candidates for simplification. -/// -void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { - WideIVMap IVMap; - - // Each round of simplification involves a round of eliminating operations - // followed by a round of widening IVs. A single IVUsers worklist is used - // across all rounds. The inner loop advances the user. 
If widening exposes - // more uses, then another pass through the outer loop is triggered. - for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) { - for(; I != E; ++I) { - Instruction *UseInst = I->getUser(); - Value *IVOperand = I->getOperandValToReplace(); - - if (DisableIVRewrite) { - if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { - bool IsSigned = Cast->getOpcode() == Instruction::SExt; - if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { - CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD); - continue; - } - } - } - if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { - EliminateIVComparison(ICmp, IVOperand); - continue; - } - if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSigned = Rem->getOpcode() == Instruction::SRem; - if (IsSigned || Rem->getOpcode() == Instruction::URem) { - EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi()); - continue; - } - } - } - for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end(); - I != E; ++I) { - WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts); - if (Widener.CreateWideIV(Rewriter)) - Changed = true; - } - } -} - static Value *getExtend( Value *NarrowOper, const Type *WideType, bool IsSigned, IRBuilder<> &Builder) { return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : @@ -671,34 +884,16 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, LHS, RHS, NarrowBO->getName()); Builder.Insert(WideBO); - if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); - if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); - + if (const OverflowingBinaryOperator *OBO = + dyn_cast<OverflowingBinaryOperator>(NarrowBO)) { + if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); + if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); + } return WideBO; } llvm_unreachable(0); } -// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' -// perspective after widening it's type? In other words, can the extend be -// safely hoisted out of the loop with SCEV reducing the value to a recurrence -// on the same loop. If so, return the sign or zero extended -// recurrence. Otherwise return NULL. -const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { - if (!SE->isSCEVable(NarrowUse->getType())) - return 0; - - const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); - const SCEV *WideExpr = IsSigned ? - SE->getSignExtendExpr(NarrowExpr, WideType) : - SE->getZeroExtendExpr(NarrowExpr, WideType); - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); - if (!AddRec || AddRec->getLoop() != L) - return 0; - - return AddRec; -} - /// HoistStep - Attempt to hoist an IV increment above a potential use. /// /// To successfully hoist, two criteria must be met: @@ -733,18 +928,41 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos, return true; } +// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' +// perspective after widening it's type? In other words, can the extend be +// safely hoisted out of the loop with SCEV reducing the value to a recurrence +// on the same loop. If so, return the sign or zero extended +// recurrence. Otherwise return NULL. +const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { + if (!SE->isSCEVable(NarrowUse->getType())) + return 0; + + const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); + if (SE->getTypeSizeInBits(NarrowExpr->getType()) + >= SE->getTypeSizeInBits(WideType)) { + // NarrowUse implicitly widens its operand. 
e.g. a gep with a narrow + // index. So don't follow this use. + return 0; + } + + const SCEV *WideExpr = IsSigned ? + SE->getSignExtendExpr(NarrowExpr, WideType) : + SE->getZeroExtendExpr(NarrowExpr, WideType); + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); + if (!AddRec || AddRec->getLoop() != L) + return 0; + + return AddRec; +} + /// WidenIVUse - Determine whether an individual user of the narrow IV can be /// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, - Instruction *NarrowDef, +Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, Instruction *WideDef) { - // To be consistent with IVUsers, stop traversing the def-use chain at - // inner-loop phis or post-loop phis. - if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L) - return 0; + Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser()); - // Handle data flow merges and bizarre phi cycles. - if (!Processed.insert(NarrowUse)) + // Stop traversing the def-use chain at inner-loop phis or post-loop phis. + if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L) return 0; // Our raison d'etre! Eliminate sign and zero extension. @@ -755,7 +973,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, unsigned IVWidth = SE->getTypeSizeInBits(WideType); if (CastWidth < IVWidth) { // The cast isn't as wide as the IV, so insert a Trunc. - IRBuilder<> Builder(NarrowUse); + IRBuilder<> Builder(NarrowDefUse); NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType()); } else { @@ -775,23 +993,32 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, NarrowUse->replaceAllUsesWith(NewDef); DeadInsts.push_back(NarrowUse); } - // Now that the extend is gone, expose it's uses to IVUsers for potential - // further simplification within SimplifyIVUsers. - IU->AddUsersIfInteresting(WideDef, WidePhi); + // Now that the extend is gone, we want to expose it's uses for potential + // further simplification. We don't need to directly inform SimplifyIVUsers + // of the new users, because their parent IV will be processed later as a + // new loop phi. If we preserved IVUsers analysis, we would also want to + // push the uses of WideDef here. // No further widening is needed. The deceased [sz]ext had done it for us. return 0; } + + // Does this user itself evaluate to a recurrence after widening? const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse); if (!WideAddRec) { // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - IRBuilder<> Builder(NarrowUse); + IRBuilder<> Builder(NarrowDefUse); Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType()); NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); return 0; } + // We assume that block terminators are not SCEVable. We wouldn't want to + // insert a Trunc after a terminator if there happens to be a critical edge. + assert(NarrowUse != NarrowUse->getParent()->getTerminator() && + "SCEV is not expected to evaluate a block terminator"); + // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. Instruction *WideUse = 0; @@ -803,11 +1030,11 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, if (!WideUse) return 0; } - // GetWideRecurrence ensured that the narrow expression could be extended - // outside the loop without overflow. 
This suggests that the wide use + // Evaluation of WideAddRec ensured that the narrow expression could be + // extended outside the loop without overflow. This suggests that the wide use // evaluates to the same expression as the extended narrow use, but doesn't // absolutely guarantee it. Hence the following failsafe check. In rare cases - // where it fails, we simple throw away the newly created wide use. + // where it fails, we simply throw away the newly created wide use. if (WideAddRec != SE->getSCEV(WideUse)) { DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); @@ -819,21 +1046,36 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, return WideUse; } +/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers. +/// +void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { + for (Value::use_iterator UI = NarrowDef->use_begin(), + UE = NarrowDef->use_end(); UI != UE; ++UI) { + Use &U = UI.getUse(); + + // Handle data flow merges and bizarre phi cycles. + if (!Widened.insert(cast<Instruction>(U.getUser()))) + continue; + + NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef)); + } +} + /// CreateWideIV - Process a single induction variable. First use the /// SCEVExpander to create a wide induction variable that evaluates to the same /// recurrence as the original narrow IV. Then use a worklist to forward -/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all +/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all /// interesting IV users, the narrow IV will be isolated for removal by /// DeleteDeadPHIs. /// /// It would be simpler to delete uses as they are processed, but we must avoid /// invalidating SCEV expressions. /// -bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) { +PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); if (!AddRec) - return false; + return NULL; // Widen the induction variable expression. const SCEV *WideIVExpr = IsSigned ? @@ -846,9 +1088,9 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Can the IV be extended outside the loop without overflow? AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); if (!AddRec || AddRec->getLoop() != L) - return false; + return NULL; - // An AddRec must have loop-invariant operands. Since this AddRec it + // An AddRec must have loop-invariant operands. Since this AddRec is // materialized by a loop header phi, the expression cannot have any post-loop // operands, so they must dominate the loop header. assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) && @@ -876,39 +1118,37 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) { ++NumWidened; // Traverse the def-use chain using a worklist starting at the original IV. - assert(Processed.empty() && "expect initial state" ); + assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); + + Widened.insert(OrigPhi); + pushNarrowIVUsers(OrigPhi, WidePhi); - // Each worklist entry has a Narrow def-use link and Wide def. 
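CreateWideIV's loop keeps (narrow use, wide def) pairs rather than bare instructions, so each edge is processed together with the wide value that feeds it. A toy rendering over an integer def-use graph (the arithmetic on WideDef is a placeholder for WidenIVUse's cloning; only the pairing and forward traversal come from the patch):

#include <iostream>
#include <map>
#include <set>
#include <utility>
#include <vector>

int main() {
  // Narrow def-use graph: value -> its users. 0 models the original phi.
  std::map<int, std::vector<int>> Users = {{0, {1, 2}}, {1, {3}}};
  std::map<int, int> WideOf = {{0, 100}}; // 100 models WidePhi

  std::set<int> Widened = {0};
  std::vector<std::pair<int, int>> Worklist; // (narrow use, wide def)
  for (int U : Users[0])                     // pushNarrowIVUsers on the phi
    if (Widened.insert(U).second)
      Worklist.push_back({U, WideOf[0]});

  while (!Worklist.empty()) {
    auto [NarrowUse, WideDef] = Worklist.back();
    Worklist.pop_back();
    WideOf[NarrowUse] = WideDef + NarrowUse; // stand-in for WidenIVUse
    std::cout << NarrowUse << " -> " << WideOf[NarrowUse] << '\n';
    for (int U : Users[NarrowUse])           // follow edges from the clone
      if (Widened.insert(U).second)
        Worklist.push_back({U, WideOf[NarrowUse]});
  }
  return 0;
}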
- SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; - for (Value::use_iterator UI = OrigPhi->use_begin(), - UE = OrigPhi->use_end(); UI != UE; ++UI) { - NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi)); - } while (!NarrowIVUsers.empty()) { - Use *NarrowDefUse; + Use *UsePtr; Instruction *WideDef; - tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val(); + tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val(); + Use &NarrowDefUse = *UsePtr; // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. - Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get()); - Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser()); - Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef); + Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get()); + Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef); // Follow all def-use edges from the previous narrow use. - if (WideUse) { - for (Value::use_iterator UI = NarrowUse->use_begin(), - UE = NarrowUse->use_end(); UI != UE; ++UI) { - NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse)); - } - } + if (WideUse) + pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse); + // WidenIVUse may have removed the def-use edge. if (NarrowDef->use_empty()) DeadInsts.push_back(NarrowDef); } - return true; + return WidePhi; } +//===----------------------------------------------------------------------===// +// Simplification of IV users based on SCEV evaluation. +//===----------------------------------------------------------------------===// + void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { unsigned IVOperIdx = 0; ICmpInst::Predicate Pred = ICmp->getPredicate(); @@ -945,8 +1185,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, - bool IsSigned, - PHINode *IVPhi) { + bool IsSigned) { // We're only interested in the case where we know something about // the numerator. if (IVOperand != Rem->getOperand(0)) @@ -989,15 +1228,465 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, } // Inform IVUsers about the new users. - if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) - IU->AddUsersIfInteresting(I, IVPhi); - + if (IU) { + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + } DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); ++NumElimRem; Changed = true; DeadInsts.push_back(Rem); } +/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has +/// no observable side-effect given the range of IV values. +bool IndVarSimplify::EliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + EliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + EliminateIVRemainder(Rem, IVOperand, IsSigned); + return true; + } + } + + // Eliminate any operation that SCEV can prove is an identity function. 
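The identity test that follows this comment compares canonical SCEV forms rather than instruction shapes. A toy version with strings standing in for SCEVs (canonicalize() is invented; SE->getSCEV plays its role in the patch):

#include <cassert>
#include <string>

// canonicalize() stands in for ScalarEvolution::getSCEV: two expressions
// with equal canonical forms compute the same value, so the user can be
// replaced by its IV operand outright, whatever its syntax.
static std::string canonicalize(const std::string &Expr) {
  if (Expr == "i + 0" || Expr == "i * 1")
    return "i"; // toy identities; SCEV proves far more than these
  return Expr;
}

static bool isIdentityOf(const std::string &Use, const std::string &Op) {
  return canonicalize(Use) == canonicalize(Op);
}

int main() {
  assert(isIdentityOf("i + 0", "i"));  // replace the add with the IV itself
  assert(!isIdentityOf("i + 1", "i")); // a real increment survives
  return 0;
}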
+ if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || + (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + return false; + + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + UseInst->replaceAllUsesWith(IVOperand); + ++NumElimIdentity; + Changed = true; + DeadInsts.push_back(UseInst); + return true; +} + +/// pushIVUsers - Add all uses of Def to the current IV's worklist. +/// +static void pushIVUsers( + Instruction *Def, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { + + for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // Avoid infinite or exponential worklist processing. + // Also ensure unique worklist users. + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. + if (User != Def && Simplified.insert(User)) + SimpleIVUsers.push_back(std::make_pair(User, Def)); + } +} + +/// isSimpleIVUser - Return true if this instruction generates a simple SCEV +/// expression in terms of that IV. +/// +/// This is similar to IVUsers' isInsteresting() but processes each instruction +/// non-recursively when the operand is already known to be a simpleIVUser. +/// +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + // We assume that terminators are not SCEVable. + assert((!S || I != I->getParent()->getTerminator()) && + "can't fold terminators"); + + // Only consider affine recurrences. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); + if (AR && AR->getLoop() == L) + return true; + + return false; +} + +/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist +/// of IV users. Each successive simplification may push more users which may +/// themselves be candidates for simplification. +/// +/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it +/// simplifies instructions in-place during analysis. Rather than rewriting +/// induction variables bottom-up from their users, it transforms a chain of +/// IVUsers top-down, updating the IR only when it encouters a clear +/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still +/// needed, but only used to generate a new IV (phi) of wider type for sign/zero +/// extend elimination. +/// +/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. +/// +void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { + std::map<PHINode *, WideIVInfo> WideIVMap; + + SmallVector<PHINode*, 8> LoopPhis; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + LoopPhis.push_back(cast<PHINode>(I)); + } + // Each round of simplification iterates through the SimplifyIVUsers worklist + // for all current phis, then determines whether any IVs can be + // widened. Widening adds new phis to LoopPhis, inducing another round of + // simplification on the wide IVs. + while (!LoopPhis.empty()) { + // Evaluate as many IV expressions as possible before widening any IVs. This + // forces SCEV to set no-wrap flags before evaluating sign/zero + // extension. The first time SCEV attempts to normalize sign/zero extension, + // the result becomes final. 
So for the most predictable results, we delay + // evaluation of sign/zero extend evaluation until needed, and avoid running + // other SCEV based analysis prior to SimplifyIVUsersNoRewrite. + do { + PHINode *CurrIV = LoopPhis.pop_back_val(); + + // Information about sign/zero extensions of CurrIV. + WideIVInfo WI; + + // Instructions processed by SimplifyIVUsers for CurrIV. + SmallPtrSet<Instruction*,16> Simplified; + + // Use-def pairs if IV users waiting to be processed for CurrIV. + SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; + + // Push users of the current LoopPhi. In rare cases, pushIVUsers may be + // called multiple times for the same LoopPhi. This is the proper thing to + // do for loop header phis that use each other. + pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + + while (!SimpleIVUsers.empty()) { + Instruction *UseInst, *Operand; + tie(UseInst, Operand) = SimpleIVUsers.pop_back_val(); + // Bypass back edges to avoid extra work. + if (UseInst == CurrIV) continue; + + if (EliminateIVUser(UseInst, Operand)) { + pushIVUsers(Operand, Simplified, SimpleIVUsers); + continue; + } + if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { + bool IsSigned = Cast->getOpcode() == Instruction::SExt; + if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { + CollectExtend(Cast, IsSigned, WI, SE, TD); + } + continue; + } + if (isSimpleIVUser(UseInst, L, SE)) { + pushIVUsers(UseInst, Simplified, SimpleIVUsers); + } + } + if (WI.WidestNativeType) { + WideIVMap[CurrIV] = WI; + } + } while(!LoopPhis.empty()); + + for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(), + E = WideIVMap.end(); I != E; ++I) { + WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts); + if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) { + Changed = true; + LoopPhis.push_back(WidePhi); + } + } + WideIVMap.clear(); + } +} + +/// SimplifyCongruentIVs - Check for congruent phis in this loop header and +/// populate ExprToIVMap for use later. +/// +void IndVarSimplify::SimplifyCongruentIVs(Loop *L) { + DenseMap<const SCEV *, PHINode *> ExprToIVMap; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *Phi = cast<PHINode>(I); + if (!SE->isSCEVable(Phi->getType())) + continue; + + const SCEV *S = SE->getSCEV(Phi); + DenseMap<const SCEV *, PHINode *>::const_iterator Pos; + bool Inserted; + tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi)); + if (Inserted) + continue; + PHINode *OrigPhi = Pos->second; + // Replacing the congruent phi is sufficient because acyclic redundancy + // elimination, CSE/GVN, should handle the rest. However, once SCEV proves + // that a phi is congruent, it's almost certain to be the head of an IV + // user cycle that is isomorphic with the original phi. So it's worth + // eagerly cleaning up the common case of a single IV increment. 
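The congruence detection immediately above is one hash-map insert per phi: the first phi with a given SCEV becomes the representative, and later arrivals are folded into it. The standalone shape of that, with strings standing in for both SCEVs and phi names:

#include <iostream>
#include <map>
#include <string>
#include <utility>

int main() {
  // (phi name, canonical expression) pairs in header order. Equal
  // expressions mean provably congruent phis.
  std::pair<std::string, std::string> Phis[] = {
      {"i", "{0,+,1}"}, {"j", "{0,+,2}"}, {"i2", "{0,+,1}"}};

  std::map<std::string, std::string> ExprToIV;
  for (const auto &P : Phis) {
    auto [Pos, Inserted] = ExprToIV.insert({P.second, P.first});
    if (Inserted)
      std::cout << P.first << ": kept as representative\n";
    else // replaceAllUsesWith(representative) in the real pass
      std::cout << P.first << ": replaced by " << Pos->second << '\n';
  }
  return 0;
}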
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *OrigInc = + cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); + Instruction *IsomorphicInc = + cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); + if (OrigInc != IsomorphicInc && + SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) && + HoistStep(OrigInc, IsomorphicInc, DT)) { + DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); + IsomorphicInc->replaceAllUsesWith(OrigInc); + DeadInsts.push_back(IsomorphicInc); + } + } + DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + ++NumElimIV; + Phi->replaceAllUsesWith(OrigPhi); + DeadInsts.push_back(Phi); + } +} + +//===----------------------------------------------------------------------===// +// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. +//===----------------------------------------------------------------------===// + +/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken +/// count expression can be safely and cheaply expanded into an instruction +/// sequence that can be used by LinearFunctionTestReplace. +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || + BackedgeTakenCount->isZero()) + return false; + + if (!L->getExitingBlock()) + return false; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return false; + + // Special case: If the backedge-taken count is a UDiv, it's very likely a + // UDiv that ScalarEvolution produced in order to compute a precise + // expression, rather than a UDiv from the user's code. If we can't find a + // UDiv in the code with some simple searching, assume the former and forego + // rewriting the loop. + if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { + ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!OrigCond) return false; + const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); + R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); + if (R != BackedgeTakenCount) { + const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); + L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); + if (L != BackedgeTakenCount) + return false; + } + } + return true; +} + +/// getBackedgeIVType - Get the widest type used by the loop test after peeking +/// through Truncs. +/// +/// TODO: Unnecessary if LFTR does not force a canonical IV. +static const Type *getBackedgeIVType(Loop *L) { + if (!L->getExitingBlock()) + return 0; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return 0; + + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return 0; + + const Type *Ty = 0; + for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); + OI != OE; ++OI) { + assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); + TruncInst *Trunc = dyn_cast<TruncInst>(*OI); + if (!Trunc) + continue; + + return Trunc->getSrcTy(); + } + return Ty; +} + +/// LinearFunctionTestReplace - This method rewrites the exit condition of the +/// loop to be a canonical != comparison against the incremented loop induction +/// variable. 
This pass is able to rewrite the exit tests of any loop where the +/// SCEV analysis can determine a loop-invariant trip count of the loop, which +/// is actually a much broader range than just linear tests. +ICmpInst *IndVarSimplify:: +LinearFunctionTestReplace(Loop *L, + const SCEV *BackedgeTakenCount, + PHINode *IndVar, + SCEVExpander &Rewriter) { + assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + + // If the exiting block is not the same as the backedge block, we must compare + // against the preincremented value, otherwise we prefer to compare against + // the post-incremented value. + Value *CmpIndVar; + const SCEV *RHS = BackedgeTakenCount; + if (L->getExitingBlock() == L->getLoopLatch()) { + // Add one to the "backedge-taken" count to get the trip count. + // If this addition may overflow, we have to be more pessimistic and + // cast the induction variable before doing the add. + const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); + const SCEV *N = + SE->getAddExpr(BackedgeTakenCount, + SE->getConstant(BackedgeTakenCount->getType(), 1)); + if ((isa<SCEVConstant>(N) && !N->isZero()) || + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + // No overflow. Cast the sum. + RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); + } else { + // Potential overflow. Cast before doing the add. + RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, + IndVar->getType()); + RHS = SE->getAddExpr(RHS, + SE->getConstant(IndVar->getType(), 1)); + } + + // The BackedgeTaken expression contains the number of times that the + // backedge branches to the loop header. This is one less than the + // number of times the loop executes, so use the incremented indvar. + CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + } else { + // We have to use the preincremented value... + RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, + IndVar->getType()); + CmpIndVar = IndVar; + } + + // Expand the code for the iteration count. + assert(SE->isLoopInvariant(RHS, L) && + "Computed iteration count is not loop invariant!"); + Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + + // Insert a new icmp_ne or icmp_eq instruction before the branch. + ICmpInst::Predicate Opcode; + if (L->contains(BI->getSuccessor(0))) + Opcode = ICmpInst::ICMP_NE; + else + Opcode = ICmpInst::ICMP_EQ; + + DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" + << " LHS:" << *CmpIndVar << '\n' + << " op:\t" + << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" + << " RHS:\t" << *RHS << "\n"); + + ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); + Cond->setDebugLoc(BI->getDebugLoc()); + Value *OrigCond = BI->getCondition(); + // It's tempting to use replaceAllUsesWith here to fully replace the old + // comparison, but that's not immediately safe, since users of the old + // comparison may not be dominated by the new comparison. Instead, just + // update the branch to use the new comparison; in the common case this + // will make old comparison dead. + BI->setCondition(Cond); + DeadInsts.push_back(OrigCond); + + ++NumLFTR; + Changed = true; + return Cond; +} + +//===----------------------------------------------------------------------===// +// SinkUnusedInvariants. A late subpass to cleanup loop preheaders. 
+//===----------------------------------------------------------------------===// + +/// If there's a single exit block, sink any loop-invariant values that +/// were defined in the preheader but not used inside the loop into the +/// exit block to reduce register pressure in the loop. +void IndVarSimplify::SinkUnusedInvariants(Loop *L) { + BasicBlock *ExitBlock = L->getExitBlock(); + if (!ExitBlock) return; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) return; + + Instruction *InsertPt = ExitBlock->getFirstNonPHI(); + BasicBlock::iterator I = Preheader->getTerminator(); + while (I != Preheader->begin()) { + --I; + // New instructions were inserted at the end of the preheader. + if (isa<PHINode>(I)) + break; + + // Don't move instructions which might have side effects, since the side + // effects need to complete before instructions inside the loop. Also don't + // move instructions which might read memory, since the loop may modify + // memory. Note that it's okay if the instruction might have undefined + // behavior: LoopSimplify guarantees that the preheader dominates the exit + // block. + if (I->mayHaveSideEffects() || I->mayReadFromMemory()) + continue; + + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + continue; + + // Don't sink static AllocaInsts out of the entry block, which would + // turn them into dynamic allocas! + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (AI->isStaticAlloca()) + continue; + + // Determine if there is a use in or before the loop (direct or + // otherwise). + bool UsedInLoop = false; + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + User *U = *UI; + BasicBlock *UseBB = cast<Instruction>(U)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(U)) { + unsigned i = + PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); + UseBB = P->getIncomingBlock(i); + } + if (UseBB == Preheader || L->contains(UseBB)) { + UsedInLoop = true; + break; + } + } + + // If there is, the def must remain in the preheader. + if (UsedInLoop) + continue; + + // Otherwise, sink it to the exit block. + Instruction *ToMove = I; + bool Done = false; + + if (I != Preheader->begin()) { + // Skip debug info intrinsics. + do { + --I; + } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); + + if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) + Done = true; + } else { + Done = true; + } + + ToMove->moveBefore(InsertPt); + if (Done) break; + InsertPt = ToMove; + } +} + +//===----------------------------------------------------------------------===// +// IndVarSimplify driver. Manage several subpasses of IV simplification. +//===----------------------------------------------------------------------===// + bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If LoopSimplify form is not available, stay out of trouble. Some notes: // - LSR currently only supports LoopSimplify-form loops. Indvars' @@ -1010,7 +1699,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!L->isLoopSimplifyForm()) return false; - IU = &getAnalysis<IVUsers>(); + if (!DisableIVRewrite) + IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); @@ -1026,9 +1716,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. 
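SinkUnusedInvariants above is, at heart, a backwards scan over the preheader with a handful of exclusion rules. A flattened model of just the filter (Inst is an invented record, not an LLVM class; the rules are the ones the pass applies):

#include <iostream>
#include <string>
#include <vector>

// A made-up view of a preheader instruction: the pass refuses to sink
// anything that touches memory, is debug-only, is a static alloca, or
// still has a use in (or before) the loop.
struct Inst {
  std::string Name;
  bool TouchesMemory, IsDebugInfo, IsStaticAlloca, UsedInLoop;
};

static bool canSink(const Inst &I) {
  return !I.TouchesMemory && !I.IsDebugInfo && !I.IsStaticAlloca &&
         !I.UsedInLoop;
}

int main() {
  std::vector<Inst> Preheader = {
      {"gep.exit", false, false, false, false}, // sinks: used only after loop
      {"load.a", true, false, false, false},    // stays: reads memory
      {"iv.start", false, false, false, true},  // stays: feeds the loop phi
  };
  // Walk bottom-up, like the pass does from the preheader terminator.
  for (auto It = Preheader.rbegin(); It != Preheader.rend(); ++It)
    std::cout << It->Name << (canSink(*It) ? ": sink" : ": keep") << '\n';
  return 0;
}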
- SCEVExpander Rewriter(*SE); - if (DisableIVRewrite) + SCEVExpander Rewriter(*SE, "indvars"); + + // Eliminate redundant IV users. + // + // Simplification works best when run before other consumers of SCEV. We + // attempt to avoid evaluating SCEVs for sign/zero extend operations until + // other expressions involving loop IVs have been evaluated. This helps SCEV + // set no-wrap flags before normalizing sign/zero extension. + if (DisableIVRewrite) { Rewriter.disableCanonicalMode(); + SimplifyIVUsersNoRewrite(L, Rewriter); + } // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions @@ -1040,7 +1739,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV users. - SimplifyIVUsers(Rewriter); + if (!DisableIVRewrite) + SimplifyIVUsers(Rewriter); + + // Eliminate redundant IV cycles. + if (DisableIVRewrite) + SimplifyCongruentIVs(L); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. @@ -1119,8 +1823,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { "canonical IV disrupted BackedgeTaken expansion"); assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); - NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, - Rewriter); + // Check preconditions for proper SCEVExpander operation. SCEV does not + // express SCEVExpander's dependencies, such as LoopSimplify. Instead any + // pass that uses the SCEVExpander must do it. This does not work well for + // loop passes because SCEVExpander makes assumptions about all loops, while + // LoopPassManager only forces the current loop to be simplified. + // + // FIXME: SCEV expansion has no way to bail out, so the caller must + // explicitly check any assumptions made by SCEV. Brittle. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount); + if (!AR || AR->getLoop()->getLoopPreheader()) + NewICmp = + LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); } // Rewrite IV-derived expressions. if (!DisableIVRewrite) @@ -1146,9 +1860,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // For completeness, inform IVUsers of the IV use in the newly-created // loop exit test instruction. - if (NewICmp) - IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)), - IndVar); + if (NewICmp && IU) + IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); @@ -1156,428 +1869,3 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); return Changed; } - -// FIXME: It is an extremely bad idea to indvar substitute anything more -// complex than affine induction variables. Doing so will put expensive -// polynomial evaluations inside of the loop, and the str reduction pass -// currently can only reduce affine polynomials. For now just disable -// indvar subst on anything more complex than an affine addrec, unless -// it can be expanded to a trivial value. -static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { - // Loop-invariant values are safe. - if (SE->isLoopInvariant(S, L)) return true; - - // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how - // to transform them into efficient code. 
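// For instance (illustrative values only): the affine recurrence {0,+,4}
// advances by a single add each iteration, while the non-affine {0,+,1,+,2},
// the sequence of values of i*i, would force a multiply into the loop body
// that strength reduction cannot eliminate:
//
//   unsigned next(unsigned iv) { return iv + 4; } // affine: one add per trip
//   unsigned quad(unsigned i)  { return i * i; }  // non-affine: re-evaluated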
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- return AR->isAffine();
-
- // An add is safe if all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
- E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L, SE)) return false;
- return true;
- }
-
- // A cast is safe if its operand is.
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L, SE);
-
- // A udiv is safe if its operands are.
- if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L, SE) &&
- isSafe(UD->getRHS(), L, SE);
-
- // SCEVUnknown is always safe.
- if (isa<SCEVUnknown>(S))
- return true;
-
- // Nothing else is safe.
- return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- // Rewrite all induction variable expressions in terms of the canonical
- // induction variable.
- //
- // If there were induction variables of other sizes or offsets, manually
- // add the offsets to the primary induction variable and cast, avoiding
- // the need for the code evaluation methods to insert induction variables
- // of different sizes.
- for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
- const Type *UseTy = Op->getType();
- Instruction *User = UI->getUser();
-
- // Compute the final addrec to expand into code.
- const SCEV *AR = IU->getReplacementExpr(*UI);
-
- // Evaluate the expression out of the loop, if possible.
- if (!L->contains(UI->getUser())) {
- const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (SE->isLoopInvariant(ExitVal, L))
- AR = ExitVal;
- }
-
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec, unless
- // it can be expanded to a trivial value.
- if (!isSafe(AR, L, SE))
- continue;
-
- // Determine the insertion point for this user. By default, insert
- // immediately before the user. The SCEVExpander class will automatically
- // hoist loop invariants out of the loop. For PHI nodes, there may be
- // multiple uses, so compute the nearest common dominator for the
- // incoming blocks.
- Instruction *InsertPt = User;
- if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingValue(i) == Op) {
- if (InsertPt == User)
- InsertPt = PHI->getIncomingBlock(i)->getTerminator();
- else
- InsertPt =
- DT->findNearestCommonDominator(InsertPt->getParent(),
- PHI->getIncomingBlock(i))
- ->getTerminator();
- }
-
- // Now expand it into actual Instructions and patch it into place.
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
-
- if (!isValidRewrite(Op, NewVal)) {
- DeadInsts.push_back(NewVal);
- continue;
- }
- // Inform ScalarEvolution that this value is changing.
The change doesn't - // affect its value, but it does potentially affect which use lists the - // value will be on after the replacement, which affects ScalarEvolution's - // ability to walk use lists and drop dangling pointers when a value is - // deleted. - SE->forgetValue(User); - - // Patch the new value into place. - if (Op->hasName()) - NewVal->takeName(Op); - User->replaceUsesOfWith(Op, NewVal); - UI->setOperandValToReplace(NewVal); - - ++NumRemoved; - Changed = true; - - // The old value may be dead now. - DeadInsts.push_back(Op); - } -} - -/// If there's a single exit block, sink any loop-invariant values that -/// were defined in the preheader but not used inside the loop into the -/// exit block to reduce register pressure in the loop. -void IndVarSimplify::SinkUnusedInvariants(Loop *L) { - BasicBlock *ExitBlock = L->getExitBlock(); - if (!ExitBlock) return; - - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) return; - - Instruction *InsertPt = ExitBlock->getFirstNonPHI(); - BasicBlock::iterator I = Preheader->getTerminator(); - while (I != Preheader->begin()) { - --I; - // New instructions were inserted at the end of the preheader. - if (isa<PHINode>(I)) - break; - - // Don't move instructions which might have side effects, since the side - // effects need to complete before instructions inside the loop. Also don't - // move instructions which might read memory, since the loop may modify - // memory. Note that it's okay if the instruction might have undefined - // behavior: LoopSimplify guarantees that the preheader dominates the exit - // block. - if (I->mayHaveSideEffects() || I->mayReadFromMemory()) - continue; - - // Skip debug info intrinsics. - if (isa<DbgInfoIntrinsic>(I)) - continue; - - // Don't sink static AllocaInsts out of the entry block, which would - // turn them into dynamic allocas! - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - if (AI->isStaticAlloca()) - continue; - - // Determine if there is a use in or before the loop (direct or - // otherwise). - bool UsedInLoop = false; - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - User *U = *UI; - BasicBlock *UseBB = cast<Instruction>(U)->getParent(); - if (PHINode *P = dyn_cast<PHINode>(U)) { - unsigned i = - PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); - UseBB = P->getIncomingBlock(i); - } - if (UseBB == Preheader || L->contains(UseBB)) { - UsedInLoop = true; - break; - } - } - - // If there is, the def must remain in the preheader. - if (UsedInLoop) - continue; - - // Otherwise, sink it to the exit block. - Instruction *ToMove = I; - bool Done = false; - - if (I != Preheader->begin()) { - // Skip debug info intrinsics. - do { - --I; - } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); - - if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) - Done = true; - } else { - Done = true; - } - - ToMove->moveBefore(InsertPt); - if (Done) break; - InsertPt = ToMove; - } -} - -/// ConvertToSInt - Convert APF to an integer, if possible. 
-static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { - bool isExact = false; - if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) - return false; - // See if we can convert this to an int64_t - uint64_t UIntVal; - if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, - &isExact) != APFloat::opOK || !isExact) - return false; - IntVal = UIntVal; - return true; -} - -/// HandleFloatingPointIV - If the loop has floating induction variable -/// then insert corresponding integer induction variable if possible. -/// For example, -/// for(double i = 0; i < 10000; ++i) -/// bar(i) -/// is converted into -/// for(int i = 0; i < 10000; ++i) -/// bar((double)i); -/// -void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { - unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); - unsigned BackEdge = IncomingEdge^1; - - // Check incoming value. - ConstantFP *InitValueVal = - dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); - - int64_t InitValue; - if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) - return; - - // Check IV increment. Reject this PN if increment operation is not - // an add or increment value can not be represented by an integer. - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); - if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - - // If this is not an add of the PHI with a constantfp, or if the constant fp - // is not an integer, bail out. - ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); - int64_t IncValue; - if (IncValueVal == 0 || Incr->getOperand(0) != PN || - !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) - return; - - // Check Incr uses. One user is PN and the other user is an exit condition - // used by the conditional terminator. - Value::use_iterator IncrUse = Incr->use_begin(); - Instruction *U1 = cast<Instruction>(*IncrUse++); - if (IncrUse == Incr->use_end()) return; - Instruction *U2 = cast<Instruction>(*IncrUse++); - if (IncrUse != Incr->use_end()) return; - - // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't - // only used by a branch, we can't transform it. - FCmpInst *Compare = dyn_cast<FCmpInst>(U1); - if (!Compare) - Compare = dyn_cast<FCmpInst>(U2); - if (Compare == 0 || !Compare->hasOneUse() || - !isa<BranchInst>(Compare->use_back())) - return; - - BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); - - // We need to verify that the branch actually controls the iteration count - // of the loop. If not, the new IV can overflow and no one will notice. - // The branch block must be in the loop and one of the successors must be out - // of the loop. - assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); - if (!L->contains(TheBr->getParent()) || - (L->contains(TheBr->getSuccessor(0)) && - L->contains(TheBr->getSuccessor(1)))) - return; - - - // If it isn't a comparison with an integer-as-fp (the exit value), we can't - // transform it. - ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); - int64_t ExitValue; - if (ExitValueVal == 0 || - !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) - return; - - // Find new predicate for integer comparison. - CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; - switch (Compare->getPredicate()) { - default: return; // Unknown comparison. 
- case CmpInst::FCMP_OEQ: - case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; - case CmpInst::FCMP_ONE: - case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; - case CmpInst::FCMP_OGT: - case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; - case CmpInst::FCMP_OGE: - case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; - case CmpInst::FCMP_OLT: - case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; - case CmpInst::FCMP_OLE: - case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; - } - - // We convert the floating point induction variable to a signed i32 value if - // we can. This is only safe if the comparison will not overflow in a way - // that won't be trapped by the integer equivalent operations. Check for this - // now. - // TODO: We could use i64 if it is native and the range requires it. - - // The start/stride/exit values must all fit in signed i32. - if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) - return; - - // If not actually striding (add x, 0.0), avoid touching the code. - if (IncValue == 0) - return; - - // Positive and negative strides have different safety conditions. - if (IncValue > 0) { - // If we have a positive stride, we require the init to be less than the - // exit value and an equality or less than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) - return; - - uint32_t Range = uint32_t(ExitValue-InitValue); - if (NewPred == CmpInst::ICMP_SLE) { - // Normalize SLE -> SLT, check for infinite loop. - if (++Range == 0) return; // Range overflows. - } - - unsigned Leftover = Range % uint32_t(IncValue); - - // If this is an equality comparison, we require that the strided value - // exactly land on the exit value, otherwise the IV condition will wrap - // around and do things the fp IV wouldn't. - if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && - Leftover != 0) - return; - - // If the stride would wrap around the i32 before exiting, we can't - // transform the IV. - if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) - return; - - } else { - // If we have a negative stride, we require the init to be greater than the - // exit value and an equality or greater than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) - return; - - uint32_t Range = uint32_t(InitValue-ExitValue); - if (NewPred == CmpInst::ICMP_SGE) { - // Normalize SGE -> SGT, check for infinite loop. - if (++Range == 0) return; // Range overflows. - } - - unsigned Leftover = Range % uint32_t(-IncValue); - - // If this is an equality comparison, we require that the strided value - // exactly land on the exit value, otherwise the IV condition will wrap - // around and do things the fp IV wouldn't. - if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && - Leftover != 0) - return; - - // If the stride would wrap around the i32 before exiting, we can't - // transform the IV. - if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) - return; - } - - const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); - - // Insert new integer induction variable. 
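// A worked instance of the stride checks above, with hypothetical values
// InitValue = 0, IncValue = 3, ExitValue = 10 and NewPred = ICMP_NE:
// Range = 10 - 0 = 10 and Leftover = 10 % 3 = 1. Since Leftover != 0, the
// strided value (0, 3, 6, 9, 12, ...) never lands exactly on the exit value;
// the i32 IV would eventually wrap where the double IV simply keeps growing,
// so the transform is correctly rejected before reaching the code below.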
- PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); - NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), - PN->getIncomingBlock(IncomingEdge)); - - Value *NewAdd = - BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), - Incr->getName()+".int", Incr); - NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); - - ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, - ConstantInt::get(Int32Ty, ExitValue), - Compare->getName()); - - // In the following deletions, PN may become dead and may be deleted. - // Use a WeakVH to observe whether this happens. - WeakVH WeakPH = PN; - - // Delete the old floating point exit comparison. The branch starts using the - // new comparison. - NewCompare->takeName(Compare); - Compare->replaceAllUsesWith(NewCompare); - RecursivelyDeleteTriviallyDeadInstructions(Compare); - - // Delete the old floating point increment. - Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - RecursivelyDeleteTriviallyDeadInstructions(Incr); - - // If the FP induction variable still has uses, this is because something else - // in the loop uses its value. In order to canonicalize the induction - // variable, we chose to eliminate the IV and rewrite it in terms of an - // int->fp cast. - // - // We give preference to sitofp over uitofp because it is faster on most - // platforms. - if (WeakPH) { - Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", - PN->getParent()->getFirstNonPHI()); - PN->replaceAllUsesWith(Conv); - RecursivelyDeleteTriviallyDeadInstructions(PN); - } - - // Add a new IVUsers entry for the newly-created integer PHI. - IU->AddUsersIfInteresting(NewPHI, NewPHI); -} diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index cf18ff0..b500d5b 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -600,8 +600,10 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { TestBB = BBTerm->getSuccessor(i); unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - if (NumPreds < MinNumPreds) + if (NumPreds < MinNumPreds) { MinSucc = i; + MinNumPreds = NumPreds; + } } return MinSucc; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 13bd022..66add6c 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -178,7 +178,7 @@ INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LICM(); } /// Hoist expressions out of the specified loop. Note, alias info for inner -/// loop is not preserved so it is not a good idea to run LICM multiple +/// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. /// bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -199,13 +199,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // What if InnerLoop was modified by other passes ? CurAST->add(*InnerAST); - + // Once we've incorporated the inner loop's AST into ours, we don't need the // subloop's anymore. delete InnerAST; LoopToAliasSetMap.erase(InnerL); } - + CurLoop = L; // Get the preheader block to move instructions into... 
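// The JumpThreading change above fixes a classic min-tracking slip: without
// updating MinNumPreds, every later successor is compared against the first
// one's count. A sketch of the corrected scan, with hypothetical counts:
//
//   unsigned MinSucc = 0, MinNumPreds = Preds[0]; // e.g. Preds = {3, 1, 2}
//   for (unsigned i = 1; i != N; ++i)
//     if (Preds[i] < MinNumPreds) {
//       MinSucc = i;
//       MinNumPreds = Preds[i]; // the line the patch adds
//     }
//   // without the update, {3, 1, 2} ends with MinSucc == 2 instead of 1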
@@ -245,7 +245,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { I != E; ++I) PromoteAliasSet(*I); } - + // Clear out loops state information for the next iteration CurLoop = 0; Preheader = 0; @@ -283,7 +283,7 @@ void LICM::SinkRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { Instruction &I = *--II; - + // If the instruction is dead, we would try to sink it because it isn't used // in the loop, instead, just delete it. if (isInstructionTriviallyDead(&I)) { @@ -336,7 +336,7 @@ void LICM::HoistRegion(DomTreeNode *N) { I.eraseFromParent(); continue; } - + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. @@ -364,7 +364,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - + // Don't hoist loads which have may-aliased stores in loop. uint64_t Size = 0; if (LI->getType()->isSized()) @@ -470,7 +470,7 @@ void LICM::sink(Instruction &I) { } return; } - + if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); @@ -482,30 +482,30 @@ void LICM::sink(Instruction &I) { I.eraseFromParent(); return; } - + // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the // hard work of inserting PHI nodes as necessary. SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSA(&NewPHIs); - + if (!I.use_empty()) SSA.Initialize(I.getType(), I.getName()); - + // Insert a copy of the instruction in each exit block of the loop that is // dominated by the instruction. Each exit block is known to only be in the // ExitBlocks list once. BasicBlock *InstOrigBB = I.getParent(); unsigned NumInserted = 0; - + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; - + if (!DT->dominates(InstOrigBB, ExitBlock)) continue; - + // Insert the code after the last PHI node. BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); - + // If this is the first exit block processed, just move the original // instruction, otherwise clone the original instruction and insert // the copy. @@ -519,12 +519,12 @@ void LICM::sink(Instruction &I) { New->setName(I.getName()+".le"); ExitBlock->getInstList().insert(InsertPt, New); } - + // Now that we have inserted the instruction, inform SSAUpdater. if (!I.use_empty()) SSA.AddAvailableValue(ExitBlock, New); } - + // If the instruction doesn't dominate any exit blocks, it must be dead. if (NumInserted == 0) { CurAST->deleteValue(&I); @@ -533,7 +533,7 @@ void LICM::sink(Instruction &I) { I.eraseFromParent(); return; } - + // Next, rewrite uses of the instruction, inserting PHI nodes as needed. for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { // Grab the use before incrementing the iterator. @@ -542,12 +542,12 @@ void LICM::sink(Instruction &I) { ++UI; SSA.RewriteUseAfterInsertions(U); } - + // Update CurAST for NewPHIs if I had pointer type. if (I.getType()->isPointerTy()) for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) CurAST->copyValue(&I, NewPHIs[i]); - + // Finally, remove the instruction from CurAST. It is no longer in the loop. 
CurAST->deleteValue(&I);
}

@@ -606,15 +606,17 @@ namespace {
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
AliasSetTracker &AST;
DebugLoc DL;
+ int Alignment;
public:
LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts,
SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA,
SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
- DebugLoc dl)
- : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {}
-
+ DebugLoc dl, int alignment)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP),
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
+ Alignment(alignment) {}
+
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
Value *Ptr;
@@ -624,7 +626,7 @@ namespace {
Ptr = cast<StoreInst>(I)->getPointerOperand();
return PointerMustAliases.count(Ptr);
}
-
+
virtual void doExtraRewritesBeforeFinalDeletion() const {
// Insert stores in the loop exit blocks. Each exit block gets a
// store of the live-out value that feeds it. Since we've already told
@@ -635,6 +637,7 @@ namespace {
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
Instruction *InsertPos = ExitBlock->getFirstNonPHI();
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
}
}
@@ -661,7 +664,7 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
return;
-
+
assert(!AS.empty() &&
"Must alias set should have at least one pointer element in it!");
Value *SomePtr = AS.begin()->getValue();
@@ -676,60 +679,78 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
//
// is not safe, because *P may only be valid to access if 'c' is true.
- //
+ //
// It is safe to promote P if all uses are direct load/stores and if at
// least one is guaranteed to be executed.
bool GuaranteedToExecute = false;
-
+
SmallVector<Instruction*, 64> LoopUses;
SmallPtrSet<Value*, 4> PointerMustAliases;

+ // We start with an alignment of one and try to find instructions that allow
+ // us to prove better alignment.
+ unsigned Alignment = 1;
+
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
Value *ASIV = ASI->getValue();
PointerMustAliases.insert(ASIV);
-
+
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
if (SomePtr->getType() != ASIV->getType())
return;
-
+
for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
UI != UE; ++UI) {
// Ignore instructions that are outside the loop.
Instruction *Use = dyn_cast<Instruction>(*UI);
if (!Use || !CurLoop->contains(Use))
continue;
-
+
// If there is a non-load/store instruction in the loop, we can't promote
// it.
- if (isa<LoadInst>(Use))
+ unsigned InstAlignment;
+ if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
- else if (isa<StoreInst>(Use)) {
+ InstAlignment = load->getAlignment();
+ } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
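// How the alignment bookkeeping introduced above is meant to play out, on
// hypothetical uses (the update rule itself follows just below): an
// alignment of 0 means "use the ABI alignment" and always wins; otherwise
// the largest alignment seen on a guaranteed-to-execute access is kept.
//
//   load,  align 4,  guaranteed to execute  -> Alignment = 4
//   store, align 8,  guaranteed to execute  -> Alignment = 8
//   store, align 16, conditionally executed -> ignored; it might never run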
if (Use->getOperand(1) != ASIV) continue; + InstAlignment = store->getAlignment(); assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); } else return; // Not a load or store. - + + // If the alignment of this instruction allows us to specify a more + // restrictive (and performant) alignment and if we are sure this + // instruction will be executed, update the alignment. + // Larger is better, with the exception of 0 being the best alignment. + if ((InstAlignment > Alignment || InstAlignment == 0) + && (Alignment != 0)) + if (isSafeToExecuteUnconditionally(*Use)) { + GuaranteedToExecute = true; + Alignment = InstAlignment; + } + if (!GuaranteedToExecute) GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); - + LoopUses.push_back(Use); } } - + // If there isn't a guaranteed-to-execute instruction, we can't promote. if (!GuaranteedToExecute) return; - + // Otherwise, this is safe to promote, lets do it! - DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); + DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); Changed = true; ++NumPromoted; @@ -741,18 +762,19 @@ void LICM::PromoteAliasSet(AliasSet &AS) { SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); - + // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL); - + *CurAST, DL, Alignment); + // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst(SomePtr, SomePtr->getName()+".promoted", Preheader->getTerminator()); + PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); SSA.AddAvailableValue(Preheader, PreheaderLoad); diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 753a558..f7f3298 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -190,7 +190,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast<PHINode>(BI)) { - P->replaceUsesOfWith(exitingBlock, preheader); + int j = P->getBasicBlockIndex(exitingBlock); + assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); + P->setIncomingBlock(j, preheader); for (unsigned i = 1; i < exitingBlocks.size(); ++i) P->removeIncomingValue(exitingBlocks[i]); ++BI; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index dbf6eec..a0e41d9 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -167,12 +167,17 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { if (Instruction *I = dyn_cast<Instruction>(V)) if (isInstructionTriviallyDead(I)) - deleteDeadInstruction(I, SE); + deleteDeadInstruction(I, SE); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; + // Disable loop idiom recognition if the function's name is a common idiom. + StringRef Name = L->getHeader()->getParent()->getName(); + if (Name == "memset" || Name == "memcpy") + return false; + // The trip count of the loop must be analyzable. 
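// Why the function-name check above matters, as a sketch: running loop idiom
// recognition inside a C library's own memset would rewrite the store loop
// into a call to memset, creating infinite recursion. A hypothetical mini
// implementation that would trigger exactly that:
//
//   void *memset(void *p, int c, size_t n) {
//     unsigned char *b = (unsigned char *)p;
//     for (size_t i = 0; i != n; ++i)
//       b[i] = (unsigned char)c; // this is precisely the memset idiom
//     return p;
//   }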
SE = &getAnalysis<ScalarEvolution>(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) @@ -467,8 +472,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE); - + SCEVExpander Expander(*SE, "loop-idiom"); + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -488,7 +493,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, deleteIfDeadInstruction(BasePtr, *SE); return false; } - + // Okay, everything looks good, insert the memset. // The # stored bytes is (BECount+1)*Size. Expand the trip count out to @@ -556,8 +561,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE); - + SCEVExpander Expander(*SE, "loop-idiom"); + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -568,7 +573,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); - + if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), SI)) { @@ -593,9 +598,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, deleteIfDeadInstruction(StoreBasePtr, *SE); return false; } - + // Okay, everything is safe, we can transform this! - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. @@ -619,7 +624,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); - + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 47dced3..9fd0958 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -220,7 +220,7 @@ bool LoopRotate::rotateLoop(Loop *L) { // For PHI nodes, the value available in OldPreHeader is just the // incoming value from OldPreHeader. for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) - ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); + ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 73ebd61..509d026 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1804,8 +1804,7 @@ LSRInstance::OptimizeLoopTermCond() { ExitingBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! 
- CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(), - CondUse->getPhi()); + CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); TermBr->replaceUsesOfWith(OldCond, Cond); } } @@ -2768,7 +2767,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // value to the immediate would produce a value closer to zero than the // immediate itself, then the formula isn't worthwhile. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) - if (C->getValue()->getValue().isNegative() != + if (C->getValue()->isNegative() != (NewF.AM.BaseOffs < 0) && (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) .ule(abs64(NewF.AM.BaseOffs))) @@ -3699,7 +3698,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, // we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; - SCEVExpander Rewriter(SE); + SCEVExpander Rewriter(SE, "lsr"); Rewriter.disableCanonicalMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index e05f29c..840c4b6 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1021,6 +1021,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { while (PHINode *PN = dyn_cast<PHINode>(Succ->begin())) ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM); + // If Succ has any successors with PHI nodes, update them to have + // entries coming from Pred instead of Succ. + Succ->replaceAllUsesWith(Pred); + // Move all of the successor contents from Succ to Pred. Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(), Succ->end()); @@ -1028,10 +1032,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { BI->eraseFromParent(); RemoveFromWorklist(BI, Worklist); - // If Succ has any successors with PHI nodes, update them to have - // entries coming from Pred instead of Succ. - Succ->replaceAllUsesWith(Pred); - // Remove Succ from the loop tree. LI->removeBlock(Succ); LPM->deleteSimpleAnalysisValue(Succ, L); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index be5aa2e..7ed3db6 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -487,7 +487,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // happen to be using a load-store pair to implement it, rather than // a memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { - if (!LI->isVolatile() && LI->hasOneUse()) { + if (!LI->isVolatile() && LI->hasOneUse() && + LI->getParent() == SI->getParent()) { MemDepResult ldep = MD->getDependency(LI); CallInst *C = 0; if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) @@ -496,17 +497,14 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (C) { // Check that nothing touches the dest of the "copy" between // the call and the store. 
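// The replacement below walks backwards from the store to the candidate
// call, asking alias analysis whether any intervening instruction may touch
// the store's destination; a sketch on a hypothetical block:
//
//   %c = call i8* @producer(...)  ; candidate call C
//   store i32 0, i32* %other      ; no mod/ref on %dst -> keep walking
//   store i32 %v, i32* %dst       ; the store SI being optimized
//
// Only if the walk reaches C without a possible access to %dst does the
// call-slot transformation remain valid.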
- MemDepResult sdep = MD->getDependency(SI);
- if (!sdep.isNonLocal()) {
- bool FoundCall = false;
- for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) {
- if (&*I == C) {
- FoundCall = true;
- break;
- }
- }
- if (!FoundCall)
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+ E = C; I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
C = 0;
+ break;
+ }
}
}
@@ -842,11 +840,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
// If not, then we know we can transform this.
Module *Mod = M->getParent()->getParent()->getParent();
- const Type *ArgTys[3] = { M->getRawDest()->getType(),
- M->getRawSource()->getType(),
- M->getLength()->getType() };
+ Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
- ArgTys, 3));
+ ArgTys));

// MemDep may have overly conservative information about this instruction, just
// conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
new file mode 100644
index 0000000..ee132d3
--- /dev/null
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -0,0 +1,3595 @@
+//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ObjC ARC optimizations. ARC stands for
+// Automatic Reference Counting and is a system for managing reference counts
+// for objects in Objective C.
+//
+// The optimizations performed include elimination of redundant, partially
+// redundant, and inconsequential reference count operations, elimination of
+// redundant weak pointer operations, pattern-matching and replacement of
+// low-level operations into higher-level operations, and numerous minor
+// simplifications.
+//
+// This file also defines a simple ARC-aware AliasAnalysis.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+// WARNING: This file knows about how certain Objective-C library functions are
+// used. Naive LLVM IR transformations which would otherwise be
+// behavior-preserving may break these assumptions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// A handy option to enable/disable all optimizations in this file.
+static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// Misc.
Utilities +//===----------------------------------------------------------------------===// + +namespace { + /// MapVector - An associative container with fast insertion-order + /// (deterministic) iteration over its elements. Plus the special + /// blot operation. + template<class KeyT, class ValueT> + class MapVector { + /// Map - Map keys to indices in Vector. + typedef DenseMap<KeyT, size_t> MapTy; + MapTy Map; + + /// Vector - Keys and values. + typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; + VectorTy Vector; + + public: + typedef typename VectorTy::iterator iterator; + typedef typename VectorTy::const_iterator const_iterator; + iterator begin() { return Vector.begin(); } + iterator end() { return Vector.end(); } + const_iterator begin() const { return Vector.begin(); } + const_iterator end() const { return Vector.end(); } + +#ifdef XDEBUG + ~MapVector() { + assert(Vector.size() >= Map.size()); // May differ due to blotting. + for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); + I != E; ++I) { + assert(I->second < Vector.size()); + assert(Vector[I->second].first == I->first); + } + for (typename VectorTy::const_iterator I = Vector.begin(), + E = Vector.end(); I != E; ++I) + assert(!I->first || + (Map.count(I->first) && + Map[I->first] == size_t(I - Vector.begin()))); + } +#endif + + ValueT &operator[](KeyT Arg) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(Arg, size_t(0))); + if (Pair.second) { + Pair.first->second = Vector.size(); + Vector.push_back(std::make_pair(Arg, ValueT())); + return Vector.back().second; + } + return Vector[Pair.first->second].second; + } + + std::pair<iterator, bool> + insert(const std::pair<KeyT, ValueT> &InsertPair) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(InsertPair.first, size_t(0))); + if (Pair.second) { + Pair.first->second = Vector.size(); + Vector.push_back(InsertPair); + return std::make_pair(llvm::prior(Vector.end()), true); + } + return std::make_pair(Vector.begin() + Pair.first->second, false); + } + + const_iterator find(KeyT Key) const { + typename MapTy::const_iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + + /// blot - This is similar to erase, but instead of removing the element + /// from the vector, it just zeros out the key in the vector. This leaves + /// iterators intact, but clients must be prepared for zeroed-out keys when + /// iterating. + void blot(KeyT Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return; + Vector[It->second].first = KeyT(); + Map.erase(It); + } + + void clear() { + Map.clear(); + Vector.clear(); + } + }; +} + +//===----------------------------------------------------------------------===// +// ARC Utilities. +//===----------------------------------------------------------------------===// + +namespace { + /// InstructionClass - A simple classification for instructions. + enum InstructionClass { + IC_Retain, ///< objc_retain + IC_RetainRV, ///< objc_retainAutoreleasedReturnValue + IC_RetainBlock, ///< objc_retainBlock + IC_Release, ///< objc_release + IC_Autorelease, ///< objc_autorelease + IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue + IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush + IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop + IC_NoopCast, ///< objc_retainedObject, etc. 
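// A usage sketch for MapVector::blot above (A and B are hypothetical keys):
// erasing during iteration would invalidate iterators, while blotting leaves
// a null key that iteration skips.
//
//   MapVector<Value *, unsigned> MV;
//   MV[A] = 1;
//   MV[B] = 2;
//   for (MapVector<Value *, unsigned>::iterator I = MV.begin(),
//        E = MV.end(); I != E; ++I) {
//     if (!I->first) continue;        // skip blotted entries
//     if (I->first == B) MV.blot(B);  // safe: no iterator is invalidated
//   }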
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+ };
+}
+
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer.
+static bool IsPotentialUse(const Value *Op) {
+ // Pointers to static or stack storage are not reference-counted pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments are not reference-counted.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types, and not function pointers.
+ const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ return false;
+ // Conservatively assume anything else is a potential use.
+ return true;
+}
+
+/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
+/// of construct CS is.
+static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialUse(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// GetFunctionClass - Determine if F is one of the special known Functions.
+/// If it isn't, return IC_CallOrUser.
+static InstructionClass GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No arguments.
+ if (AI == AE)
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Default(IC_CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ const Type *ETy = PTy->getElementType();
+ // Argument is i8*.
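// (For illustration, hypothetical prototypes and where the arity/pointer-depth
// screen of GetFunctionClass sends them:
//   void objc_autoreleasePoolPush(void)  -> the no-argument table earlier
//   char *objc_retain(char *)            -> the i8* table just below
//   void objc_destroyWeak(char **)       -> the i8** table after it
// Only the name plus the argument shape is inspected; unknown names fall back
// to IC_CallOrUser.)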
+ if (ETy->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_retain", IC_Retain) + .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) + .Case("objc_retainBlock", IC_RetainBlock) + .Case("objc_release", IC_Release) + .Case("objc_autorelease", IC_Autorelease) + .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) + .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) + .Case("objc_retainedObject", IC_NoopCast) + .Case("objc_unretainedObject", IC_NoopCast) + .Case("objc_unretainedPointer", IC_NoopCast) + .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) + .Default(IC_CallOrUser); + + // Argument is i8** + if (const PointerType *Pte = dyn_cast<PointerType>(ETy)) + if (Pte->getElementType()->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_loadWeakRetained", IC_LoadWeakRetained) + .Case("objc_loadWeak", IC_LoadWeak) + .Case("objc_destroyWeak", IC_DestroyWeak) + .Default(IC_CallOrUser); + } + + // Two arguments, first is i8**. + const Argument *A1 = AI++; + if (AI == AE) + if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) + if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) + if (Pte->getElementType()->isIntegerTy(8)) + if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { + const Type *ETy1 = PTy1->getElementType(); + // Second argument is i8* + if (ETy1->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_storeWeak", IC_StoreWeak) + .Case("objc_initWeak", IC_InitWeak) + .Default(IC_CallOrUser); + // Second argument is i8**. + if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) + if (Pte1->getElementType()->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_moveWeak", IC_MoveWeak) + .Case("objc_copyWeak", IC_CopyWeak) + .Default(IC_CallOrUser); + } + + // Anything else. + return IC_CallOrUser; +} + +/// GetInstructionClass - Determine what kind of construct V is. +static InstructionClass GetInstructionClass(const Value *V) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Any instruction other than bitcast and gep with a pointer operand have a + // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer + // to a subsequent use, rather than using it themselves, in this sense. + // As a short cut, several other opcodes are known to have no pointer + // operands of interest. And ret is never followed by a release, so it's + // not interesting to examine. + switch (I->getOpcode()) { + case Instruction::Call: { + const CallInst *CI = cast<CallInst>(I); + // Check for calls to special functions. + if (const Function *F = CI->getCalledFunction()) { + InstructionClass Class = GetFunctionClass(F); + if (Class != IC_CallOrUser) + return Class; + + // None of the intrinsic functions do objc_release. For intrinsics, the + // only question is whether or not they may be users. + switch (F->getIntrinsicID()) { + case 0: break; + case Intrinsic::bswap: case Intrinsic::ctpop: + case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::returnaddress: case Intrinsic::frameaddress: + case Intrinsic::stacksave: case Intrinsic::stackrestore: + case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: + // Don't let dbg info affect our results. 
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return IC_None;
+ default:
+ for (Function::const_arg_iterator AI = F->arg_begin(),
+ AE = F->arg_end(); AI != AE; ++AI)
+ if (IsPotentialUse(AI))
+ return IC_User;
+ return IC_None;
+ }
+ }
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select: case Instruction::PHI:
+ case Instruction::Ret: case Instruction::Br:
+ case Instruction::Switch: case Instruction::IndirectBr:
+ case Instruction::Alloca: case Instruction::VAArg:
+ case Instruction::Add: case Instruction::FAdd:
+ case Instruction::Sub: case Instruction::FSub:
+ case Instruction::Mul: case Instruction::FMul:
+ case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+ case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+ case Instruction::And: case Instruction::Or: case Instruction::Xor:
+ case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+ case Instruction::IntToPtr: case Instruction::FCmp:
+ case Instruction::FPTrunc: case Instruction::FPExt:
+ case Instruction::FPToUI: case Instruction::FPToSI:
+ case Instruction::UIToFP: case Instruction::SIToFP:
+ case Instruction::InsertElement: case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialUse(I->getOperand(1)))
+ return IC_User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialUse(*OI))
+ return IC_User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return IC_None;
+}
+
+/// GetBasicInstructionClass - Determine what kind of construct V is. This is
+/// similar to GetInstructionClass except that it only detects objc runtime
+/// calls. This allows it to be faster.
+static InstructionClass GetBasicInstructionClass(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return IC_CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return IC_User;
+}
+
+/// IsRetain - Test if the given class is objc_retain or
+/// equivalent.
+static bool IsRetain(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV;
+}
+
+/// IsAutorelease - Test if the given class is objc_autorelease or
+/// equivalent.
+static bool IsAutorelease(InstructionClass Class) {
+ return Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsForwarding - Test if the given class represents instructions which return
+/// their argument verbatim.
+static bool IsForwarding(InstructionClass Class) {
+ // objc_retainBlock technically doesn't always return its argument
+ // verbatim, but it doesn't matter for our purposes here.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_NoopCast;
+}
+
+/// IsNoopOnNull - Test if the given class represents instructions which do
+/// nothing if passed a null pointer.
+static bool IsNoopOnNull(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock;
+}
+
+/// IsAlwaysTail - Test if the given class represents instructions which are
+/// always safe to mark with the "tail" keyword.
+static bool IsAlwaysTail(InstructionClass Class) {
+ // IC_RetainBlock may be given a stack argument.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsNoThrow - Test if the given class represents instructions which are always
+/// safe to mark with the nounwind attribute.
+static bool IsNoThrow(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_AutoreleasepoolPush ||
+ Class == IC_AutoreleasepoolPop;
+}
+
+/// EraseInstruction - Erase the given instruction. ObjC calls return their
+/// argument verbatim, so if it's such a call and the return value has users,
+/// replace them with the argument value.
+static void EraseInstruction(Instruction *CI) {
+ Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+ bool Unused = CI->use_empty();
+
+ if (!Unused) {
+ // Replace the return value with the argument.
+ assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+ "Can't delete non-forwarding instruction with users!");
+ CI->replaceAllUsesWith(OldArg);
+ }
+
+ CI->eraseFromParent();
+
+ if (Unused)
+ RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which
+/// also knows how to look through objc_retain and objc_autorelease calls, which
+/// we know to return their argument verbatim.
+static const Value *GetUnderlyingObjCPtr(const Value *V) {
+ for (;;) {
+ V = GetUnderlyingObject(V);
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+
+ return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static Value *StripPointerCastsAndObjCCalls(Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// GetObjCArg - Assuming the given instruction is one of the special calls such
+/// as objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
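// For example (an illustrative fragment; %p is a hypothetical value):
//   %0 = bitcast i8* %p to i8*
//   %1 = call i8* @objc_retain(i8* %0)
//   %2 = bitcast i8* %1 to i8*
// GetObjCArg on a special call whose argument is %2 strips both bitcasts and
// the forwarding objc_retain call, yielding %p.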
+static Value *GetObjCArg(Value *Inst) { + return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0)); +} + +/// IsObjCIdentifiedObject - This is similar to AliasAnalysis' +/// isObjCIdentifiedObject, except that it uses special knowledge of +/// ObjC conventions... +static bool IsObjCIdentifiedObject(const Value *V) { + // Assume that call results and arguments have their own "provenance". + // Constants (including GlobalVariables) and Allocas are never + // reference-counted. + if (isa<CallInst>(V) || isa<InvokeInst>(V) || + isa<Argument>(V) || isa<Constant>(V) || + isa<AllocaInst>(V)) + return true; + + if (const LoadInst *LI = dyn_cast<LoadInst>(V)) { + const Value *Pointer = + StripPointerCastsAndObjCCalls(LI->getPointerOperand()); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { + StringRef Name = GV->getName(); + // These special variables are known to hold values which are not + // reference-counted pointers. + if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || + Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || + Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || + Name.startswith("\01L_OBJC_METH_VAR_NAME_") || + Name.startswith("\01l_objc_msgSend_fixup_")) + return true; + } + } + + return false; +} + +/// FindSingleUseIdentifiedObject - This is similar to +/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value +/// with multiple uses. +static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { + if (Arg->hasOneUse()) { + if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) + return FindSingleUseIdentifiedObject(BC->getOperand(0)); + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) + if (GEP->hasAllZeroIndices()) + return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); + if (IsForwarding(GetBasicInstructionClass(Arg))) + return FindSingleUseIdentifiedObject( + cast<CallInst>(Arg)->getArgOperand(0)); + if (!IsObjCIdentifiedObject(Arg)) + return 0; + return Arg; + } + + // If we found an identifiable object but it has multiple uses, but they + // are trivial uses, we can still consider this to be a single-use + // value. + if (IsObjCIdentifiedObject(Arg)) { + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) + return 0; + } + + return Arg; + } + + return 0; +} + +/// ModuleHasARC - Test if the given module looks interesting to run ARC +/// optimization on. +static bool ModuleHasARC(const Module &M) { + return + M.getNamedValue("objc_retain") || + M.getNamedValue("objc_release") || + M.getNamedValue("objc_autorelease") || + M.getNamedValue("objc_retainAutoreleasedReturnValue") || + M.getNamedValue("objc_retainBlock") || + M.getNamedValue("objc_autoreleaseReturnValue") || + M.getNamedValue("objc_autoreleasePoolPush") || + M.getNamedValue("objc_loadWeakRetained") || + M.getNamedValue("objc_loadWeak") || + M.getNamedValue("objc_destroyWeak") || + M.getNamedValue("objc_storeWeak") || + M.getNamedValue("objc_initWeak") || + M.getNamedValue("objc_moveWeak") || + M.getNamedValue("objc_copyWeak") || + M.getNamedValue("objc_retainedObject") || + M.getNamedValue("objc_unretainedObject") || + M.getNamedValue("objc_unretainedPointer"); +} + +//===----------------------------------------------------------------------===// +// ARC AliasAnalysis. 
+//===----------------------------------------------------------------------===// + +#include "llvm/Pass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" + +namespace { + /// ObjCARCAliasAnalysis - This is a simple alias analysis + /// implementation that uses knowledge of ARC constructs to answer queries. + /// + /// TODO: This class could be generalized to know about other ObjC-specific + /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing + /// even though their offsets are dynamic. + class ObjCARCAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + ObjCARCAliasAnalysis() : ImmutablePass(ID) { + initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + private: + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Location &LocA, const Location &LocB); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + virtual ModRefBehavior getModRefBehavior(const Function *F); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2); + }; +} // End of anonymous namespace + +// Register this pass... +char ObjCARCAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true, false) + +ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { + return new ObjCARCAliasAnalysis(); +} + +void +ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +AliasAnalysis::AliasResult +ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { + if (!EnableARCOpts) + return AliasAnalysis::alias(LocA, LocB); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making a + // precise alias query. + const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); + const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); + AliasResult Result = + AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), + Location(SB, LocB.Size, LocB.TBAATag)); + if (Result != MayAlias) + return Result; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *UA = GetUnderlyingObjCPtr(SA); + const Value *UB = GetUnderlyingObjCPtr(SB); + if (UA != SA || UB != SB) { + Result = AliasAnalysis::alias(Location(UA), Location(UB)); + // We can't use MustAlias or PartialAlias results here because + // GetUnderlyingObjCPtr may return an offsetted pointer value. + if (Result == NoAlias) + return NoAlias; + } + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. 
+ return MayAlias; +} + +bool +ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableARCOpts) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making + // a precise alias query. + const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); + if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), + OrLocal)) + return true; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *U = GetUnderlyingObjCPtr(S); + if (U != S) + return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return false; +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + // We have nothing to do. Just chain to the next AliasAnalysis. + return AliasAnalysis::getModRefBehavior(CS); +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefBehavior(F); + + switch (GetFunctionClass(F)) { + case IC_NoopCast: + return DoesNotAccessMemory; + default: + break; + } + + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefInfo(CS, Loc); + + switch (GetBasicInstructionClass(CS.getInstruction())) { + case IC_Retain: + case IC_RetainRV: + case IC_RetainBlock: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_NoopCast: + case IC_AutoreleasepoolPush: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + // These functions don't access any memory visible to the compiler. + return NoModRef; + default: + break; + } + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // TODO: Theoretically we could check for dependencies between objc_* calls + // and OnlyAccessesArgumentPointees calls or other well-behaved calls. + return AliasAnalysis::getModRefInfo(CS1, CS2); +} + +//===----------------------------------------------------------------------===// +// ARC expansion. +//===----------------------------------------------------------------------===// + +#include "llvm/Support/InstIterator.h" +#include "llvm/Transforms/Scalar.h" + +namespace { + /// ObjCARCExpand - Early ARC transformations. + class ObjCARCExpand : public FunctionPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + /// Run - A flag indicating whether this optimization pass should run. 
+    bool Run;
+
+  public:
+    static char ID;
+    ObjCARCExpand() : FunctionPass(ID) {
+      initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+                "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+  return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+  Run = ModuleHasARC(M);
+  return false;
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
+  bool Changed = false;
+
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+    Instruction *Inst = &*I;
+
+    switch (GetBasicInstructionClass(Inst)) {
+    case IC_Retain:
+    case IC_RetainRV:
+    case IC_Autorelease:
+    case IC_AutoreleaseRV:
+    case IC_FusedRetainAutorelease:
+    case IC_FusedRetainAutoreleaseRV:
+      // These calls return their argument verbatim, as a low-level
+      // optimization. However, this makes high-level optimizations
+      // harder. Undo any uses of this optimization that the front-end
+      // emitted here. We'll redo them in a later pass.
+      Changed = true;
+      Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
+      break;
+    default:
+      break;
+    }
+  }
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// ARC optimization.
+//===----------------------------------------------------------------------===//
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+/// TODO: The pointer returned from objc_loadWeakRetained is retained.
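+/// For example (an illustrative sketch):
+///   %x = call i8* @objc_loadWeakRetained(i8** %p)
+///   ...
+///   call void @objc_release(i8* %x)
+/// Modeling the implicit retain performed by the load could allow it to be
+/// paired with the release.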
+
+#include "llvm/GlobalAlias.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+                   "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+  /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
+  /// uses many of the same techniques, except it uses special ObjC-specific
+  /// reasoning about pointer relationships.
+  class ProvenanceAnalysis {
+    AliasAnalysis *AA;
+
+    typedef std::pair<const Value *, const Value *> ValuePairTy;
+    typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+    CachedResultsTy CachedResults;
+
+    bool relatedCheck(const Value *A, const Value *B);
+    bool relatedSelect(const SelectInst *A, const Value *B);
+    bool relatedPHI(const PHINode *A, const Value *B);
+
+    // Do not implement.
+    void operator=(const ProvenanceAnalysis &);
+    ProvenanceAnalysis(const ProvenanceAnalysis &);
+
+  public:
+    ProvenanceAnalysis() {}
+
+    void setAA(AliasAnalysis *aa) { AA = aa; }
+
+    AliasAnalysis *getAA() const { return AA; }
+
+    bool related(const Value *A, const Value *B);
+
+    void clear() {
+      CachedResults.clear();
+    }
+  };
+}
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
+  // If the values are Selects with the same condition, we can do a more
+  // precise check: just check for relations between the values on
+  // corresponding arms.
+  if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+    if (A->getCondition() == SB->getCondition()) {
+      if (related(A->getTrueValue(), SB->getTrueValue()))
+        return true;
+      if (related(A->getFalseValue(), SB->getFalseValue()))
+        return true;
+      return false;
+    }
+
+  // Check both arms of the Select node individually.
+  if (related(A->getTrueValue(), B))
+    return true;
+  if (related(A->getFalseValue(), B))
+    return true;
+
+  // Neither arm is related to B.
+  return false;
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
+  // If the values are PHIs in the same block, we can do a more precise as
+  // well as efficient check: just check for relations between the values on
+  // corresponding edges.
+  if (const PHINode *PNB = dyn_cast<PHINode>(B))
+    if (PNB->getParent() == A->getParent()) {
+      for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+        if (related(A->getIncomingValue(i),
+                    PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+          return true;
+      return false;
+    }
+
+  // Check each unique source of the PHI node against B.
+  SmallPtrSet<const Value *, 4> UniqueSrc;
+  for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+    const Value *PV1 = A->getIncomingValue(i);
+    if (UniqueSrc.insert(PV1) && related(PV1, B))
+      return true;
+  }
+
+  // None of the incoming values are related to B.
+  return false;
+}
+
+/// isStoredObjCPointer - Test if the value of P, or any value covered by its
+/// provenance, is ever stored within the function (not counting callees).
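+/// For example, passing P to a call or using it as the address operand of a
+/// store does not count; storing P itself into memory does.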
+static bool isStoredObjCPointer(const Value *P) {
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Worklist;
+  Worklist.push_back(P);
+  Visited.insert(P);
+  do {
+    P = Worklist.pop_back_val();
+    for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+         UI != UE; ++UI) {
+      const User *Ur = *UI;
+      if (isa<StoreInst>(Ur)) {
+        if (UI.getOperandNo() == 0)
+          // The pointer itself is stored.
+          return true;
+        // The pointed-to memory is stored to; the pointer itself is not.
+        continue;
+      }
+      if (isa<CallInst>(Ur))
+        // The pointer is passed as an argument, ignore this.
+        continue;
+      if (isa<PtrToIntInst>(P))
+        // Assume the worst.
+        return true;
+      if (Visited.insert(Ur))
+        Worklist.push_back(Ur);
+    }
+  } while (!Worklist.empty());
+
+  // Everything checked out.
+  return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
+  // Skip past provenance pass-throughs.
+  A = GetUnderlyingObjCPtr(A);
+  B = GetUnderlyingObjCPtr(B);
+
+  // Quick check.
+  if (A == B)
+    return true;
+
+  // Ask regular AliasAnalysis, for a first approximation.
+  switch (AA->alias(A, B)) {
+  case AliasAnalysis::NoAlias:
+    return false;
+  case AliasAnalysis::MustAlias:
+  case AliasAnalysis::PartialAlias:
+    return true;
+  case AliasAnalysis::MayAlias:
+    break;
+  }
+
+  bool AIsIdentified = IsObjCIdentifiedObject(A);
+  bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+  // An ObjC-identified object can't alias a load if it is never locally
+  // stored.
+  if (AIsIdentified) {
+    if (BIsIdentified) {
+      // If both pointers have provenance, they can be directly compared.
+      if (A != B)
+        return false;
+    } else {
+      if (isa<LoadInst>(B))
+        return isStoredObjCPointer(A);
+    }
+  } else {
+    if (BIsIdentified && isa<LoadInst>(A))
+      return isStoredObjCPointer(B);
+  }
+
+  // Special handling for PHI and Select.
+  if (const PHINode *PN = dyn_cast<PHINode>(A))
+    return relatedPHI(PN, B);
+  if (const PHINode *PN = dyn_cast<PHINode>(B))
+    return relatedPHI(PN, A);
+  if (const SelectInst *S = dyn_cast<SelectInst>(A))
+    return relatedSelect(S, B);
+  if (const SelectInst *S = dyn_cast<SelectInst>(B))
+    return relatedSelect(S, A);
+
+  // Conservative.
+  return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+  // Begin by inserting a conservative value into the map. If the insertion
+  // fails, we have the answer already. If it succeeds, leave it there until
+  // we compute the real answer to guard against recursive queries.
+  if (A > B) std::swap(A, B);
+  std::pair<CachedResultsTy::iterator, bool> Pair =
+    CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+  if (!Pair.second)
+    return Pair.first->second;
+
+  bool Result = relatedCheck(A, B);
+  CachedResults[ValuePairTy(A, B)] = Result;
+  return Result;
+}
+
+namespace {
+  // Sequence - A sequence of states that a pointer may go through in which an
+  // objc_retain and objc_release are actually needed.
+  enum Sequence {
+    S_None,
+    S_Retain,         ///< objc_retain(x)
+    S_CanRelease,     ///< foo(x) -- x could possibly see a ref count decrement
+    S_Use,            ///< any use of x
+    S_Stop,           ///< like S_Release, but code motion is stopped
+    S_Release,        ///< objc_release(x)
+    S_MovableRelease  ///< objc_release(x), !clang.imprecise_release
+  };
+}
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+  // The easy cases.
+  if (A == B)
+    return A;
+  if (A == S_None || B == S_None)
+    return S_None;
+
+  // Note that we can't merge S_CanRelease and S_Use.
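+  // Canonicalize the order so that A is no further along in the enum than B;
+  // the checks below then only need to handle one orientation of each pair.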
+  if (A > B) std::swap(A, B);
+  if (TopDown) {
+    // Choose the side which is further along in the sequence.
+    if (A == S_Retain && (B == S_CanRelease || B == S_Use))
+      return B;
+  } else {
+    // Choose the side which is further along in the sequence.
+    if ((A == S_Use || A == S_CanRelease) &&
+        (B == S_Release || B == S_Stop || B == S_MovableRelease))
+      return A;
+    // If both sides are releases, choose the more conservative one.
+    if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+      return A;
+    if (A == S_Release && B == S_MovableRelease)
+      return A;
+  }
+
+  return S_None;
+}
+
+namespace {
+  /// RRInfo - Unidirectional information about either a
+  /// retain-decrement-use-release sequence or release-use-decrement-retain
+  /// reverse sequence.
+  struct RRInfo {
+    /// KnownIncremented - After an objc_retain, the reference count of the
+    /// referenced object is known to be positive. Similarly, before an
+    /// objc_release, the reference count of the referenced object is known
+    /// to be positive. If there are retain-release pairs in code regions
+    /// where the retain count is known to be positive, they can be
+    /// eliminated, regardless of any side effects between them.
+    bool KnownIncremented;
+
+    /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
+    /// opposed to objc_retain calls).
+    bool IsRetainBlock;
+
+    /// IsTailCallRelease - True if the objc_release calls are all marked
+    /// with the "tail" keyword.
+    bool IsTailCallRelease;
+
+    /// ReleaseMetadata - If the Calls are objc_release calls and they all
+    /// have a clang.imprecise_release tag, this is the metadata tag.
+    MDNode *ReleaseMetadata;
+
+    /// Calls - For a top-down sequence, the set of objc_retains or
+    /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+    SmallPtrSet<Instruction *, 2> Calls;
+
+    /// ReverseInsertPts - The set of optimal insert positions for
+    /// moving calls in the opposite sequence.
+    SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+    RRInfo() :
+      KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+      ReleaseMetadata(0) {}
+
+    void clear();
+  };
+}
+
+void RRInfo::clear() {
+  KnownIncremented = false;
+  IsRetainBlock = false;
+  IsTailCallRelease = false;
+  ReleaseMetadata = 0;
+  Calls.clear();
+  ReverseInsertPts.clear();
+}
+
+namespace {
+  /// PtrState - This class summarizes several per-pointer runtime properties
+  /// which are propagated through the flow graph.
+  class PtrState {
+    /// RefCount - The known minimum number of reference count increments.
+    unsigned RefCount;
+
+    /// Seq - The current position in the sequence.
+    Sequence Seq;
+
+  public:
+    /// RRI - Unidirectional information about the current sequence.
+    /// TODO: Encapsulate this better.
+    RRInfo RRI;
+
+    PtrState() : RefCount(0), Seq(S_None) {}
+
+    void IncrementRefCount() {
+      if (RefCount != UINT_MAX) ++RefCount;
+    }
+
+    void DecrementRefCount() {
+      if (RefCount != 0) --RefCount;
+    }
+
+    void ClearRefCount() {
+      RefCount = 0;
+    }
+
+    bool IsKnownIncremented() const {
+      return RefCount > 0;
+    }
+
+    void SetSeq(Sequence NewSeq) {
+      Seq = NewSeq;
+    }
+
+    void SetSeqToRelease(MDNode *M) {
+      if (Seq == S_None || Seq == S_Use) {
+        Seq = M ?
S_MovableRelease : S_Release; + RRI.ReleaseMetadata = M; + } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) { + Seq = S_Release; + RRI.ReleaseMetadata = 0; + } + } + + Sequence GetSeq() const { + return Seq; + } + + void ClearSequenceProgress() { + Seq = S_None; + RRI.clear(); + } + + void Merge(const PtrState &Other, bool TopDown); + }; +} + +void +PtrState::Merge(const PtrState &Other, bool TopDown) { + Seq = MergeSeqs(Seq, Other.Seq, TopDown); + RefCount = std::min(RefCount, Other.RefCount); + + // We can't merge a plain objc_retain with an objc_retainBlock. + if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) + Seq = S_None; + + if (Seq == S_None) { + RRI.clear(); + } else { + // Conservatively merge the ReleaseMetadata information. + if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) + RRI.ReleaseMetadata = 0; + + RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented; + RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; + RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); + RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(), + Other.RRI.ReverseInsertPts.end()); + } +} + +namespace { + /// BBState - Per-BasicBlock state. + class BBState { + /// TopDownPathCount - The number of unique control paths from the entry + /// which can reach this block. + unsigned TopDownPathCount; + + /// BottomUpPathCount - The number of unique control paths to exits + /// from this block. + unsigned BottomUpPathCount; + + /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp. + typedef MapVector<const Value *, PtrState> MapTy; + + /// PerPtrTopDown - The top-down traversal uses this to record information + /// known about a pointer at the bottom of each block. + MapTy PerPtrTopDown; + + /// PerPtrBottomUp - The bottom-up traversal uses this to record information + /// known about a pointer at the top of each block. + MapTy PerPtrBottomUp; + + public: + BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} + + typedef MapTy::iterator ptr_iterator; + typedef MapTy::const_iterator ptr_const_iterator; + + ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } + ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } + ptr_const_iterator top_down_ptr_begin() const { + return PerPtrTopDown.begin(); + } + ptr_const_iterator top_down_ptr_end() const { + return PerPtrTopDown.end(); + } + + ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } + ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } + ptr_const_iterator bottom_up_ptr_begin() const { + return PerPtrBottomUp.begin(); + } + ptr_const_iterator bottom_up_ptr_end() const { + return PerPtrBottomUp.end(); + } + + /// SetAsEntry - Mark this block as being an entry block, which has one + /// path from the entry by definition. + void SetAsEntry() { TopDownPathCount = 1; } + + /// SetAsExit - Mark this block as being an exit block, which has one + /// path to an exit by definition. 
+    void SetAsExit()  { BottomUpPathCount = 1; }
+
+    PtrState &getPtrTopDownState(const Value *Arg) {
+      return PerPtrTopDown[Arg];
+    }
+
+    PtrState &getPtrBottomUpState(const Value *Arg) {
+      return PerPtrBottomUp[Arg];
+    }
+
+    void clearBottomUpPointers() {
+      PerPtrBottomUp.clear();
+    }
+
+    void clearTopDownPointers() {
+      PerPtrTopDown.clear();
+    }
+
+    void InitFromPred(const BBState &Other);
+    void InitFromSucc(const BBState &Other);
+    void MergePred(const BBState &Other);
+    void MergeSucc(const BBState &Other);
+
+    /// GetAllPathCount - Return the number of possible unique paths from an
+    /// entry to an exit which pass through this block. This is only valid
+    /// after both the top-down and bottom-up traversals are complete.
+    unsigned GetAllPathCount() const {
+      return TopDownPathCount * BottomUpPathCount;
+    }
+  };
+}
+
+void BBState::InitFromPred(const BBState &Other) {
+  PerPtrTopDown = Other.PerPtrTopDown;
+  TopDownPathCount = Other.TopDownPathCount;
+}
+
+void BBState::InitFromSucc(const BBState &Other) {
+  PerPtrBottomUp = Other.PerPtrBottomUp;
+  BottomUpPathCount = Other.BottomUpPathCount;
+}
+
+/// MergePred - The top-down traversal uses this to merge information about
+/// predecessors to form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+  // Other.TopDownPathCount can be 0, in which case it is either dead or a
+  // loop backedge. Loop backedges are special.
+  TopDownPathCount += Other.TopDownPathCount;
+
+  // For each entry in the other set, if our set has an entry with the same
+  // key, merge the entries. Otherwise, copy the entry and merge it with an
+  // empty entry.
+  for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+       ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+    std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
+    Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+                             /*TopDown=*/true);
+  }
+
+  // For each entry in our set, if the other set doesn't have an entry with
+  // the same key, force it to merge with an empty entry.
+  for (ptr_iterator MI = top_down_ptr_begin(),
+       ME = top_down_ptr_end(); MI != ME; ++MI)
+    if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+      MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// MergeSucc - The bottom-up traversal uses this to merge information about
+/// successors to form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+  // loop backedge. Loop backedges are special.
+  BottomUpPathCount += Other.BottomUpPathCount;
+
+  // For each entry in the other set, if our set has an entry with the
+  // same key, merge the entries. Otherwise, copy the entry and merge
+  // it with an empty entry.
+  for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+       ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+    std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
+    Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+                             /*TopDown=*/false);
+  }
+
+  // For each entry in our set, if the other set doesn't have an entry
+  // with the same key, force it to merge with an empty entry.
+  for (ptr_iterator MI = bottom_up_ptr_begin(),
+       ME = bottom_up_ptr_end(); MI != ME; ++MI)
+    if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+      MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+namespace {
+  /// ObjCARCOpt - The main ARC optimization pass.
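+  /// It pairs retains with releases by propagating per-pointer state
+  /// bottom-up and top-down across the CFG, then moves or removes the
+  /// paired calls where that is provably safe.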
+  class ObjCARCOpt : public FunctionPass {
+    bool Changed;
+    ProvenanceAnalysis PA;
+
+    /// Run - A flag indicating whether this optimization pass should run.
+    bool Run;
+
+    /// RetainFunc, ReleaseFunc, etc. - Declarations for objc_retain,
+    /// objc_retainBlock, objc_retainAutoreleasedReturnValue, and
+    /// objc_release.
+    Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
+
+    /// RetainRVCallee, etc. - Declarations for ObjC runtime
+    /// functions, for use in creating calls to them. These are initialized
+    /// lazily to avoid cluttering up the Module with unused declarations.
+    Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
+             *RetainCallee, *AutoreleaseCallee;
+
+    /// UsedInThisFunction - Flags which determine whether each of the
+    /// interesting runtime functions is in fact used in the current function.
+    unsigned UsedInThisFunction;
+
+    /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
+    /// metadata.
+    unsigned ImpreciseReleaseMDKind;
+
+    Constant *getRetainRVCallee(Module *M);
+    Constant *getAutoreleaseRVCallee(Module *M);
+    Constant *getReleaseCallee(Module *M);
+    Constant *getRetainCallee(Module *M);
+    Constant *getAutoreleaseCallee(Module *M);
+
+    void OptimizeRetainCall(Function &F, Instruction *Retain);
+    bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+    void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
+    void OptimizeIndividualCalls(Function &F);
+
+    void CheckForCFGHazards(const BasicBlock *BB,
+                            DenseMap<const BasicBlock *, BBState> &BBStates,
+                            BBState &MyStates) const;
+    bool VisitBottomUp(BasicBlock *BB,
+                       DenseMap<const BasicBlock *, BBState> &BBStates,
+                       MapVector<Value *, RRInfo> &Retains);
+    bool VisitTopDown(BasicBlock *BB,
+                      DenseMap<const BasicBlock *, BBState> &BBStates,
+                      DenseMap<Value *, RRInfo> &Releases);
+    bool Visit(Function &F,
+               DenseMap<const BasicBlock *, BBState> &BBStates,
+               MapVector<Value *, RRInfo> &Retains,
+               DenseMap<Value *, RRInfo> &Releases);
+
+    void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+                   MapVector<Value *, RRInfo> &Retains,
+                   DenseMap<Value *, RRInfo> &Releases,
+                   SmallVectorImpl<Instruction *> &DeadInsts);
+
+    bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+                              MapVector<Value *, RRInfo> &Retains,
+                              DenseMap<Value *, RRInfo> &Releases);
+
+    void OptimizeWeakCalls(Function &F);
+
+    bool OptimizeSequences(Function &F);
+
+    void OptimizeReturns(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+    virtual void releaseMemory();
+
+  public:
+    static char ID;
+    ObjCARCOpt() : FunctionPass(ID) {
+      initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+                      "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+                    "objc-arc", "ObjC ARC optimization", false, false)
+
+Pass *llvm::createObjCARCOptPass() {
+  return new ObjCARCOpt();
+}
+
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<ObjCARCAliasAnalysis>();
+  AU.addRequired<AliasAnalysis>();
+  // ARC optimization doesn't currently split critical edges.
+ AU.setPreservesCFG(); +} + +Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { + if (!RetainRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + const FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainRVCallee = + M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, + Attributes); + } + return RetainRVCallee; +} + +Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { + if (!AutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + const FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + AutoreleaseRVCallee = + M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, + Attributes); + } + return AutoreleaseRVCallee; +} + +Constant *ObjCARCOpt::getReleaseCallee(Module *M) { + if (!ReleaseCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + ReleaseCallee = + M->getOrInsertFunction( + "objc_release", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attributes); + } + return ReleaseCallee; +} + +Constant *ObjCARCOpt::getRetainCallee(Module *M) { + if (!RetainCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainCallee = + M->getOrInsertFunction( + "objc_retain", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return RetainCallee; +} + +Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { + if (!AutoreleaseCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + AutoreleaseCallee = + M->getOrInsertFunction( + "objc_autorelease", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return AutoreleaseCallee; +} + +/// CanAlterRefCount - Test whether the given instruction can result in a +/// reference count modification (positive or negative) for the pointer's +/// object. +static bool +CanAlterRefCount(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, InstructionClass Class) { + switch (Class) { + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_User: + // These operations never directly modify a reference count. + return false; + default: break; + } + + ImmutableCallSite CS = static_cast<const Value *>(Inst); + assert(CS && "Only calls can alter reference counts!"); + + // See if AliasAnalysis can help us with the call. + AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); + if (AliasAnalysis::onlyReadsMemory(MRB)) + return false; + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + const Value *Op = *I; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; + } + + // Assume the worst. 
+ return true; +} + +/// CanUse - Test whether the given instruction can "use" the given pointer's +/// object in a way that requires the reference count to be positive. +static bool +CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, + InstructionClass Class) { + // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. + if (Class == IC_Call) + return false; + + // Consider various instructions which may have pointer arguments which are + // not "uses". + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { + // Comparing a pointer with null, or any other constant, isn't really a use, + // because we don't care what the pointer points to, or about the values + // of any other dynamic reference-counted pointers. + if (!IsPotentialUse(ICI->getOperand(1))) + return false; + } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { + // For calls, just check the arguments (and not the callee operand). + for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), + OE = CS.arg_end(); OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // Special-case stores, because we don't care about the stored value, just + // the store address. + const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); + // If we can't tell what the underlying object was, assume there is a + // dependence. + return IsPotentialUse(Op) && PA.related(Op, Ptr); + } + + // Check each operand for a match. + for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); + OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; +} + +/// CanInterruptRV - Test whether the given instruction can autorelease +/// any pointer or cause an autoreleasepool pop. +static bool +CanInterruptRV(InstructionClass Class) { + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_CallOrUser: + case IC_Call: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + return true; + default: + return false; + } +} + +namespace { + /// DependenceKind - There are several kinds of dependence-like concepts in + /// use here. + enum DependenceKind { + NeedsPositiveRetainCount, + CanChangeRetainCount, + RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. + RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. + RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. + }; +} + +/// Depends - Test if there can be dependencies on Inst through Arg. This +/// function only tests dependencies relevant for removing pairs of calls. +static bool +Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, + ProvenanceAnalysis &PA) { + // If we've reached the definition of Arg, stop. + if (Inst == Arg) + return true; + + switch (Flavor) { + case NeedsPositiveRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanUse(Inst, Arg, PA, Class); + } + } + + case CanChangeRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + // Conservatively assume this can decrement any count. 
+      return true;
+    case IC_AutoreleasepoolPush:
+    case IC_None:
+      return false;
+    default:
+      return CanAlterRefCount(Inst, Arg, PA, Class);
+    }
+  }
+
+  case RetainAutoreleaseDep:
+    switch (GetBasicInstructionClass(Inst)) {
+    case IC_AutoreleasepoolPop:
+      // Don't merge an objc_autorelease with an objc_retain inside a
+      // different autoreleasepool scope.
+      return true;
+    case IC_Retain:
+    case IC_RetainRV:
+      // Check for a retain of the same pointer for merging.
+      return GetObjCArg(Inst) == Arg;
+    default:
+      // Nothing else matters for objc_retainAutorelease formation.
+      return false;
+    }
+    break;
+
+  case RetainAutoreleaseRVDep: {
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    switch (Class) {
+    case IC_Retain:
+    case IC_RetainRV:
+      // Check for a retain of the same pointer for merging.
+      return GetObjCArg(Inst) == Arg;
+    default:
+      // Anything that can autorelease interrupts
+      // retainAutoreleaseReturnValue formation.
+      return CanInterruptRV(Class);
+    }
+    break;
+  }
+
+  case RetainRVDep:
+    return CanInterruptRV(GetBasicInstructionClass(Inst));
+  }
+
+  llvm_unreachable("Invalid dependence flavor");
+  return true;
+}
+
+/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
+/// find local and non-local dependencies on Arg.
+/// TODO: Cache results?
+static void
+FindDependencies(DependenceKind Flavor,
+                 const Value *Arg,
+                 BasicBlock *StartBB, Instruction *StartInst,
+                 SmallPtrSet<Instruction *, 4> &DependingInstructions,
+                 SmallPtrSet<const BasicBlock *, 4> &Visited,
+                 ProvenanceAnalysis &PA) {
+  BasicBlock::iterator StartPos = StartInst;
+
+  SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+  Worklist.push_back(std::make_pair(StartBB, StartPos));
+  do {
+    std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+      Worklist.pop_back_val();
+    BasicBlock *LocalStartBB = Pair.first;
+    BasicBlock::iterator LocalStartPos = Pair.second;
+    BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+    for (;;) {
+      if (LocalStartPos == StartBBBegin) {
+        pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+        if (PI == PE)
+          // If we've reached the function entry, produce a null dependence.
+          DependingInstructions.insert(0);
+        else
+          // Add the predecessors to the worklist.
+          do {
+            BasicBlock *PredBB = *PI;
+            if (Visited.insert(PredBB))
+              Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+          } while (++PI != PE);
+        break;
+      }
+
+      Instruction *Inst = --LocalStartPos;
+      if (Depends(Flavor, Inst, Arg, PA)) {
+        DependingInstructions.insert(Inst);
+        break;
+      }
+    }
+  } while (!Worklist.empty());
+
+  // Determine whether the original StartBB post-dominates all of the blocks
+  // we visited. If not, insert a sentinel indicating that most optimizations
+  // are not safe.
+  for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+       E = Visited.end(); I != E; ++I) {
+    const BasicBlock *BB = *I;
+    if (BB == StartBB)
+      continue;
+    const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+      const BasicBlock *Succ = *SI;
+      if (Succ != StartBB && !Visited.count(Succ)) {
+        DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
+        return;
+      }
+    }
+  }
+}
+
+static bool isNullOrUndef(const Value *V) {
+  return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static bool isNoopInstruction(const Instruction *I) {
+  return isa<BitCastInst>(I) ||
+         (isa<GetElementPtrInst>(I) &&
+          cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+/// OptimizeRetainCall - Turn objc_retain into
+/// objc_retainAutoreleasedReturnValue if the operand is a return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+  CallSite CS(GetObjCArg(Retain));
+  Instruction *Call = CS.getInstruction();
+  if (!Call) return;
+  if (Call->getParent() != Retain->getParent()) return;
+
+  // Check that the call is next to the retain.
+  BasicBlock::iterator I = Call;
+  ++I;
+  while (isNoopInstruction(I)) ++I;
+  if (&*I != Retain)
+    return;
+
+  // Turn it into an objc_retainAutoreleasedReturnValue.
+  Changed = true;
+  ++NumPeeps;
+  cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+}
+
+/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
+/// objc_retain if the operand is not a return value. Or, if it can be
+/// paired with an objc_autoreleaseReturnValue, delete the pair and
+/// return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+  // Check for the argument being from an immediately preceding call.
+  Value *Arg = GetObjCArg(RetainRV);
+  CallSite CS(Arg);
+  if (Instruction *Call = CS.getInstruction())
+    if (Call->getParent() == RetainRV->getParent()) {
+      BasicBlock::iterator I = Call;
+      ++I;
+      while (isNoopInstruction(I)) ++I;
+      if (&*I == RetainRV)
+        return false;
+    }
+
+  // Check for being preceded by an objc_autoreleaseReturnValue on the same
+  // pointer. In this case, we can delete the pair.
+  BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+  if (I != Begin) {
+    do --I; while (I != Begin && isNoopInstruction(I));
+    if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+        GetObjCArg(I) == Arg) {
+      Changed = true;
+      ++NumPeeps;
+      EraseInstruction(I);
+      EraseInstruction(RetainRV);
+      return true;
+    }
+  }
+
+  // Turn it into a plain objc_retain.
+  Changed = true;
+  ++NumPeeps;
+  cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+  return false;
+}
+
+/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
+/// objc_autorelease if the result is not used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
+  // Check for a return of the pointer value.
+  const Value *Ptr = GetObjCArg(AutoreleaseRV);
+  for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+       UI != UE; ++UI) {
+    const User *I = *UI;
+    if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+      return;
+  }
+
+  Changed = true;
+  ++NumPeeps;
+  cast<CallInst>(AutoreleaseRV)->
+    setCalledFunction(getAutoreleaseCallee(F.getParent()));
+}
+
+/// OptimizeIndividualCalls - Visit each call, one at a time, and make
+/// simplifications without doing any additional analysis.
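+/// For example, objc_* calls on a null pointer are deleted, calls which can
+/// never take a stack argument are marked with the "tail" keyword, and an
+/// objc_autorelease whose result is unused becomes an objc_release.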
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { + // Reset all the flags in preparation for recomputing them. + UsedInThisFunction = 0; + + // Visit all objc_* calls in F. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + InstructionClass Class = GetBasicInstructionClass(Inst); + + switch (Class) { + default: break; + + // Delete no-op casts. These function calls have special semantics, but + // the semantics are entirely implemented via lowering in the front-end, + // so by the time they reach the optimizer, they are just no-op calls + // which return their argument. + // + // There are gray areas here, as the ability to cast reference-counted + // pointers to raw void* and back allows code to break ARC assumptions, + // however these are currently considered to be unimportant. + case IC_NoopCast: + Changed = true; + ++NumNoops; + EraseInstruction(Inst); + continue; + + // If the pointer-to-weak-pointer is null, it's undefined behavior. + case IC_StoreWeak: + case IC_LoadWeak: + case IC_LoadWeakRetained: + case IC_InitWeak: + case IC_DestroyWeak: { + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(0))) { + const Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_CopyWeak: + case IC_MoveWeak: { + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(0)) || + isNullOrUndef(CI->getArgOperand(1))) { + const Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_Retain: + OptimizeRetainCall(F, Inst); + break; + case IC_RetainRV: + if (OptimizeRetainRVCall(F, Inst)) + continue; + break; + case IC_AutoreleaseRV: + OptimizeAutoreleaseRVCall(F, Inst); + break; + } + + // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. + if (IsAutorelease(Class) && Inst->use_empty()) { + CallInst *Call = cast<CallInst>(Inst); + const Value *Arg = Call->getArgOperand(0); + Arg = FindSingleUseIdentifiedObject(Arg); + if (Arg) { + Changed = true; + ++NumAutoreleases; + + // Create the declaration lazily. + LLVMContext &C = Inst->getContext(); + CallInst *NewCall = + CallInst::Create(getReleaseCallee(F.getParent()), + Call->getArgOperand(0), "", Call); + NewCall->setMetadata(ImpreciseReleaseMDKind, + MDNode::get(C, ArrayRef<Value *>())); + EraseInstruction(Call); + Inst = NewCall; + Class = IC_Release; + } + } + + // For functions which can never be passed stack arguments, add + // a tail keyword. + if (IsAlwaysTail(Class)) { + Changed = true; + cast<CallInst>(Inst)->setTailCall(); + } + + // Set nounwind as needed. + if (IsNoThrow(Class)) { + Changed = true; + cast<CallInst>(Inst)->setDoesNotThrow(); + } + + if (!IsNoopOnNull(Class)) { + UsedInThisFunction |= 1 << Class; + continue; + } + + const Value *Arg = GetObjCArg(Inst); + + // ARC calls with null are no-ops. Delete them. + if (isNullOrUndef(Arg)) { + Changed = true; + ++NumNoops; + EraseInstruction(Inst); + continue; + } + + // Keep track of which of retain, release, autorelease, and retain_block + // are actually present in this function. 
+ UsedInThisFunction |= 1 << Class; + + // If Arg is a PHI, and one or more incoming values to the + // PHI are null, and the call is control-equivalent to the PHI, and there + // are no relevant side effects between the PHI and the call, the call + // could be pushed up to just those paths with non-null incoming values. + // For now, don't bother splitting critical edges for this. + SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; + Worklist.push_back(std::make_pair(Inst, Arg)); + do { + std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); + Inst = Pair.first; + Arg = Pair.second; + + const PHINode *PN = dyn_cast<PHINode>(Arg); + if (!PN) continue; + + // Determine if the PHI has any null operands, or any incoming + // critical edges. + bool HasNull = false; + bool HasCriticalEdges = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (isNullOrUndef(Incoming)) + HasNull = true; + else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) + .getNumSuccessors() != 1) { + HasCriticalEdges = true; + break; + } + } + // If we have null operands and no critical edges, optimize. + if (!HasCriticalEdges && HasNull) { + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + + // Check that there is nothing that cares about the reference + // count between the call and the phi. + FindDependencies(NeedsPositiveRetainCount, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + if (DependingInstructions.size() == 1 && + *DependingInstructions.begin() == PN) { + Changed = true; + ++NumPartialNoops; + // Clone the call into each predecessor that has a non-null value. + CallInst *CInst = cast<CallInst>(Inst); + const Type *ParamTy = CInst->getArgOperand(0)->getType(); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (!isNullOrUndef(Incoming)) { + CallInst *Clone = cast<CallInst>(CInst->clone()); + Value *Op = PN->getIncomingValue(i); + Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); + if (Op->getType() != ParamTy) + Op = new BitCastInst(Op, ParamTy, "", InsertPos); + Clone->setArgOperand(0, Op); + Clone->insertBefore(InsertPos); + Worklist.push_back(std::make_pair(Clone, Incoming)); + } + } + // Erase the original call. + EraseInstruction(CInst); + continue; + } + } + } while (!Worklist.empty()); + } +} + +/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible +/// control flow, or other CFG structures where moving code across the edge +/// would result in it being executed more. +void +ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + BBState &MyStates) const { + // If any top-down local-use or possible-dec has a succ which is earlier in + // the sequence, forget it. 
+  for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+       E = MyStates.top_down_ptr_end(); I != E; ++I)
+    switch (I->second.GetSeq()) {
+    default: break;
+    case S_Use: {
+      const Value *Arg = I->first;
+      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+      bool SomeSuccHasSame = false;
+      bool AllSuccsHaveSame = true;
+      for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+        switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+        case S_None:
+        case S_CanRelease:
+          MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+          SomeSuccHasSame = false;
+          break;
+        case S_Use:
+          SomeSuccHasSame = true;
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+          AllSuccsHaveSame = false;
+          break;
+        case S_Retain:
+          llvm_unreachable("bottom-up pointer in retain state!");
+        }
+      // If the state at the other end of any of the successor edges
+      // matches the current state, require all edges to match. This
+      // guards against loops in the middle of a sequence.
+      if (SomeSuccHasSame && !AllSuccsHaveSame)
+        MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+      break;
+    }
+    case S_CanRelease: {
+      const Value *Arg = I->first;
+      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+      bool SomeSuccHasSame = false;
+      bool AllSuccsHaveSame = true;
+      for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+        switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+        case S_None:
+          MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+          SomeSuccHasSame = false;
+          break;
+        case S_CanRelease:
+          SomeSuccHasSame = true;
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+        case S_Use:
+          AllSuccsHaveSame = false;
+          break;
+        case S_Retain:
+          llvm_unreachable("bottom-up pointer in retain state!");
+        }
+      // If the state at the other end of any of the successor edges
+      // matches the current state, require all edges to match. This
+      // guards against loops in the middle of a sequence.
+      if (SomeSuccHasSame && !AllSuccsHaveSame)
+        MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+      break;
+    }
+    }
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+                          DenseMap<const BasicBlock *, BBState> &BBStates,
+                          MapVector<Value *, RRInfo> &Retains) {
+  bool NestingDetected = false;
+  BBState &MyStates = BBStates[BB];
+
+  // Merge the states from each successor to compute the initial state
+  // for the current block.
+  const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+  succ_const_iterator SI(TI), SE(TI, false);
+  if (SI == SE)
+    MyStates.SetAsExit();
+  else
+    do {
+      const BasicBlock *Succ = *SI++;
+      if (Succ == BB)
+        continue;
+      DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+      if (I == BBStates.end())
+        continue;
+      MyStates.InitFromSucc(I->second);
+      while (SI != SE) {
+        Succ = *SI++;
+        if (Succ != BB) {
+          I = BBStates.find(Succ);
+          if (I != BBStates.end())
+            MyStates.MergeSucc(I->second);
+        }
+      }
+      break;
+    } while (SI != SE);
+
+  // Visit all the instructions, bottom-up.
+  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+    Instruction *Inst = llvm::prior(I);
+    InstructionClass Class = GetInstructionClass(Inst);
+    const Value *Arg = 0;
+
+    switch (Class) {
+    case IC_Release: {
+      Arg = GetObjCArg(Inst);
+
+      PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+      // If we see two releases in a row on the same pointer, make a note;
+      // we'll circle back to revisit it after we've hopefully eliminated the
+      // second release, which may allow us to eliminate the first release
+      // too.
+ // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. + if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) + NestingDetected = true; + + S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); + S.RRI.clear(); + S.RRI.KnownIncremented = S.IsKnownIncremented(); + S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); + S.RRI.Calls.insert(Inst); + + S.IncrementRefCount(); + break; + } + case IC_RetainBlock: + case IC_Retain: + case IC_RetainRV: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrBottomUpState(Arg); + S.DecrementRefCount(); + + switch (S.GetSeq()) { + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + S.RRI.ReverseInsertPts.clear(); + // FALL THROUGH + case S_CanRelease: + // Don't do retain+release tracking for IC_RetainRV, because it's + // better to let it remain as the first instruction after a call. + if (Class != IC_RetainRV) { + S.RRI.IsRetainBlock = Class == IC_RetainBlock; + Retains[Inst] = S.RRI; + } + S.ClearSequenceProgress(); + break; + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + break; + } + case IC_AutoreleasepoolPop: + // Conservatively, clear MyStates for all known pointers. + MyStates.clearBottomUpPointers(); + continue; + case IC_AutoreleasepoolPush: + case IC_None: + // These are irrelevant. + continue; + default: + break; + } + + // Consider any other possible effects of this instruction on each + // pointer being tracked. + for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), + ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { + const Value *Ptr = MI->first; + if (Ptr == Arg) + continue; // Handled above. + PtrState &S = MI->second; + Sequence Seq = S.GetSeq(); + + // Check for possible retains and releases. + if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + // Check for a retain (we're going bottom-up here). + S.DecrementRefCount(); + + // Check for a release. + if (!IsRetain(Class) && Class != IC_RetainBlock) + switch (Seq) { + case S_Use: + S.SetSeq(S_CanRelease); + continue; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + + // Check for possible direct uses. + switch (Seq) { + case S_Release: + case S_MovableRelease: + if (CanUse(Inst, Ptr, PA, Class)) { + S.RRI.ReverseInsertPts.clear(); + S.RRI.ReverseInsertPts.insert(Inst); + S.SetSeq(S_Use); + } else if (Seq == S_Release && + (Class == IC_User || Class == IC_CallOrUser)) { + // Non-movable releases depend on any possible objc pointer use. + S.SetSeq(S_Stop); + S.RRI.ReverseInsertPts.clear(); + S.RRI.ReverseInsertPts.insert(Inst); + } + break; + case S_Stop: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_CanRelease: + case S_Use: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + } + + return NestingDetected; +} + +bool +ObjCARCOpt::VisitTopDown(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + DenseMap<Value *, RRInfo> &Releases) { + bool NestingDetected = false; + BBState &MyStates = BBStates[BB]; + + // Merge the states from each predecessor to compute the initial state + // for the current block. 
+  const_pred_iterator PI(BB), PE(BB, false);
+  if (PI == PE)
+    MyStates.SetAsEntry();
+  else
+    do {
+      const BasicBlock *Pred = *PI++;
+      if (Pred == BB)
+        continue;
+      DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+      if (I == BBStates.end())
+        continue;
+      MyStates.InitFromPred(I->second);
+      while (PI != PE) {
+        Pred = *PI++;
+        if (Pred != BB) {
+          I = BBStates.find(Pred);
+          if (I != BBStates.end())
+            MyStates.MergePred(I->second);
+        }
+      }
+      break;
+    } while (PI != PE);
+
+  // Visit all the instructions, top-down.
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+    Instruction *Inst = I;
+    InstructionClass Class = GetInstructionClass(Inst);
+    const Value *Arg = 0;
+
+    switch (Class) {
+    case IC_RetainBlock:
+    case IC_Retain:
+    case IC_RetainRV: {
+      Arg = GetObjCArg(Inst);
+
+      PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+      // Don't do retain+release tracking for IC_RetainRV, because it's
+      // better to let it remain as the first instruction after a call.
+      if (Class != IC_RetainRV) {
+        // If we see two retains in a row on the same pointer, make a note;
+        // we'll circle back to revisit it after we've hopefully eliminated
+        // the second retain, which may allow us to eliminate the first
+        // retain too.
+        // Theoretically we could implement removal of nested retain+release
+        // pairs by making PtrState hold a stack of states, but this is
+        // simple and avoids adding overhead for the non-nested case.
+        if (S.GetSeq() == S_Retain)
+          NestingDetected = true;
+
+        S.SetSeq(S_Retain);
+        S.RRI.clear();
+        S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+        S.RRI.KnownIncremented = S.IsKnownIncremented();
+        S.RRI.Calls.insert(Inst);
+      }
+
+      S.IncrementRefCount();
+      break;
+    }
+    case IC_Release: {
+      Arg = GetObjCArg(Inst);
+
+      PtrState &S = MyStates.getPtrTopDownState(Arg);
+      S.DecrementRefCount();
+
+      switch (S.GetSeq()) {
+      case S_Retain:
+      case S_CanRelease:
+        S.RRI.ReverseInsertPts.clear();
+        // FALL THROUGH
+      case S_Use:
+        S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+        S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+        Releases[Inst] = S.RRI;
+        S.ClearSequenceProgress();
+        break;
+      case S_None:
+        break;
+      case S_Stop:
+      case S_Release:
+      case S_MovableRelease:
+        llvm_unreachable("top-down pointer in release state!");
+      }
+      break;
+    }
+    case IC_AutoreleasepoolPop:
+      // Conservatively, clear MyStates for all known pointers.
+      MyStates.clearTopDownPointers();
+      continue;
+    case IC_AutoreleasepoolPush:
+    case IC_None:
+      // These are irrelevant.
+      continue;
+    default:
+      break;
+    }
+
+    // Consider any other possible effects of this instruction on each
+    // pointer being tracked.
+    for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+         ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+      const Value *Ptr = MI->first;
+      if (Ptr == Arg)
+        continue; // Handled above.
+      PtrState &S = MI->second;
+      Sequence Seq = S.GetSeq();
+
+      // Check for possible releases.
+      if (!IsRetain(Class) && Class != IC_RetainBlock &&
+          CanAlterRefCount(Inst, Ptr, PA, Class)) {
+        // Check for a release.
+        S.DecrementRefCount();
+
+        switch (Seq) {
+        case S_Retain:
+          S.SetSeq(S_CanRelease);
+          S.RRI.ReverseInsertPts.clear();
+          S.RRI.ReverseInsertPts.insert(Inst);
+
+          // One call can't cause a transition from S_Retain to S_CanRelease
+          // and S_CanRelease to S_Use. If we've made the first transition,
+          // we're done.
+ continue; + case S_Use: + case S_CanRelease: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + } + + // Check for possible direct uses. + switch (Seq) { + case S_CanRelease: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_Use: + case S_Retain: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + } + } + + CheckForCFGHazards(BB, BBStates, MyStates); + return NestingDetected; +} + +// Visit - Visit the function both top-down and bottom-up. +bool +ObjCARCOpt::Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { + // Use postorder for bottom-up, and reverse-postorder for top-down, because we + // magically know that loops will be well behaved, i.e. they won't repeatedly + // call retain on a single pointer without doing a release. + bool BottomUpNestingDetected = false; + SmallVector<BasicBlock *, 8> PostOrder; + for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) { + BasicBlock *BB = *I; + PostOrder.push_back(BB); + + BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); + } + + // Iterate through the post-order in reverse order, achieving a + // reverse-postorder traversal. We don't use the ReversePostOrderTraversal + // class here because it works by computing its own full postorder iteration, + // recording the sequence, and playing it back in reverse. Since we're already + // doing a full iteration above, we can just record the sequence manually and + // avoid the cost of having ReversePostOrderTraversal compute it. + bool TopDownNestingDetected = false; + for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator + RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI) + TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases); + + return TopDownNestingDetected && BottomUpNestingDetected; +} + +/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove. +void ObjCARCOpt::MoveCalls(Value *Arg, + RRInfo &RetainsToMove, + RRInfo &ReleasesToMove, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, + SmallVectorImpl<Instruction *> &DeadInsts) { + const Type *ArgTy = Arg->getType(); + const Type *ParamTy = + (RetainRVFunc ? RetainRVFunc : + RetainFunc ? RetainFunc : + RetainBlockFunc)->arg_begin()->getType(); + + // Insert the new retain and release calls. + for (SmallPtrSet<Instruction *, 2>::const_iterator + PI = ReleasesToMove.ReverseInsertPts.begin(), + PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) { + Instruction *InsertPt = *PI; + Value *MyArg = ArgTy == ParamTy ? Arg : + new BitCastInst(Arg, ParamTy, "", InsertPt); + CallInst *Call = + CallInst::Create(RetainsToMove.IsRetainBlock ? + RetainBlockFunc : RetainFunc, + MyArg, "", InsertPt); + Call->setDoesNotThrow(); + if (!RetainsToMove.IsRetainBlock) + Call->setTailCall(); + } + for (SmallPtrSet<Instruction *, 2>::const_iterator + PI = RetainsToMove.ReverseInsertPts.begin(), + PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) { + Instruction *LastUse = *PI; + Instruction *InsertPts[] = { 0, 0, 0 }; + if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) { + // We can't insert code immediately after an invoke instruction, so + // insert code at the beginning of both successor blocks instead. 
+        // The invoke's return value isn't available in the unwind block,
+        // but our releases will never depend on it, because they must be
+        // paired with retains from before the invoke.
+        InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
+        InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+      } else {
+        // Insert code immediately after the last use.
+        InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
+      }
+
+      for (Instruction **I = InsertPts; *I; ++I) {
+        Instruction *InsertPt = *I;
+        Value *MyArg = ArgTy == ParamTy ? Arg :
+                       new BitCastInst(Arg, ParamTy, "", InsertPt);
+        CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+        // Attach a clang.imprecise_release metadata tag, if appropriate.
+        if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+          Call->setMetadata(ImpreciseReleaseMDKind, M);
+        Call->setDoesNotThrow();
+        if (ReleasesToMove.IsTailCallRelease)
+          Call->setTailCall();
+      }
+    }
+
+    // Delete the original retain and release calls.
+    for (SmallPtrSet<Instruction *, 2>::const_iterator
+           AI = RetainsToMove.Calls.begin(),
+           AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+      Instruction *OrigRetain = *AI;
+      Retains.blot(OrigRetain);
+      DeadInsts.push_back(OrigRetain);
+    }
+    for (SmallPtrSet<Instruction *, 2>::const_iterator
+           AI = ReleasesToMove.Calls.begin(),
+           AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+      Instruction *OrigRelease = *AI;
+      Releases.erase(OrigRelease);
+      DeadInsts.push_back(OrigRelease);
+    }
+}
+
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+                                   &BBStates,
+                                 MapVector<Value *, RRInfo> &Retains,
+                                 DenseMap<Value *, RRInfo> &Releases) {
+  bool AnyPairsCompletelyEliminated = false;
+  RRInfo RetainsToMove;
+  RRInfo ReleasesToMove;
+  SmallVector<Instruction *, 4> NewRetains;
+  SmallVector<Instruction *, 4> NewReleases;
+  SmallVector<Instruction *, 8> DeadInsts;
+
+  for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+       E = Retains.end(); I != E; ) {
+    Value *V = (I++)->first;
+    if (!V) continue; // blotted
+
+    Instruction *Retain = cast<Instruction>(V);
+    Value *Arg = GetObjCArg(Retain);
+
+    // If the object being released is in static or stack storage, we know it's
+    // not being managed by ObjC reference counting, so we can delete pairs
+    // regardless of what possible decrements or uses lie between them.
+    bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+    // If a pair happens in a region where it is known that the reference count
+    // is already incremented, we can similarly ignore possible decrements.
+    bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+
+    // Connect the dots between the bottom-up-collected RetainsToMove and
+    // top-down-collected ReleasesToMove to form sets of related calls.
+    // This is an iterative process so that we connect multiple releases
+    // to multiple retains if needed.
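The loop that follows is, in effect, a connected-component computation on the bipartite graph whose edges are the retain/release pairings recorded in RRInfo::Calls: starting from one retain, it alternately pulls in every release paired with a known retain and every retain paired with a known release, until neither side grows (or an unpaired call forces a bail-out to next_retain). A standalone sketch of that fixed point, with invented names (CallId, PairMap, pairingClosure) rather than the pass's real data structures:

#include <cstddef>
#include <map>
#include <set>
#include <vector>

typedef int CallId;
typedef std::map<CallId, std::vector<CallId> > PairMap;

// Returns the retains in the same pairing component as InitialRetain;
// the matching releases are accumulated into Releases.
std::set<CallId> pairingClosure(CallId InitialRetain,
                                const PairMap &RetainToReleases,
                                const PairMap &ReleaseToRetains,
                                std::set<CallId> &Releases) {
  std::set<CallId> Retains;
  Retains.insert(InitialRetain);
  std::vector<CallId> NewRetains(1, InitialRetain), NewReleases;
  while (!NewRetains.empty()) {
    // Forward: pull in every release paired with a newly found retain.
    for (std::size_t i = 0; i != NewRetains.size(); ++i) {
      PairMap::const_iterator It = RetainToReleases.find(NewRetains[i]);
      if (It == RetainToReleases.end()) continue;
      for (std::size_t j = 0; j != It->second.size(); ++j)
        if (Releases.insert(It->second[j]).second)
          NewReleases.push_back(It->second[j]);
    }
    NewRetains.clear();
    // Back the other way: pull in every retain paired with a new release.
    for (std::size_t i = 0; i != NewReleases.size(); ++i) {
      PairMap::const_iterator It = ReleaseToRetains.find(NewReleases[i]);
      if (It == ReleaseToRetains.end()) continue;
      for (std::size_t j = 0; j != It->second.size(); ++j)
        if (Retains.insert(It->second[j]).second)
          NewRetains.push_back(It->second[j]);
    }
    NewReleases.clear();
  }
  return Retains;
}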
+    unsigned OldDelta = 0;
+    unsigned NewDelta = 0;
+    unsigned OldCount = 0;
+    unsigned NewCount = 0;
+    bool FirstRelease = true;
+    bool FirstRetain = true;
+    NewRetains.push_back(Retain);
+    for (;;) {
+      for (SmallVectorImpl<Instruction *>::const_iterator
+           NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+        Instruction *NewRetain = *NI;
+        MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+        assert(It != Retains.end());
+        const RRInfo &NewRetainRRI = It->second;
+        KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+        for (SmallPtrSet<Instruction *, 2>::const_iterator
+             LI = NewRetainRRI.Calls.begin(),
+             LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+          Instruction *NewRetainRelease = *LI;
+          DenseMap<Value *, RRInfo>::const_iterator Jt =
+            Releases.find(NewRetainRelease);
+          if (Jt == Releases.end())
+            goto next_retain;
+          const RRInfo &NewRetainReleaseRRI = Jt->second;
+          assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+          if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+            OldDelta -=
+              BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+            // Merge the ReleaseMetadata and IsTailCallRelease values.
+            if (FirstRelease) {
+              ReleasesToMove.ReleaseMetadata =
+                NewRetainReleaseRRI.ReleaseMetadata;
+              ReleasesToMove.IsTailCallRelease =
+                NewRetainReleaseRRI.IsTailCallRelease;
+              FirstRelease = false;
+            } else {
+              if (ReleasesToMove.ReleaseMetadata !=
+                    NewRetainReleaseRRI.ReleaseMetadata)
+                ReleasesToMove.ReleaseMetadata = 0;
+              if (ReleasesToMove.IsTailCallRelease !=
+                    NewRetainReleaseRRI.IsTailCallRelease)
+                ReleasesToMove.IsTailCallRelease = false;
+            }
+
+            // Collect the optimal insertion points.
+            if (!KnownSafe)
+              for (SmallPtrSet<Instruction *, 2>::const_iterator
+                   RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+                   RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+                   RI != RE; ++RI) {
+                Instruction *RIP = *RI;
+                if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+                  NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+              }
+            NewReleases.push_back(NewRetainRelease);
+          }
+        }
+      }
+      NewRetains.clear();
+      if (NewReleases.empty()) break;
+
+      // Back the other way.
+      for (SmallVectorImpl<Instruction *>::const_iterator
+           NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+        Instruction *NewRelease = *NI;
+        DenseMap<Value *, RRInfo>::const_iterator It =
+          Releases.find(NewRelease);
+        assert(It != Releases.end());
+        const RRInfo &NewReleaseRRI = It->second;
+        KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+        for (SmallPtrSet<Instruction *, 2>::const_iterator
+             LI = NewReleaseRRI.Calls.begin(),
+             LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+          Instruction *NewReleaseRetain = *LI;
+          MapVector<Value *, RRInfo>::const_iterator Jt =
+            Retains.find(NewReleaseRetain);
+          if (Jt == Retains.end())
+            goto next_retain;
+          const RRInfo &NewReleaseRetainRRI = Jt->second;
+          assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+          if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+            unsigned PathCount =
+              BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+            OldDelta += PathCount;
+            OldCount += PathCount;
+
+            // Merge the IsRetainBlock values.
+            if (FirstRetain) {
+              RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+              FirstRetain = false;
+            } else if (RetainsToMove.IsRetainBlock !=
+                       NewReleaseRetainRRI.IsRetainBlock)
+              // It's not possible to merge the sequences if one uses
+              // objc_retain and the other uses objc_retainBlock.
+              goto next_retain;
+
+            // Collect the optimal insertion points.
+ if (!KnownSafe) + for (SmallPtrSet<Instruction *, 2>::const_iterator + RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), + RE = NewReleaseRetainRRI.ReverseInsertPts.end(); + RI != RE; ++RI) { + Instruction *RIP = *RI; + if (RetainsToMove.ReverseInsertPts.insert(RIP)) { + PathCount = BBStates[RIP->getParent()].GetAllPathCount(); + NewDelta += PathCount; + NewCount += PathCount; + } + } + NewRetains.push_back(NewReleaseRetain); + } + } + } + NewReleases.clear(); + if (NewRetains.empty()) break; + } + + // If the pointer is known incremented, we can safely delete the pair + // regardless of what's between them. + if (KnownIncrementedTD || KnownIncrementedBU) { + RetainsToMove.ReverseInsertPts.clear(); + ReleasesToMove.ReverseInsertPts.clear(); + NewCount = 0; + } + + // Determine whether the original call points are balanced in the retain and + // release calls through the program. If not, conservatively don't touch + // them. + // TODO: It's theoretically possible to do code motion in this case, as + // long as the existing imbalances are maintained. + if (OldDelta != 0) + goto next_retain; + + // Determine whether the new insertion points we computed preserve the + // balance of retain and release calls through the program. + // TODO: If the fully aggressive solution isn't valid, try to find a + // less aggressive solution which is. + if (NewDelta != 0) + goto next_retain; + + // Ok, everything checks out and we're all set. Let's move some code! + Changed = true; + AnyPairsCompletelyEliminated = NewCount == 0; + NumRRs += OldCount - NewCount; + MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts); + + next_retain: + NewReleases.clear(); + NewRetains.clear(); + RetainsToMove.clear(); + ReleasesToMove.clear(); + } + + // Now that we're done moving everything, we can delete the newly dead + // instructions, as we no longer need them as insert points. + while (!DeadInsts.empty()) + EraseInstruction(DeadInsts.pop_back_val()); + + return AnyPairsCompletelyEliminated; +} + +/// OptimizeWeakCalls - Weak pointer optimizations. +void ObjCARCOpt::OptimizeWeakCalls(Function &F) { + // First, do memdep-style RLE and S2L optimizations. We can't use memdep + // itself because it uses AliasAnalysis and we need to do provenance + // queries instead. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + InstructionClass Class = GetBasicInstructionClass(Inst); + if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) + continue; + + // Delete objc_loadWeak calls with no users. + if (Class == IC_LoadWeak && Inst->use_empty()) { + Inst->eraseFromParent(); + continue; + } + + // TODO: For now, just look for an earlier available version of this value + // within the same block. Theoretically, we could do memdep-style non-local + // analysis too, but that would want caching. A better approach would be to + // use the technique that EarlyCSE uses. + inst_iterator Current = llvm::prior(I); + BasicBlock *CurrentBB = Current.getBasicBlockIterator(); + for (BasicBlock::iterator B = CurrentBB->begin(), + J = Current.getInstructionIterator(); + J != B; --J) { + Instruction *EarlierInst = &*llvm::prior(J); + InstructionClass EarlierClass = GetInstructionClass(EarlierInst); + switch (EarlierClass) { + case IC_LoadWeak: + case IC_LoadWeakRetained: { + // If this is loading from the same pointer, replace this load's value + // with that one. 
+      CallInst *Call = cast<CallInst>(Inst);
+      CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+      Value *Arg = Call->getArgOperand(0);
+      Value *EarlierArg = EarlierCall->getArgOperand(0);
+      switch (PA.getAA()->alias(Arg, EarlierArg)) {
+      case AliasAnalysis::MustAlias:
+        Changed = true;
+        // If the load has a builtin retain, insert a plain retain for it.
+        if (Class == IC_LoadWeakRetained) {
+          CallInst *CI =
+            CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                             "", Call);
+          CI->setTailCall();
+        }
+        // Zap the fully redundant load.
+        Call->replaceAllUsesWith(EarlierCall);
+        Call->eraseFromParent();
+        goto clobbered;
+      case AliasAnalysis::MayAlias:
+      case AliasAnalysis::PartialAlias:
+        goto clobbered;
+      case AliasAnalysis::NoAlias:
+        break;
+      }
+      break;
+    }
+    case IC_StoreWeak:
+    case IC_InitWeak: {
+      // If this is storing to the same pointer and has the same size etc.,
+      // replace this load's value with the stored value.
+      CallInst *Call = cast<CallInst>(Inst);
+      CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+      Value *Arg = Call->getArgOperand(0);
+      Value *EarlierArg = EarlierCall->getArgOperand(0);
+      switch (PA.getAA()->alias(Arg, EarlierArg)) {
+      case AliasAnalysis::MustAlias:
+        Changed = true;
+        // If the load has a builtin retain, insert a plain retain for it.
+        if (Class == IC_LoadWeakRetained) {
+          CallInst *CI =
+            CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                             "", Call);
+          CI->setTailCall();
+        }
+        // Zap the fully redundant load.
+        Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+        Call->eraseFromParent();
+        goto clobbered;
+      case AliasAnalysis::MayAlias:
+      case AliasAnalysis::PartialAlias:
+        goto clobbered;
+      case AliasAnalysis::NoAlias:
+        break;
+      }
+      break;
+    }
+    case IC_MoveWeak:
+    case IC_CopyWeak:
+      // TODO: Grab the copied value.
+      goto clobbered;
+    case IC_AutoreleasepoolPush:
+    case IC_None:
+    case IC_User:
+      // Weak pointers are only modified through the weak entry points
+      // (and arbitrary calls, which could call the weak entry points).
+      break;
+    default:
+      // Anything else could modify the weak pointer.
+      goto clobbered;
+    }
+  clobbered:;
+  }
+
+  // Then, for each destroyWeak with an alloca operand, check to see if
+  // the alloca and all its users can be zapped.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    if (Class != IC_DestroyWeak)
+      continue;
+
+    CallInst *Call = cast<CallInst>(Inst);
+    Value *Arg = Call->getArgOperand(0);
+    if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+      for (Value::use_iterator UI = Alloca->use_begin(),
+           UE = Alloca->use_end(); UI != UE; ++UI) {
+        Instruction *UserInst = cast<Instruction>(*UI);
+        switch (GetBasicInstructionClass(UserInst)) {
+        case IC_InitWeak:
+        case IC_StoreWeak:
+        case IC_DestroyWeak:
+          continue;
+        default:
+          goto done;
+        }
+      }
+      Changed = true;
+      for (Value::use_iterator UI = Alloca->use_begin(),
+           UE = Alloca->use_end(); UI != UE; ) {
+        CallInst *UserInst = cast<CallInst>(*UI++);
+        if (!UserInst->use_empty())
+          UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+        UserInst->eraseFromParent();
+      }
+      Alloca->eraseFromParent();
+    done:;
+    }
+  }
+}
+
+/// OptimizeSequences - Identify program paths which execute sequences of
+/// retains and releases which can be eliminated.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+  /// Releases, Retains - These are used to store the results of the main flow
+  /// analysis. These use Value* as the key instead of Instruction* so that the
+  /// map stays valid when we get around to rewriting code and calls get
+  /// replaced by arguments.
+  DenseMap<Value *, RRInfo> Releases;
+  MapVector<Value *, RRInfo> Retains;
+
+  /// BBStates - This is used during the traversal of the function to track the
+  /// states for each identified object at each block.
+  DenseMap<const BasicBlock *, BBState> BBStates;
+
+  // Analyze the CFG of the function, and all instructions.
+  bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+  // Transform.
+  return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected;
+}
+
+/// OptimizeReturns - Look for this pattern:
+///
+///    %call = call i8* @something(...)
+///    %2 = call i8* @objc_retain(i8* %call)
+///    %3 = call i8* @objc_autorelease(i8* %2)
+///    ret i8* %3
+///
+/// And delete the retain and autorelease.
+///
+/// Otherwise if it's just this:
+///
+///    %3 = call i8* @objc_autorelease(i8* %2)
+///    ret i8* %3
+///
+/// convert the autorelease to autoreleaseRV.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+  if (!F.getReturnType()->isPointerTy())
+    return;
+
+  SmallPtrSet<Instruction *, 4> DependingInstructions;
+  SmallPtrSet<const BasicBlock *, 4> Visited;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    BasicBlock *BB = FI;
+    ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+    if (!Ret) continue;
+
+    const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+    FindDependencies(NeedsPositiveRetainCount, Arg,
+                     BB, Ret, DependingInstructions, Visited, PA);
+    if (DependingInstructions.size() != 1)
+      goto next_block;
+
+    {
+      CallInst *Autorelease =
+        dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+      if (!Autorelease)
+        goto next_block;
+      InstructionClass AutoreleaseClass =
+        GetBasicInstructionClass(Autorelease);
+      if (!IsAutorelease(AutoreleaseClass))
+        goto next_block;
+      if (GetObjCArg(Autorelease) != Arg)
+        goto next_block;
+
+      DependingInstructions.clear();
+      Visited.clear();
+
+      // Check that there is nothing that can affect the reference
+      // count between the autorelease and the retain.
+      FindDependencies(CanChangeRetainCount, Arg,
+                       BB, Autorelease, DependingInstructions, Visited, PA);
+      if (DependingInstructions.size() != 1)
+        goto next_block;
+
+      {
+        CallInst *Retain =
+          dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+        // Check that we found a retain with the same argument.
+        if (!Retain ||
+            !IsRetain(GetBasicInstructionClass(Retain)) ||
+            GetObjCArg(Retain) != Arg)
+          goto next_block;
+
+        DependingInstructions.clear();
+        Visited.clear();
+
+        // Convert the autorelease to an autoreleaseRV, since it's
+        // returning the value.
+        if (AutoreleaseClass == IC_Autorelease) {
+          Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+          AutoreleaseClass = IC_AutoreleaseRV;
+        }
+
+        // Check that there is nothing that can affect the reference
+        // count between the retain and the call.
+        FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+                         DependingInstructions, Visited, PA);
+        if (DependingInstructions.size() != 1)
+          goto next_block;
+
+        {
+          CallInst *Call =
+            dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+          // Check that the pointer is the return value of the call.
+          if (!Call || Arg != Call)
+            goto next_block;
+
+          // Check that the call is a regular call.
+          InstructionClass Class = GetBasicInstructionClass(Call);
+          if (Class != IC_CallOrUser && Class != IC_Call)
+            goto next_block;
+
+          // If so, we can zap the retain and autorelease.
+          Changed = true;
+          ++NumRets;
+          EraseInstruction(Retain);
+          EraseInstruction(Autorelease);
+        }
+      }
+    }
+
+  next_block:
+    DependingInstructions.clear();
+    Visited.clear();
+  }
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+  if (!EnableARCOpts)
+    return false;
+
+  Run = ModuleHasARC(M);
+  if (!Run)
+    return false;
+
+  // Identify the imprecise release metadata kind.
+  ImpreciseReleaseMDKind =
+    M.getContext().getMDKindID("clang.imprecise_release");
+
+  // Identify the declarations for objc_retain and friends.
+  RetainFunc = M.getFunction("objc_retain");
+  RetainBlockFunc = M.getFunction("objc_retainBlock");
+  RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
+  ReleaseFunc = M.getFunction("objc_release");
+
+  // Intuitively, objc_retain and others are nocapture, however in practice
+  // they are not, because they return their argument value. And objc_release
+  // calls finalizers.
+
+  // These are initialized lazily.
+  RetainRVCallee = 0;
+  AutoreleaseRVCallee = 0;
+  ReleaseCallee = 0;
+  RetainCallee = 0;
+  AutoreleaseCallee = 0;
+
+  return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
+  Changed = false;
+
+  PA.setAA(&getAnalysis<AliasAnalysis>());
+
+  // This pass performs several distinct transformations. As a compile-time aid
+  // when compiling code that isn't ObjC, skip these if the relevant ObjC
+  // library functions aren't declared.
+
+  // Preliminary optimizations. This also computes UsedInThisFunction.
+  OptimizeIndividualCalls(F);
+
+  // Optimizations for weak pointers.
+  if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+                            (1 << IC_LoadWeakRetained) |
+                            (1 << IC_StoreWeak) |
+                            (1 << IC_InitWeak) |
+                            (1 << IC_CopyWeak) |
+                            (1 << IC_MoveWeak) |
+                            (1 << IC_DestroyWeak)))
+    OptimizeWeakCalls(F);
+
+  // Optimizations for retain+release pairs.
+  if (UsedInThisFunction & ((1 << IC_Retain) |
+                            (1 << IC_RetainRV) |
+                            (1 << IC_RetainBlock)))
+    if (UsedInThisFunction & (1 << IC_Release))
+      // Run OptimizeSequences until it either stops making changes or
+      // no retain+release pair nesting is detected.
+      while (OptimizeSequences(F)) {}
+
+  // Optimizations if objc_autorelease is used.
+  if (UsedInThisFunction &
+      ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV)))
+    OptimizeReturns(F);
+
+  return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+  PA.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// ARC contraction.
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#include "llvm/Operator.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/Dominators.h"
+
+STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
+
+namespace {
+  /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
+  /// that makes it difficult for ObjCARCOpt to analyze, so this pass is run
+  /// late.
+  class ObjCARCContract : public FunctionPass {
+    bool Changed;
+    AliasAnalysis *AA;
+    DominatorTree *DT;
+    ProvenanceAnalysis PA;
+
+    /// Run - A flag indicating whether this optimization pass should run.
+    bool Run;
+
+    /// StoreStrongCallee, etc. - Declarations for ObjC runtime
+    /// functions, for use in creating calls to them. These are initialized
+    /// lazily to avoid cluttering up the Module with unused declarations.
+    Constant *StoreStrongCallee,
+             *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee;
+
+    /// RetainRVMarker - The inline asm string to insert between calls and
+    /// RetainRV calls to make the optimization work on targets which need it.
+    const MDString *RetainRVMarker;
+
+    Constant *getStoreStrongCallee(Module *M);
+    Constant *getRetainAutoreleaseCallee(Module *M);
+    Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+    bool ContractAutorelease(Function &F, Instruction *Autorelease,
+                             InstructionClass Class,
+                             SmallPtrSet<Instruction *, 4>
+                               &DependingInstructions,
+                             SmallPtrSet<const BasicBlock *, 4>
+                               &Visited);
+
+    void ContractRelease(Instruction *Release,
+                         inst_iterator &Iter);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+
+  public:
+    static char ID;
+    ObjCARCContract() : FunctionPass(ID) {
+      initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+                      "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+                    "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+Pass *llvm::createObjCARCContractPass() {
+  return new ObjCARCContract();
+}
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AliasAnalysis>();
+  AU.addRequired<DominatorTree>();
+  AU.setPreservesCFG();
+}
+
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+  if (!StoreStrongCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    Type *I8XX = PointerType::getUnqual(I8X);
+    std::vector<Type *> Params;
+    Params.push_back(I8XX);
+    Params.push_back(I8X);
+
+    AttrListPtr Attributes;
+    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Attributes.addAttr(1, Attribute::NoCapture);
+
+    StoreStrongCallee =
+      M->getOrInsertFunction(
+        "objc_storeStrong",
+        FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return StoreStrongCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+  if (!RetainAutoreleaseCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttrListPtr Attributes;
+    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    RetainAutoreleaseCallee =
+      M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
+  }
+  return RetainAutoreleaseCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+  if (!RetainAutoreleaseRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    AttrListPtr Attributes;
+    Attributes.addAttr(~0u, Attribute::NoUnwind);
+    RetainAutoreleaseRVCallee =
+      M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
+                             Attributes);
+  }
+  return RetainAutoreleaseRVCallee;
+}
+
+/// ContractAutorelease - Merge an autorelease with a retain into a fused
+/// call. +bool +ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet<Instruction *, 4> + &DependingInstructions, + SmallPtrSet<const BasicBlock *, 4> + &Visited) { + const Value *Arg = GetObjCArg(Autorelease); + + // Check that there are no instructions between the retain and the autorelease + // (such as an autorelease_pop) which may change the count. + CallInst *Retain = 0; + if (Class == IC_AutoreleaseRV) + FindDependencies(RetainAutoreleaseRVDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + else + FindDependencies(RetainAutoreleaseDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + + Visited.clear(); + if (DependingInstructions.size() != 1) { + DependingInstructions.clear(); + return false; + } + + Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); + DependingInstructions.clear(); + + if (!Retain || + GetBasicInstructionClass(Retain) != IC_Retain || + GetObjCArg(Retain) != Arg) + return false; + + Changed = true; + ++NumPeeps; + + if (Class == IC_AutoreleaseRV) + Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); + else + Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); + + EraseInstruction(Autorelease); + return true; +} + +/// ContractRelease - Attempt to merge an objc_release with a store, load, and +/// objc_retain to form an objc_storeStrong. This can be a little tricky because +/// the instructions don't always appear in order, and there may be unrelated +/// intervening instructions. +void ObjCARCContract::ContractRelease(Instruction *Release, + inst_iterator &Iter) { + LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); + if (!Load || Load->isVolatile()) return; + + // For now, require everything to be in one basic block. + BasicBlock *BB = Release->getParent(); + if (Load->getParent() != BB) return; + + // Walk down to find the store. + BasicBlock::iterator I = Load, End = BB->end(); + ++I; + AliasAnalysis::Location Loc = AA->getLocation(Load); + while (I != End && + (&*I == Release || + IsRetain(GetBasicInstructionClass(I)) || + !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod))) + ++I; + StoreInst *Store = dyn_cast<StoreInst>(I); + if (!Store || Store->isVolatile()) return; + if (Store->getPointerOperand() != Loc.Ptr) return; + + Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); + + // Walk up to find the retain. 
+ I = Store; + BasicBlock::iterator Begin = BB->begin(); + while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) + --I; + Instruction *Retain = I; + if (GetBasicInstructionClass(Retain) != IC_Retain) return; + if (GetObjCArg(Retain) != New) return; + + Changed = true; + ++NumStoreStrongs; + + LLVMContext &C = Release->getContext(); + const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + const Type *I8XX = PointerType::getUnqual(I8X); + + Value *Args[] = { Load->getPointerOperand(), New }; + if (Args[0]->getType() != I8XX) + Args[0] = new BitCastInst(Args[0], I8XX, "", Store); + if (Args[1]->getType() != I8X) + Args[1] = new BitCastInst(Args[1], I8X, "", Store); + CallInst *StoreStrong = + CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), + Args, "", Store); + StoreStrong->setDoesNotThrow(); + StoreStrong->setDebugLoc(Store->getDebugLoc()); + + if (&*Iter == Store) ++Iter; + Store->eraseFromParent(); + Release->eraseFromParent(); + EraseInstruction(Retain); + if (Load->use_empty()) + Load->eraseFromParent(); +} + +bool ObjCARCContract::doInitialization(Module &M) { + Run = ModuleHasARC(M); + if (!Run) + return false; + + // These are initialized lazily. + StoreStrongCallee = 0; + RetainAutoreleaseCallee = 0; + RetainAutoreleaseRVCallee = 0; + + // Initialize RetainRVMarker. + RetainRVMarker = 0; + if (NamedMDNode *NMD = + M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) + if (NMD->getNumOperands() == 1) { + const MDNode *N = NMD->getOperand(0); + if (N->getNumOperands() == 1) + if (const MDString *S = dyn_cast<MDString>(N->getOperand(0))) + RetainRVMarker = S; + } + + return false; +} + +bool ObjCARCContract::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + Changed = false; + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + + PA.setAA(&getAnalysis<AliasAnalysis>()); + + // For ObjC library calls which return their argument, replace uses of the + // argument with uses of the call return value, if it dominates the use. This + // reduces register pressure. + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + // Only these library routines return their argument. In particular, + // objc_retainBlock does not necessarily return its argument. + InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + break; + case IC_Autorelease: + case IC_AutoreleaseRV: + if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) + continue; + break; + case IC_RetainRV: { + // If we're compiling for a target which needs a special inline-asm + // marker to do the retainAutoreleasedReturnValue optimization, + // insert it now. 
+ if (!RetainRVMarker) + break; + BasicBlock::iterator BBI = Inst; + --BBI; + while (isNoopInstruction(BBI)) --BBI; + if (&*BBI == GetObjCArg(Inst)) { + InlineAsm *IA = + InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), + /*isVarArg=*/false), + RetainRVMarker->getString(), + /*Constraints=*/"", /*hasSideEffects=*/true); + CallInst::Create(IA, "", Inst); + } + break; + } + case IC_InitWeak: { + // objc_initWeak(p, null) => *p = null + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(1))) { + Value *Null = + ConstantPointerNull::get(cast<PointerType>(CI->getType())); + Changed = true; + new StoreInst(Null, CI->getArgOperand(0), CI); + CI->replaceAllUsesWith(Null); + CI->eraseFromParent(); + } + continue; + } + case IC_Release: + ContractRelease(Inst, I); + continue; + default: + continue; + } + + // Don't use GetObjCArg because we don't want to look through bitcasts + // and such; to do the replacement, the argument must have type i8*. + const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); + for (;;) { + // If we're compiling bugpointed code, don't get in trouble. + if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) + break; + // Look through the uses of the pointer. + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ) { + Use &U = UI.getUse(); + unsigned OperandNo = UI.getOperandNo(); + ++UI; // Increment UI now, because we may unlink its element. + if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser())) + if (Inst != UserInst && DT->dominates(Inst, UserInst)) { + Changed = true; + Instruction *Replacement = Inst; + const Type *UseTy = U.get()->getType(); + if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) { + // For PHI nodes, insert the bitcast in the predecessor block. + unsigned ValNo = + PHINode::getIncomingValueNumForOperand(OperandNo); + BasicBlock *BB = + PHI->getIncomingBlock(ValNo); + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + &BB->back()); + for (unsigned i = 0, e = PHI->getNumIncomingValues(); + i != e; ++i) + if (PHI->getIncomingBlock(i) == BB) { + // Keep the UI iterator valid. + if (&PHI->getOperandUse( + PHINode::getOperandNumForIncomingValue(i)) == + &UI.getUse()) + ++UI; + PHI->setIncomingValue(i, Replacement); + } + } else { + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", UserInst); + U.set(Replacement); + } + } + } + + // If Arg is a no-op casted pointer, strip one level of casts and + // iterate. + if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) + Arg = BI->getOperand(0); + else if (isa<GEPOperator>(Arg) && + cast<GEPOperator>(Arg)->hasAllZeroIndices()) + Arg = cast<GEPOperator>(Arg)->getPointerOperand(); + else if (isa<GlobalAlias>(Arg) && + !cast<GlobalAlias>(Arg)->mayBeOverridden()) + Arg = cast<GlobalAlias>(Arg)->getAliasee(); + else + break; + } + } + + return Changed; +} diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index c1dfe15..e6341ae 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -812,7 +812,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // because we can percolate the negate out. Watch for minint, which // cannot be positivified. 
if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) - if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) { + if (CI->isNegative() && !CI->isMinValue(true)) { Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); assert(!Duplicates.count(Factor) && "Shouldn't have two constant factors, missed a canonicalize"); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 32a0506..302c287 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -48,7 +48,12 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopUnswitchPass(Registry); initializeLoopIdiomRecognizePass(Registry); initializeLowerAtomicPass(Registry); + initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); + initializeObjCARCAliasAnalysisPass(Registry); + initializeObjCARCExpandPass(Registry); + initializeObjCARCContractPass(Registry); + initializeObjCARCOptPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); initializeSCCPPass(Registry); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 8938b28..7d6349c 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -30,6 +30,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Loads.h" @@ -152,7 +153,8 @@ namespace { void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); - static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); + static MemTransferInst *isOnlyCopiedFromConstantGlobal( + AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete); }; // SROA_DT - SROA that uses DominatorTree. @@ -228,16 +230,30 @@ class ConvertToScalarInfo { /// which means that mem2reg can't promote it. bool IsNotTrivial; + /// ScalarKind - Tracks the kind of alloca being considered for promotion, + /// computed based on the uses of the alloca rather than the LLVM type system. + enum { + Unknown, + + // Accesses via GEPs that are consistent with element access of a vector + // type. This will not be converted into a vector unless there is a later + // access using an actual vector type. + ImplicitVector, + + // Accesses via vector operations and GEPs that are consistent with the + // layout of a vector type. + Vector, + + // An integer bag-of-bits with bitwise operations for insertion and + // extraction. Any combination of types can be converted into this kind + // of scalar. + Integer + } ScalarKind; + /// VectorTy - This tracks the type that we should promote the vector to if /// it is possible to turn it into a vector. This starts out null, and if it /// isn't possible to turn into a vector type, it gets set to VoidTy. - const Type *VectorTy; - - /// HadAVector - True if there is at least one vector access to the alloca. - /// We don't want to turn random arrays into vectors and use vector element - /// insert/extract, but if there are element accesses to something that is - /// also declared as a vector, we do want to promote to a vector. - bool HadAVector; + const VectorType *VectorTy; /// HadNonMemTransferAccess - True if there is at least one access to the /// alloca that is not a MemTransferInst. 
We don't want to turn structs into @@ -246,14 +262,14 @@ class ConvertToScalarInfo { public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), - HadAVector(false), HadNonMemTransferAccess(false) { } + : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown), + VectorTy(0), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore); + void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset); bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); @@ -274,6 +290,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { if (!CanConvertToScalar(AI, 0) || !IsNotTrivial) return 0; + // If an alloca has only memset / memcpy uses, it may still have an Unknown + // ScalarKind. Treat it as an Integer below. + if (ScalarKind == Unknown) + ScalarKind = Integer; + + // FIXME: It should be possible to promote the vector type up to the alloca's + // size. + if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8) + ScalarKind = Integer; + // If we were able to find a vector type that can handle this with // insert/extract elements, and if there was at least one use that had // a vector type, promote this to a vector. We don't want to promote @@ -281,14 +307,15 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. const Type *NewTy; - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + if (ScalarKind == Vector) { + assert(VectorTy && "Missing type for vector scalar."); DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { unsigned BitWidth = AllocaSize * 8; - if (!HadAVector && !HadNonMemTransferAccess && - !TD.fitsInLegalInteger(BitWidth)) + if ((ScalarKind == ImplicitVector || ScalarKind == Integer) && + !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth)) return 0; DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); @@ -300,8 +327,9 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { return NewAI; } -/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) -/// so far at the offset specified by Offset (which is specified in bytes). +/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type +/// (VectorTy) so far at the offset specified by Offset (which is specified in +/// bytes). /// /// There are three cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. @@ -316,11 +344,11 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, - bool IsLoadOrStore) { +void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, + uint64_t Offset) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. 
-  if (VectorTy && VectorTy->isVoidTy())
+  if (ScalarKind == Integer)
     return;
 
   // If this could be contributing to a vector, analyze it.
@@ -336,7 +364,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
       // Full width accesses can be ignored, because they can always be turned
       // into bitcasts.
       unsigned EltSize = In->getPrimitiveSizeInBits()/8;
-      if (IsLoadOrStore && EltSize == AllocaSize)
+      if (EltSize == AllocaSize)
         return;
 
       // If we're accessing something that could be an element of a vector, see
@@ -345,11 +373,12 @@
       if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
           (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
         if (!VectorTy) {
+          ScalarKind = ImplicitVector;
           VectorTy = VectorType::get(In, AllocaSize/EltSize);
           return;
         }
 
-        unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+        unsigned CurrentEltSize = VectorTy->getElementType()
                                   ->getPrimitiveSizeInBits()/8;
         if (EltSize == CurrentEltSize)
          return;
@@ -361,16 +390,13 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
 
   // Otherwise, we have a case that we can't handle with an optimized vector
   // form. We can still turn this into a large integer.
-  VectorTy = Type::getVoidTy(In->getContext());
+  ScalarKind = Integer;
 }
 
-/// MergeInVectorType - Handles the vector case of MergeInType, returning true
-/// if the type was successfully merged and false otherwise.
+/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
+/// returning true if the type was successfully merged and false otherwise.
 bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
                                             uint64_t Offset) {
-  // Remember if we saw a vector type.
-  HadAVector = true;
-
   // TODO: Support nonzero offsets?
   if (Offset != 0)
     return false;
 
@@ -382,19 +408,22 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
   // If this is the first vector we see, remember the type so that we know the
   // element size.
   if (!VectorTy) {
+    ScalarKind = Vector;
     VectorTy = VInTy;
     return true;
   }
 
-  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+  unsigned BitWidth = VectorTy->getBitWidth();
   unsigned InBitWidth = VInTy->getBitWidth();
 
   // Vectors of the same size can be converted using a simple bitcast.
-  if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+  if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
+    ScalarKind = Vector;
     return true;
+  }
 
-  const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
-  const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+  const Type *ElementTy = VectorTy->getElementType();
+  const Type *InElementTy = VInTy->getElementType();
 
   // Do not allow mixed integer and floating-point accesses from vectors of
   // different sizes.
@@ -429,6 +458,7 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
   }
 
   // Pick the largest of the two vector types.
+ ScalarKind = Vector; if (InBitWidth > BitWidth) VectorTy = VInTy; @@ -456,7 +486,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (LI->getType()->isX86_MMXTy()) return false; HadNonMemTransferAccess = true; - MergeInType(LI->getType(), Offset, true); + MergeInTypeForLoadOrStore(LI->getType(), Offset); continue; } @@ -467,7 +497,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; HadNonMemTransferAccess = true; - MergeInType(SI->getOperand(0)->getType(), Offset, true); + MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset); continue; } @@ -498,10 +528,22 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // If this is a constant sized memset of a constant value (e.g. 0) we can // handle it. if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { - // Store of constant value and constant size. - if (!isa<ConstantInt>(MSI->getValue()) || - !isa<ConstantInt>(MSI->getLength())) + // Store of constant value. + if (!isa<ConstantInt>(MSI->getValue())) + return false; + + // Store of constant size. + ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength()); + if (!Len) return false; + + // If the size differs from the alloca, we can only convert the alloca to + // an integer bag-of-bits. + // FIXME: This should handle all of the cases that are currently accepted + // as vector element insertions. + if (Len->getZExtValue() != AllocaSize || Offset != 0) + ScalarKind = Integer; + IsNotTrivial = true; // Can't be mem2reg'd. HadNonMemTransferAccess = true; continue; @@ -1053,16 +1095,37 @@ bool SROA::runOnFunction(Function &F) { namespace { class AllocaPromoter : public LoadAndStorePromoter { AllocaInst *AI; + DIBuilder *DIB; + SmallVector<DbgDeclareInst *, 4> DDIs; + SmallVector<DbgValueInst *, 4> DVIs; public: AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, - DbgDeclareInst *DD, DIBuilder *&DB) - : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {} + DIBuilder *DB) + : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {} void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { // Remember which alloca we're promoting (for isInstInList). 
this->AI = AI; + if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + DDIs.push_back(DDI); + else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI)) + DVIs.push_back(DVI); + LoadAndStorePromoter::run(Insts); AI->eraseFromParent(); + for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(), + E = DDIs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + DDI->eraseFromParent(); + } + for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(), + E = DVIs.end(); I != E; ++I) { + DbgValueInst *DVI = *I; + DVI->eraseFromParent(); + } } virtual bool isInstInList(Instruction *I, @@ -1071,6 +1134,45 @@ public: return LI->getOperand(0) == AI; return cast<StoreInst>(I)->getPointerOperand() == AI; } + + virtual void updateDebugInfo(Instruction *Inst) const { + for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(), + E = DDIs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + ConvertDebugDeclareToDebugValue(DDI, LI, *DIB); + } + for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), + E = DVIs.end(); I != E; ++I) { + DbgValueInst *DVI = *I; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + Instruction *DbgVal = NULL; + // If an argument is zero extended then use argument directly. The ZExt + // may be zapped by an optimization pass in future. + Argument *ExtendedArg = NULL; + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); + if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); + if (ExtendedArg) + DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0, + DIVariable(DVI->getVariable()), + SI); + else + DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, + DIVariable(DVI->getVariable()), + SI); + DbgVal->setDebugLoc(DVI->getDebugLoc()); + } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + Instruction *DbgVal = + DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0, + DIVariable(DVI->getVariable()), LI); + DbgVal->setDebugLoc(DVI->getDebugLoc()); + } + } + } }; } // end anon namespace @@ -1262,7 +1364,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { LoadInst *TrueLoad = Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); LoadInst *FalseLoad = - Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t"); + Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f"); // Transfer alignment and TBAA info if present. 
TrueLoad->setAlignment(LI->getAlignment()); @@ -1340,10 +1442,9 @@ bool SROA::performPromotion(Function &F) { DT = &getAnalysis<DominatorTree>(); BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function - + DIBuilder DIB(*F.getParent()); bool Changed = false; SmallVector<Instruction*, 64> Insts; - DIBuilder *DIB = 0; while (1) { Allocas.clear(); @@ -1367,11 +1468,7 @@ bool SROA::performPromotion(Function &F) { for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) Insts.push_back(cast<Instruction>(*UI)); - - DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); - if (DDI && !DIB) - DIB = new DIBuilder(*AI->getParent()->getParent()->getParent()); - AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts); + AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts); Insts.clear(); } } @@ -1379,10 +1476,6 @@ bool SROA::performPromotion(Function &F) { Changed = true; } - // FIXME: Is there a better way to handle the lazy initialization of DIB - // so that there doesn't need to be an explicit delete? - delete DIB; - return Changed; } @@ -1403,8 +1496,8 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) { // performScalarRepl - This algorithm is a simple worklist driven algorithm, -// which runs on all of the malloc/alloca instructions in the function, removing -// them if they are only used by getelementptr instructions. +// which runs on all of the alloca instructions in the function, removing them +// if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; @@ -1438,12 +1531,15 @@ bool SROA::performScalarRepl(Function &F) { // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. - if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); - Constant *TheSrc = cast<Constant>(TheCopy->getSource()); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + ToDelete[i]->eraseFromParent(); + Constant *TheSrc = cast<Constant>(Copy->getSource()); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - TheCopy->eraseFromParent(); // Don't mutate the global. + Copy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); ++NumGlobals; Changed = true; @@ -2467,8 +2563,14 @@ static bool PointsToConstantGlobal(Value *V) { /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to /// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. -static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - bool isOffset) { +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + bool isOffset, + SmallVector<Instruction *, 4> &LifetimeMarkers) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. 
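For context, the source pattern this change targets, shown as a hypothetical example rather than anything taken from the patch: with optimizations enabled, clang lowers the local array below to an alloca initialized by a memcpy from a private constant global, typically bracketed by llvm.lifetime markers. Because the alloca is only read afterwards, the copy, the markers, and the alloca itself can all be dropped in favor of the global, which is what the ToDelete/LifetimeMarkers plumbing here enables.

// Hypothetical C++ input; 'table' becomes an alloca plus a memcpy from a
// constant global, and reads of it can be redirected at the global itself.
int lookup(unsigned Index) {
  const int table[8] = {1, 1, 2, 3, 5, 8, 13, 21};
  return table[Index & 7];
}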
+
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    User *U = cast<Instruction>(*UI);

@@ -2480,7 +2582,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // If uses of the bitcast are ok, we are ok.
-      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
+                                          LifetimeMarkers))
        return false;
      continue;
    }
@@ -2488,7 +2591,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
      // If the GEP has all zero indices, it doesn't offset the pointer. If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
-                                          isOffset || !GEP->hasAllZeroIndices()))
+                                          isOffset || !GEP->hasAllZeroIndices(),
+                                          LifetimeMarkers))
        return false;
      continue;
    }
@@ -2514,6 +2618,16 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
      continue;
    }

+    // Lifetime intrinsics can be handled by the caller.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+          II->getIntrinsicID() == Intrinsic::lifetime_end) {
+        assert(II->use_empty() && "Lifetime markers have no result to use!");
+        LifetimeMarkers.push_back(II);
+        continue;
+      }
+    }
+
    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
@@ -2550,9 +2664,11 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
 /// modified by a copy from a constant global. If we can prove this, we can
 /// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
+MemTransferInst *
+SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+                                     SmallVector<Instruction*, 4> &ToDelete) {
  MemTransferInst *TheCopy = 0;
-  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
    return TheCopy;
  return 0;
 }
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7e9cc80..a66b3e3 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -91,8 +91,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
 static void ChangeToCall(InvokeInst *II) {
   BasicBlock *BB = II->getParent();
   SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
-  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
-                                       Args.end(), "", II);
+  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6247b03..7c415e5 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -992,9 +992,9 @@ struct FFSOpt : public LibCallOptimization {
    }

    // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6247b03..7c415e5 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -992,9 +992,9 @@ struct FFSOpt : public LibCallOptimization {
}
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
- const Type *ArgType = Op->getType();
+ Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
- Intrinsic::cttz, &ArgType, 1);
+ Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, Op, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
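[Context, not part of the commit: Intrinsic::getDeclaration gets the same treatment; the overload type list is now an ArrayRef<Type*>, to which a single Type* converts, replacing the old (const Type**, count) pair. A sketch under those assumptions; declareCttz is a hypothetical helper:]

#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
using namespace llvm;

// Get (or insert) the declaration of the cttz overload for ArgTy,
// e.g. an i32 argument yields llvm.cttz.i32.
static Function *declareCttz(Module *M, Type *ArgTy) {
  return Intrinsic::getDeclaration(M, Intrinsic::cttz, ArgTy);
}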
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 92464e8..b4f74f9 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -153,13 +153,13 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
- // Move all definitions in the successor to the predecessor...
- PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
-
// Make all PHI nodes that referred to BB now refer to Pred as their
// source...
BB->replaceAllUsesWith(PredBB);
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
// Inherit predecessors name if it exists.
if (!PredBB->hasName())
PredBB->takeName(BB);
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index d6206a3..92ce500 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -193,44 +193,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If there are any PHI nodes in DestBB, we need to update them so that they
// merge incoming values from NewBB instead of from TIBB.
- if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
- // This conceptually does:
- // foreach (PHINode *PN in DestBB)
- // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
- // but is optimized for two cases.
-
- if (APHI->getNumIncomingValues() <= 8) { // Small # preds case.
- unsigned BBIdx = 0;
- for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
- // We no longer enter through TIBB, now we come in through NewBB.
- // Revector exactly one entry in the PHI node that used to come from
- // TIBB to come from NewBB.
- PHINode *PN = cast<PHINode>(I);
-
- // Reuse the previous value of BBIdx if it lines up. In cases where we
- // have multiple phi nodes with *lots* of predecessors, this is a speed
- // win because we don't have to scan the PHI looking for TIBB. This
- // happens because the BB list of PHI nodes are usually in the same
- // order.
- if (PN->getIncomingBlock(BBIdx) != TIBB)
- BBIdx = PN->getBasicBlockIndex(TIBB);
- PN->setIncomingBlock(BBIdx, NewBB);
- }
- } else {
- // However, the foreach loop is slow for blocks with lots of predecessors
- // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk
- // the user list of TIBB to find the PHI nodes.
- SmallPtrSet<PHINode*, 16> UpdatedPHIs;
-
- for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
- UI != E; ) {
- Value::use_iterator Use = UI++;
- if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
- // Remove one entry from each PHI.
- if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
- PN->setOperand(Use.getOperandNo(), NewBB);
- }
- }
+ {
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB list of PHI nodes are usually in the same
+ // order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
}
}
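[Context, not part of the commit: both block-merging hunks above swap the same two steps, so that replaceAllUsesWith runs before the instruction splice. The resulting idiom, reduced to its core; Pred and BB are placeholders, and Pred's unconditional branch to BB is assumed already deleted by the caller:]

#include "llvm/BasicBlock.h"
using namespace llvm;

// Fold BB into its unique predecessor Pred.
static void foldInto(BasicBlock *Pred, BasicBlock *BB) {
  // Rewrite every use of BB (branch targets, PHI incoming-block entries,
  // block addresses) to refer to Pred first...
  BB->replaceAllUsesWith(Pred);
  // ...then move BB's instructions over, leaving BB empty and unreferenced.
  Pred->getInstList().splice(Pred->end(), BB->getInstList());
}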
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 5b76bb2..204c2c6 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,7 +5,6 @@ add_llvm_library(LLVMTransformUtils
BreakCriticalEdges.cpp
BuildLibCalls.cpp
CloneFunction.cpp
- CloneLoop.cpp
CloneModule.cpp
CodeExtractor.cpp
DemoteRegToStack.cpp
@@ -15,6 +14,7 @@ add_llvm_library(LLVMTransformUtils
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
+ LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d967ceb..6ea831f 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -140,7 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
ClonedCodeInfo *CodeInfo) {
- std::vector<const Type*> ArgTypes;
+ std::vector<Type*> ArgTypes;
// The user might be deleting arguments to the function by specifying them in
// the VMap. If so, we need to not add the arguments to the arg ty vector
@@ -342,18 +342,6 @@ ConstantFoldMappedInstruction(const Instruction *I) {
Ops.size(), TD);
}
-static DebugLoc
-UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL,
- LLVMContext &Ctx) {
- DebugLoc NewLoc = TheCallDL;
- if (MDNode *IA = InsnDL.getInlinedAt(Ctx))
- NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL,
- Ctx);
-
- return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(),
- InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx));
-}
-
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
/// effect of this is to copy significantly less code in cases where (for
@@ -418,50 +406,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
if (PHINode *PN = dyn_cast<PHINode>(I)) {
// Skip over all PHI nodes, remembering them for later.
BasicBlock::const_iterator OldI = BI->begin();
- for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
- if (I->hasMetadata()) {
- if (!TheCallDL.isUnknown()) {
- DebugLoc IDL = I->getDebugLoc();
- if (!IDL.isUnknown()) {
- DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
- I->getContext());
- I->setDebugLoc(NewDL);
- }
- } else {
- // The cloned instruction has dbg info but the call instruction
- // does not have dbg info. Remove dbg info from cloned instruction.
- I->setDebugLoc(DebugLoc());
- }
- }
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
PHIToResolve.push_back(cast<PHINode>(OldI));
- }
}
- // FIXME:
- // FIXME:
- // FIXME: Unclone all this metadata stuff.
- // FIXME:
- // FIXME:
-
// Otherwise, remap the rest of the instructions normally.
- for (; I != NewBB->end(); ++I) {
- if (I->hasMetadata()) {
- if (!TheCallDL.isUnknown()) {
- DebugLoc IDL = I->getDebugLoc();
- if (!IDL.isUnknown()) {
- DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
- I->getContext());
- I->setDebugLoc(NewDL);
- }
- } else {
- // The cloned instruction has dbg info but the call instruction
- // does not have dbg info. Remove dbg info from cloned instruction.
- I->setDebugLoc(DebugLoc());
- }
- }
+ for (; I != NewBB->end(); ++I)
RemapInstruction(I, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
- }
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -572,12 +524,12 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// removed, so we just need to splice the blocks.
BI->eraseFromParent();
- // Move all the instructions in the succ to the pred.
- I->getInstList().splice(I->end(), Dest->getInstList());
-
// Make all PHI nodes that referred to Dest now refer to I as their source.
Dest->replaceAllUsesWith(I);
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
// Remove the dest block.
Dest->eraseFromParent();
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
deleted file mode 100644
index 87dd141..0000000
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- CloneLoop.cpp - Clone loop nest ------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CloneLoop interface which makes a copy of a loop.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
-
-
-using namespace llvm;
-
-/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
-/// that the basic block is already cloned.
-static void CloneDominatorInfo(BasicBlock *BB,
- ValueToValueMapTy &VMap,
- DominatorTree *DT) {
-
- assert (DT && "DominatorTree is not available");
- ValueToValueMapTy::iterator BI = VMap.find(BB);
- assert (BI != VMap.end() && "BasicBlock clone is missing");
- BasicBlock *NewBB = cast<BasicBlock>(BI->second);
-
- // NewBB already got dominator info.
- if (DT->getNode(NewBB))
- return;
-
- assert (DT->getNode(BB) && "BasicBlock does not have dominator info");
- // Entry block is not expected here. Infinite loops are not to cloned.
- assert (DT->getNode(BB)->getIDom() && "BasicBlock does not have immediate dominator");
- BasicBlock *BBDom = DT->getNode(BB)->getIDom()->getBlock();
-
- // NewBB's dominator is either BB's dominator or BB's dominator's clone.
- BasicBlock *NewBBDom = BBDom;
- ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
- if (BBDomI != VMap.end()) {
- NewBBDom = cast<BasicBlock>(BBDomI->second);
- if (!DT->getNode(NewBBDom))
- CloneDominatorInfo(BBDom, VMap, DT);
- }
- DT->addNewBlock(NewBB, NewBBDom);
-}
-
-/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
-/// using old blocks to new blocks mapping.
-Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
- ValueToValueMapTy &VMap, Pass *P) {
-
- DominatorTree *DT = NULL;
- if (P)
- DT = P->getAnalysisIfAvailable<DominatorTree>();
-
- SmallVector<BasicBlock *, 16> NewBlocks;
-
- // Populate loop nest.
- SmallVector<Loop *, 8> LoopNest;
- LoopNest.push_back(OrigL);
-
-
- Loop *NewParentLoop = NULL;
- do {
- Loop *L = LoopNest.pop_back_val();
- Loop *NewLoop = new Loop();
-
- if (!NewParentLoop)
- NewParentLoop = NewLoop;
-
- LPM->insertLoop(NewLoop, L->getParentLoop());
-
- // Clone Basic Blocks.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
- VMap[BB] = NewBB;
- if (P)
- LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
- NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
- NewBlocks.push_back(NewBB);
- }
-
- // Clone dominator info.
- if (DT)
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- CloneDominatorInfo(BB, VMap, DT);
- }
-
- // Process sub loops
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- LoopNest.push_back(*I);
- } while (!LoopNest.empty());
-
- // Remap instructions to reference operands from VMap.
- for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
- NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) {
- BasicBlock *NB = *NBItr;
- for(BasicBlock::iterator BI = NB->begin(), BE = NB->end();
- BI != BE; ++BI) {
- Instruction *Insn = BI;
- for (unsigned index = 0, num_ops = Insn->getNumOperands();
- index != num_ops; ++index) {
- Value *Op = Insn->getOperand(index);
- ValueToValueMapTy::iterator OpItr = VMap.find(Op);
- if (OpItr != VMap.end())
- Insn->setOperand(index, OpItr->second);
- }
- }
- }
-
- BasicBlock *Latch = OrigL->getLoopLatch();
- Function *F = Latch->getParent();
- F->getBasicBlockList().insert(OrigL->getHeader(),
- NewBlocks.begin(), NewBlocks.end());
-
-
- return NewParentLoop;
-}
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 1046c38..a08fa35 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Module.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/TypeSymbolTable.h"
#include "llvm/Constant.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -32,20 +31,13 @@ Module *llvm::CloneModule(const Module *M) {
return CloneModule(M, VMap);
}
-Module *llvm::CloneModule(const Module *M,
- ValueToValueMapTy &VMap) {
- // First off, we need to create the new module...
+Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+ // First off, we need to create the new module.
Module *New = new Module(M->getModuleIdentifier(), M->getContext());
New->setDataLayout(M->getDataLayout());
New->setTargetTriple(M->getTargetTriple());
New->setModuleInlineAsm(M->getModuleInlineAsm());
-
- // Copy all of the type symbol table entries over.
- const TypeSymbolTable &TST = M->getTypeSymbolTable();
- for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
- TI != TE; ++TI)
- New->addTypeName(TI->first, TI->second);
-
+
// Copy all of the dependent libraries over.
for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
New->addLibrary(*I);
@@ -88,8 +80,7 @@ Module *llvm::CloneModule(const Module *M,
I != E; ++I) {
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
- GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- VMap, RF_None)));
+ GV->setInitializer(MapValue(I->getInitializer(), VMap));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -119,8 +110,8 @@ Module *llvm::CloneModule(const Module *M,
I != E; ++I) {
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
- if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
+ if (const Constant *C = I->getAliasee())
+ GA->setAliasee(MapValue(C, VMap));
}
// And named metadata....
@@ -129,8 +120,7 @@ Module *llvm::CloneModule(const Module *M,
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
- RF_None)));
+ NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
}
return New;
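[Context, not part of the commit: with the type-system rewrite there is no type symbol table left to copy, and MapValue now maps a Constant to a Constant directly, which is why the RF_None flags and cast<Constant> wrappers above could be dropped. The same idiom for a single global, as a sketch; cloneInitializer, GV, NewGV and VMap are placeholder names:]

#include "llvm/Module.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Copy GV's initializer onto its clone, remapping any referenced
// values through VMap.
static void cloneInitializer(const GlobalVariable *GV, GlobalVariable *NewGV,
                             ValueToValueMapTy &VMap) {
  if (GV->hasInitializer())
    NewGV->setInitializer(MapValue(GV->getInitializer(), VMap));
}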
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 8c133ea..0813523 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -258,7 +258,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
default: RetTy = Type::getInt16Ty(header->getContext()); break;
}
- std::vector<const Type*> paramTy;
+ std::vector<Type*> paramTy;
// Add the types of the input values to the function's argument list
for (Values::const_iterator i = inputs.begin(),
@@ -279,7 +279,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
}
DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
- for (std::vector<const Type*>::iterator i = paramTy.begin(),
+ for (std::vector<Type*>::iterator i = paramTy.begin(),
e = paramTy.end(); i != e; ++i)
DEBUG(dbgs() << **i << ", ");
DEBUG(dbgs() << ")\n");
@@ -403,7 +403,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
AllocaInst *Struct = 0;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- std::vector<const Type*> ArgTypes;
+ std::vector<Type*> ArgTypes;
for (Values::iterator v = StructValues.begin(),
ve = StructValues.end(); v != ve; ++v)
ArgTypes.push_back((*v)->getType());
@@ -429,7 +429,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
// Emit the call to the function
- CallInst *call = CallInst::Create(newFunction, params.begin(), params.end(),
+ CallInst *call = CallInst::Create(newFunction, params,
NumExitBlocks > 1 ? "targetBlock" : "");
codeReplacer->getInstList().push_back(call);
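[Context, not part of the commit: the std::vector<const Type*> to std::vector<Type*> churn in this file follows from the new FunctionType::get signature, which takes ArrayRef<Type*>; a std::vector<Type*> converts to it directly. A sketch; the helper name is hypothetical:]

#include "llvm/DerivedTypes.h"
#include <vector>
using namespace llvm;

// Build the prototype "RetTy (ParamTys...)"; the vector converts to the
// ArrayRef<Type*> parameter that FunctionType::get now expects.
static FunctionType *makePrototype(Type *RetTy,
                                   const std::vector<Type*> &ParamTys) {
  return FunctionType::get(RetTy, ParamTys, /*isVarArg=*/false);
}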
"targetBlock" : ""); codeReplacer->getInstList().push_back(call); diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 8416170..d5b382e 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -449,11 +449,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i) NewSelector.push_back(Outer->getArgOperand(i)); - CallInst *NewInner = CallInst::Create(Inner->getCalledValue(), - NewSelector.begin(), - NewSelector.end(), - "", - Inner); + CallInst *NewInner = + IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector); // No need to copy attributes, calling convention, etc. NewInner->takeName(Inner); Inner->replaceAllUsesWith(NewInner); @@ -489,8 +486,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, Invoke.getOuterUnwindDest(), - InvokeArgs.begin(), InvokeArgs.end(), - CI->getName(), BB); + InvokeArgs, CI->getName(), BB); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -664,7 +660,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, LLVMContext &Context = Arg->getContext(); - const Type *VoidPtrTy = Type::getInt8PtrTy(Context); + Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; @@ -681,10 +677,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), &*Caller->begin()->begin()); // Emit a memcpy. - const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; + Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, - Tys, 3); + Tys); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); @@ -703,7 +699,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, ConstantInt::get(Type::getInt32Ty(Context), 1), ConstantInt::getFalse(Context) // isVolatile }; - CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); + IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs); // Uses of the argument in the function should use our new alloca // instead. @@ -734,17 +730,52 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { if (AI->getType() == Int8PtrTy) return isUsedByLifetimeMarker(AI); - // Do a scan to find all the bitcasts to i8*. + // Do a scan to find all the casts to i8*. for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E; ++I) { if (I->getType() != Int8PtrTy) continue; - if (!isa<BitCastInst>(*I)) continue; + if (I->stripPointerCasts() != AI) continue; if (isUsedByLifetimeMarker(*I)) return true; } return false; } +/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively +/// update InlinedAtEntry of a DebugLoc. 
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
+ const DebugLoc &InlinedAtDL,
+ LLVMContext &Ctx) {
+ if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+ DebugLoc NewInlinedAtDL
+ = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ NewInlinedAtDL.getAsMDNode(Ctx));
+ }
+
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ InlinedAtDL.getAsMDNode(Ctx));
+}
+
+
+/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// encode the location where these instructions are inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall) {
+ DebugLoc TheCallDL = TheCall->getDebugLoc();
+ if (TheCallDL.isUnknown())
+ return;
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ DebugLoc DL = BI->getDebugLoc();
+ if (!DL.isUnknown())
+ BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ }
+ }
+}
+
// InlineFunction - This function inlines the called function into the basic
// block of the caller. This returns false if it is not possible to inline this
// call. The program is still in a well defined state if this occurs though.
@@ -847,6 +878,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Update the callgraph if requested.
if (IFI.CG)
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+ // Update inlined instructions' line number information.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall);
}
// If there are any alloca instructions in the block that used to be the entry
@@ -920,13 +954,13 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
// Insert the llvm.stacksave.
- CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
- FirstNewBlock->begin());
+ CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, "savedstack");
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
+ IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
}
// Count the number of StackRestore calls we insert.
@@ -938,7 +972,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
for (Function::iterator BB = FirstNewBlock, E = Caller->end();
BB != E; ++BB)
if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- CallInst::Create(StackRestore, SavedPtr, "", UI);
+ IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
++NumStackRestores;
}
}
@@ -1098,15 +1132,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
}
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
// Splice the code from the return block into the block that it will return
// to, which contains the code that was after the call.
- BasicBlock *ReturnBB = Returns[0]->getParent();
AfterCallBB->getInstList().splice(AfterCallBB->begin(),
ReturnBB->getInstList());
- // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
- ReturnBB->replaceAllUsesWith(AfterCallBB);
-
// Delete the return instruction now and empty out ReturnBB.
Returns[0]->eraseFromParent();
ReturnBB->eraseFromParent();
@@ -1126,8 +1160,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Splice the code entry block into calling block, right before the
// unconditional branch.
- OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
// Remove the unconditional branch.
OrigBB->getInstList().erase(Br);
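[Context, not part of the commit: several sites in this file switch from bare CallInst::Create to IRBuilder. Constructing the builder from an instruction sets the insertion point and, in this era of the API, also picks up that instruction's debug location, which matters now that fixupLineNumbers rewrites locations on inlined code. The idiom in isolation; names are placeholders:]

#include "llvm/Instructions.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// Emit a call to Callee immediately before InsertPt, inheriting
// InsertPt's debug location through the builder.
static CallInst *emitCallBefore(Value *Callee, ArrayRef<Value*> Args,
                                Instruction *InsertPt) {
  return IRBuilder<>(InsertPt).CreateCall(Callee, Args);
}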
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 3bdbaa5..0f6d9ae 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -427,10 +427,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
BasicBlock *PredBB = DestBB->getSinglePredecessor();
assert(PredBB && "Block doesn't have a single predecessor!");
- // Splice all the instructions from PredBB to DestBB.
- PredBB->getTerminator()->eraseFromParent();
- DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
-
// Zap anything that took the address of DestBB. Not doing this will give the
// address an invalid value.
if (DestBB->hasAddressTaken()) {
@@ -445,6 +441,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
// Anything that branched to PredBB now branches to DestBB.
PredBB->replaceAllUsesWith(DestBB);
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
if (P) {
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
if (DT) {
@@ -536,9 +536,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
/// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential debug intrinsics and the branch. If possible, eliminate BB by
-/// rewriting all the predecessors to branch to the successor block and return
-/// true. If we can't transform, return false.
+/// potential side-effect free intrinsics and the branch. If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
assert(BB != &BB->getParent()->getEntryBlock() &&
"TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -613,13 +613,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
}
}
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- if (Succ->getSinglePredecessor()) {
- // BB is the only predecessor of Succ, so Succ will end up with exactly
- // the same predecessors BB had.
- Succ->getInstList().splice(Succ->begin(),
- BB->getInstList(), BB->begin());
- } else {
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+
+ // Copy over any phi, debug or lifetime instruction.
+ BB->getTerminator()->eraseFromParent();
+ Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+ } else {
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
// We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
assert(PN->use_empty() && "There shouldn't be any uses here!");
PN->eraseFromParent();
@@ -642,7 +644,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
bool Changed = false;
// This implementation doesn't currently consider undef operands
-// specially. Theroetically, two phis which are identical except for
+// specially. Theoretically, two phis which are identical except for
// one having an undef where the other doesn't could be collapsed.
// Map from PHI hash values to PHI nodes. If multiple PHIs have
@@ -660,12 +662,17 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
// them, which helps expose duplicates, but we have to check all the
// operands to be safe in case instcombine hasn't run.
uintptr_t Hash = 0;
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
- // This hash algorithm is quite weak as hash functions go, but it seems
- // to do a good enough job for this particular purpose, and is very quick.
Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
}
+ for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+ I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
// Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
Hash >>= 1;
// If we've never seen this hash value before, it's a unique PHI.
@@ -706,39 +713,15 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
unsigned PrefAlign) {
+ V = V->stripPointerCasts();
- User *U = dyn_cast<User>(V);
- if (!U) return Align;
-
- switch (Operator::getOpcode(U)) {
- default: break;
- case Instruction::BitCast:
- return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- case Instruction::GetElementPtr: {
- // If all indexes are zero, it is just the alignment of the base pointer.
- bool AllZeroOperands = true;
- for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
- if (!isa<Constant>(*i) ||
- !cast<Constant>(*i)->isNullValue()) {
- AllZeroOperands = false;
- break;
- }
-
- if (AllZeroOperands) {
- // Treat this like a bitcast.
- return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- }
- return Align;
- }
- case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(V);
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
return AI->getAlignment();
AI->setAlignment(PrefAlign);
return PrefAlign;
}
- }
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index f02ffd2..e79fb5a 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -375,6 +375,7 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
".preheader", this);
+ NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
<< "\n");
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 7da7271..6772511 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -47,6 +47,14 @@ static inline void RemapInstruction(Instruction *I,
if (It != VMap.end())
I->setOperand(op, It->second);
}
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+ if (It != VMap.end())
+ PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+ }
+ }
}
/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
@@ -75,13 +83,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
// Delete the unconditional branch from the predecessor...
OnlyPred->getInstList().pop_back();
- // Move all definitions in the successor to the predecessor...
- OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
// Make all PHI nodes that referred to BB now refer to Pred as their
// source...
BB->replaceAllUsesWith(OnlyPred);
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
std::string OldName = BB->getName();
// Erase basic block from the function...
@@ -247,16 +255,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// the successor of the latch block. The successor of the exit block will
// be updated specially after unrolling all the way.
if (*BB != LatchBlock)
- for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
- UI != UE;) {
- Instruction *UseInst = cast<Instruction>(*UI);
- ++UI;
- if (isa<PHINode>(UseInst) && !L->contains(UseInst)) {
- PHINode *phi = cast<PHINode>(UseInst);
- Value *Incoming = phi->getIncomingValueForBlock(*BB);
- phi->addIncoming(Incoming, New);
- }
- }
+ for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
+ ++SI)
+ if (!L->contains(*SI))
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ phi->addIncoming(Incoming, New);
+ }
// Keep track of new headers and latches as we create them, so that
// we can insert the proper branches later.
@@ -288,24 +294,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// successor blocks, update them to use the appropriate values computed as the
// last iteration of the loop.
if (Count != 1) {
- SmallPtrSet<PHINode*, 8> Users;
- for (Value::use_iterator UI = LatchBlock->use_begin(),
- UE = LatchBlock->use_end(); UI != UE; ++UI)
- if (PHINode *phi = dyn_cast<PHINode>(*UI))
- Users.insert(phi);
-
BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
- for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
+ for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
SI != SE; ++SI) {
- PHINode *PN = *SI;
- Value *InVal = PN->removeIncomingValue(LatchBlock, false);
- // If this value was defined in the loop, take the value defined by the
- // last iteration of the loop.
- if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
- if (L->contains(InValI))
- InVal = LastValueMap[InVal];
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ PN->addIncoming(InVal, LastIterationBB);
}
- PN->addIncoming(InVal, LastIterationBB);
}
}
@@ -352,11 +354,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// Replace the conditional branch with an unconditional one.
BranchInst::Create(Dest, Term);
Term->eraseFromParent();
- // Merge adjacent basic blocks, if possible.
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
+ }
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+ if (Term->isUnconditional()) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
- std::replace(Headers.begin(), Headers.end(), Dest, Fold);
- }
}
}
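[Context, not part of the commit: the RemapInstruction helper at the top of this file now fixes up PHI incoming blocks as well as operands, since cloned blocks live in the value map just like cloned values. The added loop restated as a standalone sketch; PN and VMap are placeholders:]

#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Redirect PN's incoming-block entries at their clones where VMap has one;
// entries for blocks outside the cloned region are left alone.
static void remapIncomingBlocks(PHINode *PN, ValueToValueMapTy &VMap) {
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
    if (It != VMap.end())
      PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
  }
}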
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 0000000..c1213fa
--- /dev/null
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,166 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+ cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+ cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+ class LowerExpectIntrinsic : public FunctionPass {
+
+ bool HandleSwitchExpect(SwitchInst *SI);
+
+ bool HandleIfExpect(BranchInst *BI);
+
+ public:
+ static char ID;
+ LowerExpectIntrinsic() : FunctionPass(ID) {
+ initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+ };
+}
+
+
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+ CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ LLVMContext &Context = CI->getContext();
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+
+ unsigned caseNo = SI->findCaseValue(ExpectedValue);
+ std::vector<Value *> Vec;
+ unsigned n = SI->getNumCases();
+ Vec.resize(n + 1); // +1 for MDString
+
+ Vec[0] = MDString::get(Context, "branch_weights");
+ for (unsigned i = 0; i < n; ++i) {
+ Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ?
+ LikelyBranchWeight : UnlikelyBranchWeight);
+ }
+
+ MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
+ SI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+ SI->setCondition(ArgValue);
+ return true;
+}
+
+
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+ if (BI->isUnconditional())
+ return false;
+
+ // Handle non-optimized IR code like:
+ // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+ // %tobool = icmp ne i64 %expval, 0
+ // br i1 %tobool, label %if.then, label %if.end
+
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ LLVMContext &Context = CI->getContext();
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+ bool Likely = ExpectedValue->isOne();
+
+ // If the expected value is 1, branch 0 is the likely destination;
+ // otherwise branch 1 is the likely one.
+ Value *Ops[] = {
+ MDString::get(Context, "branch_weights"),
+ ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight),
+ ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight)
+ };
+
+ MDNode *WeightsNode = MDNode::get(Context, Ops);
+ BI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+ CmpI->setOperand(0, ArgValue);
+ return true;
+}
+
+
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I++;
+
+ // Create "block_weights" metadata.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (HandleIfExpect(BI))
+ IfHandled++;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (HandleSwitchExpect(SI))
+ IfHandled++;
+ }
+
+ // remove llvm.expect intrinsics.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ) {
+ CallInst *CI = dyn_cast<CallInst>(BI++);
+ if (!CI)
+ continue;
+
+ Function *Fn = CI->getCalledFunction();
+ if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+ Value *Exp = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(Exp);
+ CI->eraseFromParent();
+ }
+ }
+ }
+
+ return false;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+ "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+ return new LowerExpectIntrinsic();
+}
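[Context, not part of the commit: the new pass consumes IR produced from __builtin_expect, attaches prof branch_weights metadata, and then strips the intrinsic. The metadata node can be built in a few lines; a sketch mirroring HandleIfExpect, with BI and the weights as placeholders:]

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
using namespace llvm;

// Mark BI's first successor as the likely destination.
static void markLikely(BranchInst *BI, uint32_t Likely, uint32_t Unlikely) {
  LLVMContext &Ctx = BI->getContext();
  Value *Ops[] = {
    MDString::get(Ctx, "branch_weights"),
    ConstantInt::get(Type::getInt32Ty(Ctx), Likely),   // weight of successor 0
    ConstantInt::get(Type::getInt32Ty(Ctx), Unlikely)  // weight of successor 1
  };
  BI->setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Ops));
}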
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 025ae0d..f77d19d 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -66,7 +66,7 @@ namespace {
Constant *AbortFn;
// Used for expensive EH support.
- const Type *JBLinkTy;
+ StructType *JBLinkTy;
GlobalVariable *JBListHead;
Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
bool useExpensiveEHSupport;
@@ -120,24 +120,16 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
- const Type *VoidPtrTy =
- Type::getInt8PtrTy(M.getContext());
+ const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
- const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
-
- { // The type is recursive, so use a type holder.
- std::vector<const Type*> Elements;
- Elements.push_back(JmpBufTy);
- OpaqueType *OT = OpaqueType::get(M.getContext());
- Elements.push_back(PointerType::getUnqual(OT));
- PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
- OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle.
- JBLinkTy = JBLType.get();
- M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
- }
+ Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+ JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+ Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+ JBLinkTy->setBody(Elts);
const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
@@ -184,8 +176,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
// Insert a normal call instruction...
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
- CallArgs.begin(), CallArgs.end(),
- "",II);
+ CallArgs, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -265,8 +256,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
// Insert a normal call instruction.
SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
- CallArgs.begin(), CallArgs.end(), "",
- II);
+ CallArgs, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -573,7 +563,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
Type::getInt8PtrTy(F.getContext()),
"tmp", UnwindBlock);
Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
- CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock);
+ CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
new UnreachableInst(F.getContext(), UnwindBlock);
// Set up the term block ("throw without a catch").
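[Context, not part of the commit: the doInitialization hunk shows the new recipe for self-referential types. With OpaqueType and refineAbstractTypeTo gone, a named struct is created empty and its body filled in afterwards, at which point it may refer to itself. In isolation, as a sketch; the struct name and fields are arbitrary:]

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// Build "struct node { i32 value; node *next; }" in two steps.
static StructType *makeListNode(LLVMContext &Ctx) {
  StructType *Node = StructType::createNamed(Ctx, "node");
  Type *Elts[] = { Type::getInt32Ty(Ctx), PointerType::getUnqual(Node) };
  Node->setBody(Elts);
  return Node;
}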
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a1736b9..e5a00f4 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -38,6 +38,7 @@
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -92,6 +93,22 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
return false; // Don't allow a store OF the AI, only INTO the AI.
if (SI->isVolatile())
return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkers(GEPI))
+ return false;
} else {
return false;
}
@@ -335,6 +352,31 @@ namespace {
};
} // end of anonymous namespace
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
+
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+ // Follow the use/def chain to erase them now instead of leaving it for
+ // dead code elimination later.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE;) {
+ Instruction *Inst = cast<Instruction>(*UI);
+ ++UI;
+ Inst->eraseFromParent();
+ }
+ }
+ I->eraseFromParent();
+ }
+}
+
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
@@ -353,6 +395,8 @@ void PromoteMem2Reg::run() {
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
+ removeLifetimeIntrinsicUsers(AI);
+
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
if (AST) AST->deleteValue(AI);
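[Context, not part of the commit: isAllocaPromotable now tolerates llvm.lifetime.start/end users, including ones reached through an i8* bitcast or an all-zero-index i8* GEP, and removeLifetimeIntrinsicUsers strips them before promotion. The predicate doing the work is onlyUsedByLifetimeMarkers from ValueTracking; a sketch of the bitcast case, with isLifetimeOnlyCast as a hypothetical helper:]

#include "llvm/Instructions.h"
#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Accept a cast of the alloca only if it is the canonical i8* form and
// feeds nothing but lifetime markers.
static bool isLifetimeOnlyCast(const BitCastInst *BCI) {
  if (BCI->getType() != Type::getInt8PtrTy(BCI->getContext()))
    return false;
  return onlyUsedByLifetimeMarkers(BCI);
}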
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b336194..b47a7cc 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -16,7 +16,6 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
@@ -358,8 +357,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
LoadAndStorePromoter::
LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
- SSAUpdater &S, DbgDeclareInst *DD, DIBuilder *DB,
- StringRef BaseName) : SSA(S), DDI(DD), DIB(DB) {
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
if (Insts.empty()) return;
Value *SomeVal;
@@ -407,8 +405,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
if (BlockUses.size() == 1) {
// If it is a store, it is a trivial def of the value in the block.
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- if (DDI)
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ updateDebugInfo(SI);
SSA.AddAvailableValue(BB, SI->getOperand(0));
} else
// Otherwise it is a load, queue it to rewrite as a live-in load.
@@ -462,9 +459,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
// If this is a store to an unrelated pointer, ignore it.
if (!isInstInList(SI, Insts)) continue;
-
- if (DDI)
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ updateDebugInfo(SI);
// Remember that this is the active value in the block.
StoredValue = SI->getOperand(0);
@@ -522,7 +517,4 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
instructionDeleted(User);
User->eraseFromParent();
}
-
- if (DDI)
- DDI->eraseFromParent();
}
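[Context, not part of the commit: with the DbgDeclareInst/DIBuilder plumbing removed from the constructor, debug-info updates now go through a virtual hook; clients subclass LoadAndStorePromoter and override updateDebugInfo for each store the promoter rewrites. A skeletal client under that assumption; the subclass name, base-name string and do-nothing override are illustrative:]

#include "llvm/Instructions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

namespace {
// Minimal promoter: inherits all rewriting behaviour and installs a
// do-nothing debug-info callback.
class SilentPromoter : public LoadAndStorePromoter {
public:
  SilentPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
    : LoadAndStorePromoter(Insts, S, "promoted") {}
  virtual void updateDebugInfo(Instruction *) const {}
};
}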
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 6df846c..9d9c324 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2211,8 +2211,7 @@ bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
Builder.SetInsertPoint(BI);
CallInst *CI = Builder.CreateCall(II->getCalledValue(),
- Args.begin(), Args.end(),
- II->getName());
+ Args, II->getName());
CI->setCallingConv(II->getCallingConv());
CI->setAttributes(II->getAttributes());
// If the invoke produced a value, the Call now does instead.
@@ -2355,8 +2354,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
Builder.SetInsertPoint(BI);
CallInst *CI = Builder.CreateCall(II->getCalledValue(),
- Args.begin(), Args.end(),
- II->getName());
+ Args, II->getName());
CI->setCallingConv(II->getCallingConv());
CI->setAttributes(II->getAttributes());
// If the invoke produced a value, the call does now instead.
@@ -2450,6 +2448,77 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
return !DeadCases.empty();
}
+/// FindPHIForConditionForwarding - If BB would be eligible for simplification
+/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+ BasicBlock *BB,
+ int *PhiIndex) {
+ if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+ return NULL; // BB must be empty to be a candidate for simplification.
+ if (!BB->getSinglePredecessor())
+ return NULL; // BB must be dominated by the switch.
+
+ BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Branch || !Branch->isUnconditional())
+ return NULL; // Terminator must be unconditional branch.
+
+ BasicBlock *Succ = Branch->getSuccessor(0);
+
+ BasicBlock::iterator I = Succ->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+ Value *InValue = PHI->getIncomingValue(Idx);
+ if (InValue != CaseValue) continue;
+
+ *PhiIndex = Idx;
+ return PHI;
+ }
+
+ return NULL;
+}
+
+/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
+/// instruction to a phi node dominated by the switch, if that would mean that
+/// some of the destination blocks of the switch can be folded away.
+/// Returns true if a change is made.
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+ typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+ ForwardingNodesMap ForwardingNodes;
+
+ for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case.
+ ConstantInt *CaseValue = SI->getCaseValue(I);
+ BasicBlock *CaseDest = SI->getSuccessor(I);
+
+ int PhiIndex;
+ PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
+ &PhiIndex);
+ if (!PHI) continue;
+
+ ForwardingNodes[PHI].push_back(PhiIndex);
+ }
+
+ bool Changed = false;
+
+ for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
+ E = ForwardingNodes.end(); I != E; ++I) {
+ PHINode *Phi = I->first;
+ SmallVector<int,4> &Indexes = I->second;
+
+ if (Indexes.size() < 2) continue;
+
+ for (size_t I = 0, E = Indexes.size(); I != E; ++I)
+ Phi->setIncomingValue(Indexes[I], SI->getCondition());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// If this switch is too complex to want to look at, ignore it.
if (!isValueEqualityComparison(SI))
@@ -2486,6 +2555,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (EliminateDeadSwitchCases(SI))
return SimplifyCFG(BB) | true;
+ if (ForwardSwitchConditionToPHI(SI))
+ return SimplifyCFG(BB) | true;
+
return false;
}
@@ -2530,7 +2602,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
// If the Terminator is the only non-phi instruction, simplify the block.
- BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a73bf04..973b105 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,15 +13,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
#include "llvm/Metadata.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
- RemapFlags Flags) {
+// Out of line method to get vtable etc for class.
+void ValueMapTypeRemapper::Anchor() {}
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper) {
ValueToValueMapTy::iterator I = VM.find(V);
// If the value already exists in the map, use it.
@@ -29,8 +32,23 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
- if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V))
+ if (isa<GlobalValue>(V) || isa<MDString>(V))
return VM[V] = const_cast<Value*>(V);
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ // Inline asm may need *type* remapping.
+ FunctionType *NewTy = IA->getFunctionType();
+ if (TypeMapper) {
+ NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+ if (NewTy != IA->getFunctionType())
+ V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+ IA->hasSideEffects(), IA->isAlignStack());
+ }
+
+ return VM[V] = const_cast<Value*>(V);
+ }
+
if (const MDNode *MD = dyn_cast<MDNode>(V)) {
// If this is a module-level metadata and we know that nothing at the module
@@ -45,14 +63,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
- if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue;
+ if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
// Ok, at least one operand needs remapping.
SmallVector<Value*, 4> Elts;
Elts.reserve(MD->getNumOperands());
for (i = 0; i != e; ++i) {
Value *Op = MD->getOperand(i);
- Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
+ Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
}
MDNode *NewMD = MDNode::get(V->getContext(), Elts);
Dummy->replaceAllUsesWith(NewMD);
@@ -75,51 +93,75 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
return 0;
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
- Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+ Function *F =
+ cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
- Flags));
+ Flags, TypeMapper));
return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
- Value *Op = C->getOperand(i);
- Value *Mapped = MapValue(Op, VM, Flags);
- if (Mapped == C) continue;
-
- // Okay, the operands don't all match. We've already processed some or all
- // of the operands, set them up now.
- std::vector<Constant*> Ops;
- Ops.reserve(C->getNumOperands());
- for (unsigned j = 0; j != i; ++j)
- Ops.push_back(cast<Constant>(C->getOperand(i)));
+ // Otherwise, we have some other constant to remap. Start by checking to see
+ // if all operands have an identity remapping.
+ unsigned OpNo = 0, NumOperands = C->getNumOperands();
+ Value *Mapped = 0;
+ for (; OpNo != NumOperands; ++OpNo) {
+ Value *Op = C->getOperand(OpNo);
+ Mapped = MapValue(Op, VM, Flags, TypeMapper);
+ if (Mapped != C) break;
+ }
+
+ // See if the type mapper wants to remap the type as well.
+ Type *NewTy = C->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+
+ // If the result type and all operands match up, then just insert an identity
+ // mapping.
+ if (OpNo == NumOperands && NewTy == C->getType())
+ return VM[V] = C;
+
+ // Okay, we need to create a new constant. We've already processed some or
+ // all of the operands, set them all up now.
+ SmallVector<Constant*, 8> Ops;
+ Ops.reserve(NumOperands);
+ for (unsigned j = 0; j != OpNo; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+
+ // If one of the operands mismatch, push it and the other mapped operands.
+ if (OpNo != NumOperands) {
Ops.push_back(cast<Constant>(Mapped));
-
// Map the rest of the operands that aren't processed yet.
- for (++i; i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags)));
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- return VM[V] = CE->getWithOperands(Ops);
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
- return VM[V] = ConstantArray::get(CA->getType(), Ops);
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
- return VM[V] = ConstantStruct::get(CS->getType(), Ops);
- assert(isa<ConstantVector>(C) && "Unknown mapped constant type");
- return VM[V] = ConstantVector::get(Ops);
+ for (++OpNo; OpNo != NumOperands; ++OpNo)
+ Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
+ Flags, TypeMapper));
}
-
- // If we reach here, all of the operands of the constant match.
- return VM[V] = C;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return VM[V] = CE->getWithOperands(Ops, NewTy);
+ if (isa<ConstantArray>(C))
+ return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+ if (isa<ConstantStruct>(C))
+ return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+ if (isa<ConstantVector>(C))
+ return VM[V] = ConstantVector::get(Ops);
+ // If this is a no-operand constant, it must be because the type was remapped.
+ if (isa<UndefValue>(C))
+ return VM[V] = UndefValue::get(NewTy);
+ if (isa<ConstantAggregateZero>(C))
+ return VM[V] = ConstantAggregateZero::get(NewTy);
+ assert(isa<ConstantPointerNull>(C));
+ return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
}
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
///
void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
- RemapFlags Flags) {
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
// Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap, Flags);
+ Value *V = MapValue(*op, VMap, Flags, TypeMapper);
// If we aren't ignoring missing entries, assert that something happened.
if (V != 0)
*op = V;
@@ -128,14 +170,32 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
"Referenced value not in value map!");
}
- // Remap attached metadata.
+ // Remap phi nodes' incoming blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ PN->setIncomingBlock(i, cast<BasicBlock>(V));
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced block not in value map!");
+ }
+ }
+
+ // Remap attached metadata. Don't bother remapping DebugLoc, it can never
+ // have mappings to do.
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
- I->getAllMetadata(MDs);
+ I->getAllMetadataOtherThanDebugLoc(MDs);
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
- Value *Old = MI->second;
- Value *New = MapValue(Old, VMap, Flags);
+ MDNode *Old = MI->second;
+ MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
if (New != Old)
- I->setMetadata(MI->first, cast<MDNode>(New));
+ I->setMetadata(MI->first, New);
}
+
+ // If the instruction's type is being remapped, do so now.
+ if (TypeMapper)
+ I->mutateType(TypeMapper->remapType(I->getType()));
}
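[Context, not part of the commit: the TypeMapper threaded through MapValue and RemapInstruction above is an abstract callback; clients that move IR between contexts or modules implement remapType to translate each source type into its destination equivalent. The minimal shape of an implementation, as a sketch; a real client would consult a source-to-destination type map rather than returning the input:]

#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

namespace {
// Identity mapper: demonstrates the interface only.
class IdentityTypeMapper : public ValueMapTypeRemapper {
public:
  virtual Type *remapType(Type *SrcTy) { return SrcTy; }
};
}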