Diffstat (limited to 'lib/Transforms')
81 files changed, 5370 insertions, 3831 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index fa007cf..e160f63 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -155,12 +155,12 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
   for (unsigned i = 0; i != PointerArgs.size(); ++i) {
     bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
     Argument *PtrArg = PointerArgs[i].first;
-    const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
 
     // If this is a byval argument, and if the aggregate type is small, just
     // pass the elements, which is always safe.
     if (isByVal) {
-      if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
         if (maxElements > 0 && STy->getNumElements() > maxElements) {
           DEBUG(dbgs() << "argpromotion disable promoting argument '"
                 << PtrArg->getName() << "' because it would require adding more"
@@ -190,7 +190,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
     // If the argument is a recursive type and we're in a recursive
     // function, we could end up infinitely peeling the function argument.
     if (isSelfRecursive) {
-      if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
         bool RecursiveType = false;
         for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
           if (STy->getElementType(i) == PtrArg->getType()) {
@@ -382,7 +382,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
     User *U = *UI;
     Operands.clear();
     if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-      if (LI->isVolatile()) return false;  // Don't hack volatile loads
+      // Don't hack volatile/atomic loads
+      if (!LI->isSimple()) return false;
       Loads.push_back(LI);
       // Direct loads are equivalent to a GEP with a zero index and then a load.
       Operands.push_back(0);
@@ -410,7 +411,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
       for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
            UI != E; ++UI)
         if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
-          if (LI->isVolatile()) return false;  // Don't hack volatile loads
+          // Don't hack volatile/atomic loads
+          if (!LI->isSimple()) return false;
           Loads.push_back(LI);
         } else {
           // Other uses than load?
@@ -492,7 +494,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
 
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but has modified arguments.
-  const FunctionType *FTy = F->getFunctionType();
+  FunctionType *FTy = F->getFunctionType();
   std::vector<Type*> Params;
 
   typedef std::set<IndicesVector> ScalarizeTable;
@@ -527,8 +529,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
        ++I, ++ArgIndex) {
     if (ByValArgsToTransform.count(I)) {
       // Simple byval argument? Just add all the struct element types.
-      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
-      const StructType *STy = cast<StructType>(AgTy);
+      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+      StructType *STy = cast<StructType>(AgTy);
       for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
         Params.push_back(STy->getElementType(i));
       ++NumByValArgsPromoted;
@@ -576,9 +578,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
       for (ScalarizeTable::iterator SI = ArgIndices.begin(),
              E = ArgIndices.end(); SI != E; ++SI) {
         // not allowed to dereference ->begin() if size() is 0
-        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
-                                                           SI->begin(),
-                                                           SI->end()));
+        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
         assert(Params.back());
       }
 
@@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   if (Attributes attrs = PAL.getFnAttributes())
     AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
 
-  const Type *RetTy = FTy->getReturnType();
+  Type *RetTy = FTy->getReturnType();
 
   // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
   // have zero fixed arguments.
@@ -662,13 +662,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
       } else if (ByValArgsToTransform.count(I)) {
         // Emit a GEP and load for each element of the struct.
-        const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
-        const StructType *STy = cast<StructType>(AgTy);
+        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+        StructType *STy = cast<StructType>(AgTy);
         Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
         for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
           Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
-          Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
+          Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
                                                  (*AI)->getName()+"."+utostr(i),
                                                  Call);
           // TODO: Tell AA about the new values?
@@ -686,12 +686,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
           LoadInst *OrigLoad = OriginalLoads[*SI];
           if (!SI->empty()) {
             Ops.reserve(SI->size());
-            const Type *ElTy = V->getType();
+            Type *ElTy = V->getType();
             for (IndicesVector::const_iterator II = SI->begin(), IE = SI->end();
                  II != IE; ++II) {
               // Use i32 to index structs, and i64 for others (pointers/arrays).
               // This satisfies GEP constraints.
-              const Type *IdxTy = (ElTy->isStructTy() ?
+              Type *IdxTy = (ElTy->isStructTy() ?
                     Type::getInt32Ty(F->getContext()) :
                     Type::getInt64Ty(F->getContext()));
               Ops.push_back(ConstantInt::get(IdxTy, *II));
@@ -699,8 +699,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
               ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
             }
             // And create a GEP to extract those indices.
-            V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
-                                          V->getName()+".idx", Call);
+            V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
             Ops.clear();
             AA.copyValue(OrigLoad->getOperand(0), V);
           }
@@ -792,16 +791,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
       Instruction *InsertPt = NF->begin()->begin();
 
       // Just add all the struct element types.
-      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
       Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
-      const StructType *STy = cast<StructType>(AgTy);
+      StructType *STy = cast<StructType>(AgTy);
       Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
 
       for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
         Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
         Value *Idx =
-          GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+          GetElementPtrInst::Create(TheAlloca, Idxs,
                                     TheAlloca->getName()+"."+Twine(i),
                                     InsertPt);
         I2->setName(I->getName()+"."+Twine(i));
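A pattern that recurs throughout this commit is the replacement of isVolatile() guards with !isSimple(). With the new atomics support, a load or store can be atomic without being volatile, and isSimple() is true only when the access is neither. A minimal sketch of the predicate semantics (the helper name is hypothetical, not part of the patch):

    #include "llvm/Instructions.h"
    using namespace llvm;

    // A load may be rewritten by ArgumentPromotion only if it is "simple",
    // i.e. neither volatile nor atomic.
    static bool isRewritableLoad(const LoadInst *LI) {
      // isSimple() is equivalent to !LI->isVolatile() && !LI->isAtomic().
      return LI->isSimple();
    }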
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 3de7bfc..4d8dbc2 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -13,10 +13,20 @@ add_llvm_library(LLVMipo
   Inliner.cpp
   Internalize.cpp
   LoopExtractor.cpp
-  LowerSetJmp.cpp
   MergeFunctions.cpp
   PartialInlining.cpp
+  PassManagerBuilder.cpp
   PruneEH.cpp
   StripDeadPrototypes.cpp
   StripSymbols.cpp
   )
+
+add_llvm_library_dependencies(LLVMipo
+  LLVMAnalysis
+  LLVMCore
+  LLVMScalarOpts
+  LLVMSupport
+  LLVMTarget
+  LLVMTransformUtils
+  LLVMipa
+  )
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index a21efce..c3ecb7a 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -23,7 +23,9 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -37,10 +39,18 @@ namespace {
       initializeConstantMergePass(*PassRegistry::getPassRegistry());
     }
 
-    // run - For this pass, process all of the globals in the module,
-    // eliminating duplicate constants.
-    //
+    // For this pass, process all of the globals in the module, eliminating
+    // duplicate constants.
     bool runOnModule(Module &M);
+
+    // Return true iff we can determine the alignment of this global variable.
+    bool hasKnownAlignment(GlobalVariable *GV) const;
+
+    // Return the alignment of the global, including converting the default
+    // alignment to a concrete value.
+    unsigned getAlignment(GlobalVariable *GV) const;
+
+    const TargetData *TD;
   };
 }
@@ -77,15 +87,28 @@ static bool IsBetterCannonical(const GlobalVariable &A,
   return A.hasUnnamedAddr();
 }
 
+bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
+  return TD || GV->getAlignment() != 0;
+}
+
+unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+  if (TD)
+    return TD->getPreferredAlignment(GV);
+  return GV->getAlignment();
+}
+
 bool ConstantMerge::runOnModule(Module &M) {
+  TD = getAnalysisIfAvailable<TargetData>();
+
   // Find all the globals that are marked "used".  These cannot be merged.
   SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
   FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
   FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
 
-  // Map unique constant/section pairs to globals.  We don't want to merge
-  // globals in different sections.
-  DenseMap<Constant*, GlobalVariable*> CMap;
+  // Map unique <constants, has-unknown-alignment> pairs to globals.  We don't
+  // want to merge globals of unknown alignment with those of explicit
+  // alignment.  If we have TargetData, we always know the alignment.
+  DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
 
   // Replacements - This vector contains a list of replacements to perform.
   SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
@@ -120,7 +143,8 @@ bool ConstantMerge::runOnModule(Module &M) {
       Constant *Init = GV->getInitializer();
 
       // Check to see if the initializer is already known.
-      GlobalVariable *&Slot = CMap[Init];
+      PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
+      GlobalVariable *&Slot = CMap[Pair];
 
       // If this is the first constant we find or if the old on is local,
       // replace with the current one. It the current is externally visible
@@ -152,7 +176,8 @@ bool ConstantMerge::runOnModule(Module &M) {
       Constant *Init = GV->getInitializer();
 
       // Check to see if the initializer is already known.
-      GlobalVariable *Slot = CMap[Init];
+      PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
+      GlobalVariable *Slot = CMap[Pair];
 
       if (!Slot || Slot == GV)
         continue;
@@ -175,6 +200,14 @@ bool ConstantMerge::runOnModule(Module &M) {
     // now.  This avoid invalidating the pointers in CMap, which are unneeded
     // now.
     for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+      // Bump the alignment if necessary.
+      if (Replacements[i].first->getAlignment() ||
+          Replacements[i].second->getAlignment()) {
+        Replacements[i].second->setAlignment(std::max(
+            Replacements[i].first->getAlignment(),
+            Replacements[i].second->getAlignment()));
+      }
+
       // Eliminate any uses of the dead global.
       Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
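The new map key above packs the initializer pointer together with one spare low bit recording whether the alignment is known. A sketch of the data structure in isolation (typedef names are illustrative only):

    #include "llvm/Constants.h"
    #include "llvm/GlobalVariable.h"
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/PointerIntPair.h"
    using namespace llvm;

    // PointerIntPair stores the bool in an unused low bit of the Constant*,
    // so the whole key still fits in one word and hashes cheaply. Two globals
    // with identical initializers but different "alignment known" states get
    // distinct slots and are therefore never merged with each other.
    typedef PointerIntPair<Constant*, 1, bool> InitAndAlignKey;
    typedef DenseMap<InitAndAlignKey, GlobalVariable*> ConstantSlotMap;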
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1517765..4bb6f7a 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -206,7 +206,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
 
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but doesn't have isVarArg set.
-  const FunctionType *FTy = Fn.getFunctionType();
+  FunctionType *FTy = Fn.getFunctionType();
 
   std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
   FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
@@ -344,7 +344,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
 static unsigned NumRetVals(const Function *F) {
   if (F->getReturnType()->isVoidTy())
     return 0;
-  else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+  else if (StructType *STy = dyn_cast<StructType>(F->getReturnType()))
     return STy->getNumElements();
   else
     return 1;
@@ -491,7 +491,7 @@ void DAE::SurveyFunction(const Function &F) {
   // Keep track of the number of live retvals, so we can skip checks once all
   // of them turn out to be live.
   unsigned NumLiveRetVals = 0;
-  const Type *STy = dyn_cast<StructType>(F.getReturnType());
+  Type *STy = dyn_cast<StructType>(F.getReturnType());
   // Loop all uses of the function.
   for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
        I != E; ++I) {
@@ -646,7 +646,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
 
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but has fewer arguments and a different return type.
-  const FunctionType *FTy = F->getFunctionType();
+  FunctionType *FTy = F->getFunctionType();
 
   std::vector<Type*> Params;
 
   // Set up to build a new list of parameter attributes.
@@ -660,7 +660,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
 
   // Find out the new return value.
   Type *RetTy = FTy->getReturnType();
-  const Type *NRetTy = NULL;
+  Type *NRetTy = NULL;
   unsigned RetCount = NumRetVals(F);
 
   // -1 means unused, other numbers are the new index
@@ -669,7 +669,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   if (RetTy->isVoidTy()) {
     NRetTy = RetTy;
   } else {
-    const StructType *STy = dyn_cast<StructType>(RetTy);
+    StructType *STy = dyn_cast<StructType>(RetTy);
     if (STy)
       // Look at each of the original return values individually.
       for (unsigned i = 0; i != RetCount; ++i) {
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 95decec..0edf342 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -163,14 +163,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
         ReadsMemory = true;
         continue;
       } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        // Ignore non-volatile loads from local memory.
+        // Ignore non-volatile loads from local memory. (Atomic is okay here.)
         if (!LI->isVolatile()) {
           AliasAnalysis::Location Loc = AA->getLocation(LI);
           if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
             continue;
         }
       } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        // Ignore non-volatile stores to local memory.
+        // Ignore non-volatile stores to local memory. (Atomic is okay here.)
         if (!SI->isVolatile()) {
           AliasAnalysis::Location Loc = AA->getLocation(SI);
           if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
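Note that FunctionAttrs deliberately keeps the weaker isVolatile() test: it only proves the absence of visible reads and writes, so an atomic but non-volatile access to provably local memory can still be ignored. Distilled into a hypothetical predicate (the helper name is illustrative):

    #include "llvm/Instructions.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    // Mirrors the condition in AddReadAttrs: a non-volatile load whose
    // location points to constant or function-local memory cannot be
    // observed by other threads, atomic or not, so it does not cost the
    // function its readnone/readonly attribute.
    static bool ignorableLoad(AliasAnalysis &AA, LoadInst *LI) {
      if (LI->isVolatile())
        return false;
      AliasAnalysis::Location Loc = AA.getLocation(LI);
      return AA.pointsToConstantMemory(Loc, /*OrLocal=*/true);
    }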
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 4ac721d..3552d03 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -195,12 +195,14 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
     }
     if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
       GS.isLoaded = true;
-      if (LI->isVolatile()) return true;  // Don't hack on volatile loads.
+      // Don't hack on volatile/atomic loads.
+      if (!LI->isSimple()) return true;
     } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
       // Don't allow a store OF the address, only stores TO the address.
       if (SI->getOperand(0) == V) return true;
 
-      if (SI->isVolatile()) return true;  // Don't hack on volatile stores.
+      // Don't hack on volatile/atomic stores.
+      if (!SI->isSimple()) return true;
 
       // If this is a direct store to the global (i.e., the global is a scalar
       // value, not an aggregate), keep more specific information about
@@ -281,18 +283,18 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
   } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
     if (IdxV < CP->getNumOperands())
       return CP->getOperand(IdxV);
   } else if (isa<ConstantAggregateZero>(Agg)) {
-    if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+    if (StructType *STy = dyn_cast<StructType>(Agg->getType())) {
       if (IdxV < STy->getNumElements())
         return Constant::getNullValue(STy->getElementType(IdxV));
-    } else if (const SequentialType *STy =
+    } else if (SequentialType *STy =
               dyn_cast<SequentialType>(Agg->getType())) {
       return Constant::getNullValue(STy->getElementType());
     }
   } else if (isa<UndefValue>(Agg)) {
-    if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
+    if (StructType *STy = dyn_cast<StructType>(Agg->getType())) {
       if (IdxV < STy->getNumElements())
         return UndefValue::get(STy->getElementType(IdxV));
-    } else if (const SequentialType *STy =
+    } else if (SequentialType *STy =
               dyn_cast<SequentialType>(Agg->getType())) {
       return UndefValue::get(STy->getElementType());
     }
@@ -430,7 +432,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
     ++GEPI;  // Skip over the pointer index.
 
     // If this is a use of an array allocation, do a bit more checking for sanity.
-    if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
+    if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
       uint64_t NumElements = AT->getNumElements();
       ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
 
@@ -451,9 +453,9 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
          GEPI != E; ++GEPI) {
       uint64_t NumElements;
-      if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
+      if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
         NumElements = SubArrayTy->getNumElements();
-      else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
+      else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
         NumElements = SubVectorTy->getNumElements();
       else {
         assert((*GEPI)->isStructTy() &&
@@ -498,7 +500,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
   assert(GV->hasLocalLinkage() && !GV->isConstant());
   Constant *Init = GV->getInitializer();
-  const Type *Ty = Init->getType();
+  Type *Ty = Init->getType();
 
   std::vector<GlobalVariable*> NewGlobals;
   Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
@@ -508,7 +510,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
   if (StartAlignment == 0)
     StartAlignment = TD.getABITypeAlignment(GV->getType());
 
-  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+  if (StructType *STy = dyn_cast<StructType>(Ty)) {
     NewGlobals.reserve(STy->getNumElements());
     const StructLayout &Layout = *TD.getStructLayout(STy);
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -531,9 +533,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
       if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
         NGV->setAlignment(NewAlign);
     }
-  } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
     unsigned NumElements = 0;
-    if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+    if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
       NumElements = ATy->getNumElements();
     else
       NumElements = cast<VectorType>(STy)->getNumElements();
@@ -596,15 +598,14 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
         Idxs.push_back(NullInt);
         for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
           Idxs.push_back(CE->getOperand(i));
-        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr),
-                                                &Idxs[0], Idxs.size());
+        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
       } else {
         GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
         SmallVector<Value*, 8> Idxs;
         Idxs.push_back(NullInt);
         for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
           Idxs.push_back(GEPI->getOperand(i));
-        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
+        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
                                            GEPI->getName()+"."+Twine(Val),GEPI);
       }
     }
@@ -753,8 +754,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
           break;
       if (Idxs.size() == GEPI->getNumOperands()-1)
         Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
-                          ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
-                                                         Idxs.size()));
+                          ConstantExpr::getGetElementPtr(NewV, Idxs));
       if (GEPI->use_empty()) {
         Changed = true;
         GEPI->eraseFromParent();
@@ -846,12 +846,12 @@ static void ConstantPropUsersOf(Value *V) {
 /// malloc into a global, and any loads of GV as uses of the new global.
 static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                      CallInst *CI,
-                                                     const Type *AllocTy,
+                                                     Type *AllocTy,
                                                      ConstantInt *NElements,
                                                      TargetData* TD) {
   DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
 
-  const Type *GlobalType;
+  Type *GlobalType;
   if (NElements->getZExtValue() == 1)
     GlobalType = AllocTy;
   else
@@ -1192,7 +1192,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
     // field.
-    const StructType *ST =
+    StructType *ST =
       cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
 
     PHINode *NewPN =
@@ -1245,8 +1245,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
       GEPIdx.push_back(GEPI->getOperand(1));
       GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
 
-      Value *NGEPI = GetElementPtrInst::Create(NewPtr,
-                                               GEPIdx.begin(), GEPIdx.end(),
+      Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
                                                GEPI->getName(), GEPI);
       GEPI->replaceAllUsesWith(NGEPI);
       GEPI->eraseFromParent();
@@ -1260,11 +1259,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
   // already been seen first by another load, so its uses have already been
   // processed.
   PHINode *PN = cast<PHINode>(LoadUser);
-  bool Inserted;
-  DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
-  tie(InsertPos, Inserted) =
-    InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
-  if (!Inserted) return;
+  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
+                                              std::vector<Value*>())).second)
+    return;
 
   // If this is the first time we've seen this PHI, recursively process all
   // users.
@@ -1298,8 +1295,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
 static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
                                             Value* NElems, TargetData *TD) {
   DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
-  const Type* MAT = getMallocAllocatedType(CI);
-  const StructType *STy = cast<StructType>(MAT);
+  Type* MAT = getMallocAllocatedType(CI);
+  StructType *STy = cast<StructType>(MAT);
 
   // There is guaranteed to be at least one use of the malloc (storing
   // it into GV).  If there are other uses, change them to be uses of
@@ -1313,8 +1310,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   std::vector<Value*> FieldMallocs;
 
   for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
-    const Type *FieldTy = STy->getElementType(FieldNo);
-    const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+    Type *FieldTy = STy->getElementType(FieldNo);
+    PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
 
     GlobalVariable *NGV =
       new GlobalVariable(*GV->getParent(),
@@ -1325,9 +1322,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
     FieldGlobals.push_back(NGV);
 
     unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
-    if (const StructType *ST = dyn_cast<StructType>(FieldTy))
+    if (StructType *ST = dyn_cast<StructType>(FieldTy))
       TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
-    const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+    Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
     Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
                                         ConstantInt::get(IntPtrTy, TypeSize),
                                         NElems, 0,
@@ -1379,8 +1376,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
     Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
     Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
-                              Constant::getNullValue(GVVal->getType()),
-                              "tmp");
+                              Constant::getNullValue(GVVal->getType()));
     BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
                                                OrigBB->getParent());
     BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
@@ -1428,7 +1424,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
 
   // Insert a store of null into each global.
   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
-    const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+    PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
     Constant *Null = Constant::getNullValue(PT->getElementType());
     new StoreInst(Null, FieldGlobals[i], SI);
   }
@@ -1485,7 +1481,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
 /// cast of malloc.
 static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                                CallInst *CI,
-                                               const Type *AllocTy,
+                                               Type *AllocTy,
                                                Module::global_iterator &GVI,
                                                TargetData *TD) {
   if (!TD)
@@ -1538,10 +1534,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
   // If this is an allocation of a fixed size array of structs, analyze as a
   // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
   if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
-    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
       AllocTy = AT->getElementType();
 
-  const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
   if (!AllocSTy)
     return false;
 
@@ -1552,8 +1548,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
 
     // If this is a fixed size array, transform the Malloc to be an alloc of
     // structs.  malloc [100 x struct],1 -> malloc struct, 100
-    if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
-      const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+      Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
       unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
       Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
       Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
@@ -1596,7 +1592,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
     if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
       return true;
   } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
-    const Type* MallocType = getMallocAllocatedType(CI);
+    Type* MallocType = getMallocAllocatedType(CI);
     if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
                                                          GVI, TD))
       return true;
@@ -1611,7 +1607,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
 /// can shrink the global into a boolean and select between the two values
 /// whenever it is used.  This exposes the values to other scalar optimizations.
 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
-  const Type *GVElType = GV->getType()->getElementType();
+  Type *GVElType = GV->getType()->getElementType();
 
   // If GVElType is already i1, it is already shrunk.  If the type of the GV is
   // an FP value, pointer or vector, don't do this optimization because a select
@@ -1761,7 +1757,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
     DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
     Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
                                                    ->getEntryBlock().begin());
-    const Type* ElemTy = GV->getType()->getElementType();
+    Type* ElemTy = GV->getType()->getElementType();
     // FIXME: Pass Global's alignment when globals have alignment
     AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
     if (!isa<UndefValue>(GV->getInitializer()))
@@ -2003,7 +1999,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
   CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
   CSVals[1] = 0;
 
-  const StructType *StructTy =
+  StructType *StructTy =
     cast <StructType>(
     cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
 
@@ -2013,9 +2009,9 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
     if (Ctors[i]) {
       CSVals[1] = Ctors[i];
     } else {
-      const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+      Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
                                           false);
-      const PointerType *PFTy = PointerType::getUnqual(FTy);
+      PointerType *PFTy = PointerType::getUnqual(FTy);
       CSVals[1] = Constant::getNullValue(PFTy);
       CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
                                    0x7fffffff);
@@ -2196,7 +2192,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
   }
 
   std::vector<Constant*> Elts;
-  if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
 
     // Break up the constant into its elements.
     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
@@ -2224,10 +2220,10 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
   }
 
   ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
-  const SequentialType *InitTy = cast<SequentialType>(Init->getType());
+  SequentialType *InitTy = cast<SequentialType>(Init->getType());
 
   uint64_t NumElts;
-  if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+  if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
     NumElts = ATy->getNumElements();
   else
     NumElts = cast<VectorType>(InitTy)->getNumElements();
@@ -2338,7 +2334,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
 
     Constant *InstResult = 0;
     if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
-      if (SI->isVolatile()) return false;  // no volatile accesses.
+      if (!SI->isSimple()) return false;  // no volatile/atomic accesses.
       Constant *Ptr = getVal(Values, SI->getOperand(1));
       if (!isSimpleEnoughPointerToCommit(Ptr))
         // If this is too complex for us to commit, reject it.
@@ -2358,7 +2354,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
           // stored value.
           Ptr = CE->getOperand(0);
 
-          const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+          Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
 
           // In order to push the bitcast onto the stored value, a bitcast
           // from NewTy to Val's type must be legal.  If it's not, we can try
@@ -2367,14 +2363,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
           // If NewTy is a struct, we can convert the pointer to the struct
          // into a pointer to its first member.
           // FIXME: This could be extended to support arrays as well.
-          if (const StructType *STy = dyn_cast<StructType>(NewTy)) {
+          if (StructType *STy = dyn_cast<StructType>(NewTy)) {
             NewTy = STy->getTypeAtIndex(0U);
 
-            const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+            IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
             Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
             Constant * const IdxList[] = {IdxZero, IdxZero};
 
-            Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2);
+            Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
 
             // If we can't improve the situation by introspecting NewTy,
             // we have to give up.
@@ -2411,17 +2407,17 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
       for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
            i != e; ++i)
         GEPOps.push_back(getVal(Values, *i));
-      InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
-        ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
-        ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+      InstResult =
+        ConstantExpr::getGetElementPtr(P, GEPOps,
+                                       cast<GEPOperator>(GEP)->isInBounds());
     } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
-      if (LI->isVolatile()) return false;  // no volatile accesses.
+      if (!LI->isSimple()) return false;  // no volatile/atomic accesses.
       InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
                                      MutatedMemory);
       if (InstResult == 0) return false; // Could not evaluate load.
     } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
       if (AI->isArrayAllocation()) return false;  // Cannot handle array allocs.
-      const Type *Ty = AI->getType()->getElementType();
+      Type *Ty = AI->getType()->getElementType();
       AllocaTmps.push_back(new GlobalVariable(Ty, false,
                                               GlobalValue::InternalLinkage,
                                               UndefValue::get(Ty),
@@ -2465,8 +2461,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
 
       if (Callee->isDeclaration()) {
         // If this is a function we can constant fold, do it.
-        if (Constant *C = ConstantFoldCall(Callee, Formals.data(),
-                                           Formals.size())) {
+        if (Constant *C = ConstantFoldCall(Callee, Formals)) {
           InstResult = C;
         } else {
           return false;
@@ -2512,7 +2507,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
         CallStack.pop_back();  // return from fn.
         return true;  // We succeeded at evaluating this ctor!
       } else {
-        // invoke, unwind, unreachable.
+        // invoke, unwind, resume, unreachable.
         return false;  // Cannot handle this terminator.
       }
 
@@ -2711,7 +2706,7 @@ static Function *FindCXAAtExit(Module &M) {
   if (!Fn)
     return 0;
 
-  const FunctionType *FTy = Fn->getFunctionType();
+  FunctionType *FTy = Fn->getFunctionType();
 
   // Checking that the function has the right return type, the right number of
   // parameters and that they all have pointer types should be enough.
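Another theme visible all through GlobalOpt (and ArgumentPromotion above) is the migration of the GEP construction APIs from iterator or pointer-plus-size argument pairs to ArrayRef, with the in-bounds variant folded into a flag. A sketch of the new call shapes (the wrapper functions are hypothetical):

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/ADT/ArrayRef.h"
    using namespace llvm;

    // Instruction form: the index list is passed as one ArrayRef instead of
    // (Idxs, Idxs+2) or (Ops.begin(), Ops.end()).
    static Value *emitGEP(Value *Ptr, ArrayRef<Value*> Idxs,
                          Instruction *InsertPt) {
      return GetElementPtrInst::Create(Ptr, Idxs, "idx", InsertPt);
    }

    // Constant-expression form: getInBoundsGetElementPtr disappears into a
    // trailing bool on getGetElementPtr.
    static Constant *foldGEP(Constant *Ptr, ArrayRef<Constant*> Idxs,
                             bool InBounds) {
      return ConstantExpr::getGetElementPtr(Ptr, Idxs, InBounds);
    }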
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 25c0134..d757e1f 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -167,7 +167,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
 
   // Check to see if this function returns a constant.
   SmallVector<Value *,4> RetVals;
-  const StructType *STy = dyn_cast<StructType>(F.getReturnType());
+  StructType *STy = dyn_cast<StructType>(F.getReturnType());
   if (STy)
     for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
       RetVals.push_back(UndefValue::get(STy->getElementType(i)));
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 31ce95f..6233922 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm-c/Initialization.h"
 #include "llvm-c/Transforms/IPO.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
@@ -35,7 +36,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeLoopExtractorPass(Registry);
   initializeBlockExtractorPassPass(Registry);
   initializeSingleLoopExtractorPass(Registry);
-  initializeLowerSetJmpPass(Registry);
   initializeMergeFunctionsPass(Registry);
   initializePartialInlinerPass(Registry);
   initializePruneEHPass(Registry);
@@ -70,6 +70,10 @@ void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createFunctionInliningPass());
 }
 
+void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(llvm::createAlwaysInlinerPass());
+}
+
 void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createGlobalDCEPass());
 }
@@ -82,10 +86,6 @@ void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createIPConstantPropagationPass());
 }
 
-void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createLowerSetJmpPass());
-}
-
 void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createPruneEHPass());
 }
@@ -98,11 +98,6 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
   unwrap(PM)->add(createInternalizePass(AllButMain != 0));
 }
 
-
-void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
-  // FIXME: Remove in LLVM 3.0.
-}
-
 void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createStripDeadPrototypesPass());
 }
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index ce795b7..c0426da 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
@@ -32,10 +33,10 @@ namespace {
   // AlwaysInliner only inlines functions that are mark as "always inline".
   class AlwaysInliner : public Inliner {
     // Functions that are never inlined
-    SmallPtrSet<const Function*, 16> NeverInline; 
+    SmallPtrSet<const Function*, 16> NeverInline;
     InlineCostAnalyzer CA;
   public:
-    // Use extremely low threshold. 
+    // Use extremely low threshold.
     AlwaysInliner() : Inliner(ID, -2000000000) {
       initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
     }
@@ -52,8 +53,8 @@ namespace {
     void growCachedCostInfo(Function* Caller, Function* Callee) {
       CA.growCachedCostInfo(Caller, Callee);
     }
-    virtual bool doFinalization(CallGraph &CG) { 
-      return removeDeadFunctions(CG, &NeverInline); 
+    virtual bool doFinalization(CallGraph &CG) {
+      return removeDeadFunctions(CG, &NeverInline);
     }
     virtual bool doInitialization(CallGraph &CG);
     void releaseMemory() {
@@ -71,11 +72,13 @@ INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
 
 Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
 
-// doInitialization - Initializes the vector of functions that have not 
+// doInitialization - Initializes the vector of functions that have not
 // been annotated with the "always inline" attribute.
 bool AlwaysInliner::doInitialization(CallGraph &CG) {
+  CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+
   Module &M = CG.getModule();
-  
+
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
     if (!I->isDeclaration() && 
         !I->hasFnAttr(Attribute::AlwaysInline))
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 0c5b3be..84dd4fd 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
@@ -30,7 +31,7 @@ namespace {
 
   class SimpleInliner : public Inliner {
     // Functions that are never inlined
-    SmallPtrSet<const Function*, 16> NeverInline; 
+    SmallPtrSet<const Function*, 16> NeverInline;
    InlineCostAnalyzer CA;
   public:
     SimpleInliner() : Inliner(ID) {
@@ -68,16 +69,17 @@ INITIALIZE_PASS_END(SimpleInliner, "inline",
 
 Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
 
-Pass *llvm::createFunctionInliningPass(int Threshold) { 
+Pass *llvm::createFunctionInliningPass(int Threshold) {
   return new SimpleInliner(Threshold);
 }
 
 // doInitialization - Initializes the vector of functions that have been
 // annotated with the noinline attribute.
 bool SimpleInliner::doInitialization(CallGraph &CG) {
-  
+  CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+
   Module &M = CG.getModule();
-  
+
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
     if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
@@ -85,34 +87,34 @@ bool SimpleInliner::doInitialization(CallGraph &CG) {
 
   // Get llvm.noinline
   GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
-  
+
   if (GV == 0)
     return false;
 
   // Don't crash on invalid code
   if (!GV->hasDefinitiveInitializer())
     return false;
-  
+
   const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-  
+
   if (InitList == 0)
     return false;
 
   // Iterate over each element and add to the NeverInline set
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-        
+
     // Get Source
     const Constant *Elt = InitList->getOperand(i);
-        
+
     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt)) 
-      if (CE->getOpcode() == Instruction::BitCast) 
+      if (CE->getOpcode() == Instruction::BitCast)
         Elt = CE->getOperand(0);
-    
+
     // Insert into set of functions to never inline
     if (const Function *F = dyn_cast<Function>(Elt))
       NeverInline.insert(F);
   }
-  
+
   return false;
 }
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 57f3e77..f00935b 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -62,7 +62,7 @@ void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
 }
 
 
-typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
 InlinedArrayAllocasTy;
 
 /// InlineCallIfPossible - If it is possible to inline the specified call site,
@@ -139,7 +139,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
     // Don't bother trying to merge array allocations (they will usually be
     // canonicalized to be an allocation *of* an array), or allocations whose
     // type is not itself an array (because we're afraid of pessimizing SRoA).
-    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
     if (ATy == 0 || AI->isArrayAllocation())
       continue;
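The Inliner change above drops const from the DenseMap key now that Type is mutable. For context, this is the table the inliner uses to share stack slots between inlined callees; a sketch of a lookup under the same preconditions the pass checks (the helper name is hypothetical):

    #include "llvm/Instructions.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/ADT/DenseMap.h"
    #include <vector>
    using namespace llvm;

    typedef DenseMap<ArrayType*, std::vector<AllocaInst*> > InlinedArrayAllocasTy;

    // Only fixed-size allocas of array type participate in merging, mirroring
    // the guards in InlineCallIfPossible; anything else would risk
    // pessimizing SRoA.
    static AllocaInst *findReusableAlloca(InlinedArrayAllocasTy &Allocas,
                                          AllocaInst *AI) {
      ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
      if (ATy == 0 || AI->isArrayAllocation())
        return 0;
      std::vector<AllocaInst*> &Candidates = Allocas[ATy];
      return Candidates.empty() ? 0 : Candidates.back();
    }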
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 848944d..4f96afe4 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/FunctionUtils.h"
 #include "llvm/ADT/Statistic.h"
 #include <fstream>
@@ -53,12 +54,12 @@ namespace {
 char LoopExtractor::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
-                      "Extract loops into new functions", false, false) 
+                      "Extract loops into new functions", false, false)
 INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
 INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
-                    "Extract loops into new functions", false, false) 
+                    "Extract loops into new functions", false, false)
 
 namespace {
   /// SingleLoopExtractor - For bugpoint.
@@ -100,9 +101,9 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
     L->getHeader()->getParent()->getEntryBlock().getTerminator();
   if (!isa<BranchInst>(EntryTI) ||
       !cast<BranchInst>(EntryTI)->isUnconditional() ||
-      EntryTI->getSuccessor(0) != L->getHeader())
+      EntryTI->getSuccessor(0) != L->getHeader()) {
     ShouldExtractLoop = true;
-  else {
+  } else {
     // Check to see if any exits from the loop are more than just return
     // blocks.
     SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -113,6 +114,21 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
         break;
       }
   }
+
+  if (ShouldExtractLoop) {
+    // We must omit landing pads. Landing pads must accompany the invoke
+    // instruction. But this would result in a loop in the extracted
+    // function. An infinite cycle occurs when it tries to extract that loop as
+    // well.
+    SmallVector<BasicBlock*, 8> ExitBlocks;
+    L->getExitBlocks(ExitBlocks);
+    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+      if (ExitBlocks[i]->isLandingPad()) {
+        ShouldExtractLoop = false;
+        break;
+      }
+  }
+
   if (ShouldExtractLoop) {
     if (NumLoops == 0) return Changed;
     --NumLoops;
@@ -149,6 +165,7 @@ namespace {
   /// BlocksToNotExtract list.
   class BlockExtractorPass : public ModulePass {
     void LoadFile(const char *Filename);
+    void SplitLandingPadPreds(Function *F);
 
     std::vector<BasicBlock*> BlocksToNotExtract;
     std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
@@ -171,8 +188,7 @@ INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
 // createBlockExtractorPass - This pass extracts all blocks (except those
 // specified in the argument list) from the functions in the module.
 //
-ModulePass *llvm::createBlockExtractorPass()
-{
+ModulePass *llvm::createBlockExtractorPass() {
   return new BlockExtractorPass();
 }
 
@@ -194,6 +210,37 @@ void BlockExtractorPass::LoadFile(const char *Filename) {
   }
 }
 
+/// SplitLandingPadPreds - The landing pad needs to be extracted with the invoke
+/// instruction. The critical edge breaker will refuse to break critical edges
+/// to a landing pad. So do them here. After this method runs, all landing pads
+/// should have only one predecessor.
+void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
+  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+    InvokeInst *II = dyn_cast<InvokeInst>(I);
+    if (!II) continue;
+    BasicBlock *Parent = II->getParent();
+    BasicBlock *LPad = II->getUnwindDest();
+
+    // Look through the landing pad's predecessors. If one of them ends in an
+    // 'invoke', then we want to split the landing pad.
+    bool Split = false;
+    for (pred_iterator
+           PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) {
+      BasicBlock *BB = *PI;
+      if (BB->isLandingPad() && BB != Parent &&
+          isa<InvokeInst>(Parent->getTerminator())) {
+        Split = true;
+        break;
+      }
+    }
+
+    if (!Split) continue;
+
+    SmallVector<BasicBlock*, 2> NewBBs;
+    SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs);
+  }
+}
+
 bool BlockExtractorPass::runOnModule(Module &M) {
   std::set<BasicBlock*> TranslatedBlocksToNotExtract;
   for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
@@ -236,13 +283,21 @@ bool BlockExtractorPass::runOnModule(Module &M) {
 
   // Now that we know which blocks to not extract, figure out which ones we WANT
   // to extract.
   std::vector<BasicBlock*> BlocksToExtract;
-  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    SplitLandingPadPreds(&*F);
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       if (!TranslatedBlocksToNotExtract.count(BB))
         BlocksToExtract.push_back(BB);
+  }
 
-  for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i)
-    ExtractBasicBlock(BlocksToExtract[i]);
+  for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
+    SmallVector<BasicBlock*, 2> BlocksToExtractVec;
+    BlocksToExtractVec.push_back(BlocksToExtract[i]);
+    if (const InvokeInst *II =
+        dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator()))
+      BlocksToExtractVec.push_back(II->getUnwindDest());
+    ExtractBasicBlock(BlocksToExtractVec);
+  }
 
   return !BlocksToExtract.empty();
 }
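The BlockExtractor changes above enforce an EH invariant: a landing pad can only be entered by unwinding from an invoke, so a block that ends in an invoke must be extracted together with its unwind destination. A condensed sketch of the grouping step (the helper is hypothetical):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Keep an invoke's landing pad attached to the block being extracted;
    // otherwise the extracted function would contain an invoke whose unwind
    // edge leaves the function, which is not expressible.
    static void collectExtractionGroup(BasicBlock *BB,
                                       SmallVectorImpl<BasicBlock*> &Group) {
      Group.push_back(BB);
      if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
        Group.push_back(II->getUnwindDest());
    }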
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "lowersetjmp" -#include "llvm/Transforms/IPO.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Statistic.h" -#include <map> -using namespace llvm; - -STATISTIC(LongJmpsTransformed, "Number of longjmps transformed"); -STATISTIC(SetJmpsTransformed , "Number of setjmps transformed"); -STATISTIC(CallsTransformed , "Number of calls invokified"); -STATISTIC(InvokesTransformed , "Number of invokes modified"); - -namespace { - //===--------------------------------------------------------------------===// - // LowerSetJmp pass implementation. - class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> { - // LLVM library functions... - Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap - Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap - Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map - Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp - Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception - Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception - Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value - - typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair; - - // Keep track of those basic blocks reachable via a depth-first search of - // the CFG from a setjmp call. We only need to transform those "call" and - // "invoke" instructions that are reachable from the setjmp call site. - std::set<BasicBlock*> DFSBlocks; - - // The setjmp map is going to hold information about which setjmps - // were called (each setjmp gets its own number) and with which - // buffer it was called. - std::map<Function*, AllocaInst*> SJMap; - - // The rethrow basic block map holds the basic block to branch to if - // the exception isn't handled in the current function and needs to - // be rethrown. - std::map<const Function*, BasicBlock*> RethrowBBMap; - - // The preliminary basic block map holds a basic block that grabs the - // exception and determines if it's handled by the current function. - std::map<const Function*, BasicBlock*> PrelimBBMap; - - // The switch/value map holds a switch inst/call inst pair. The - // switch inst controls which handler (if any) gets called and the - // value is the value returned to that handler by the call to - // __llvm_sjljeh_get_longjmp_value. - std::map<const Function*, SwitchValuePair> SwitchValMap; - - // A map of which setjmps we've seen so far in a function. 
- std::map<const Function*, unsigned> SetJmpIDMap; - - AllocaInst* GetSetJmpMap(Function* Func); - BasicBlock* GetRethrowBB(Function* Func); - SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow); - - void TransformLongJmpCall(CallInst* Inst); - void TransformSetJmpCall(CallInst* Inst); - - bool IsTransformableFunction(StringRef Name); - public: - static char ID; // Pass identification, replacement for typeid - LowerSetJmp() : ModulePass(ID) { - initializeLowerSetJmpPass(*PassRegistry::getPassRegistry()); - } - - void visitCallInst(CallInst& CI); - void visitInvokeInst(InvokeInst& II); - void visitReturnInst(ReturnInst& RI); - void visitUnwindInst(UnwindInst& UI); - - bool runOnModule(Module& M); - bool doInitialization(Module& M); - }; -} // end anonymous namespace - -char LowerSetJmp::ID = 0; -INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false) - -// run - Run the transformation on the program. We grab the function -// prototypes for longjmp and setjmp. If they are used in the program, -// then we can go directly to the places they're at and transform them. -bool LowerSetJmp::runOnModule(Module& M) { - bool Changed = false; - - // These are what the functions are called. - Function* SetJmp = M.getFunction("llvm.setjmp"); - Function* LongJmp = M.getFunction("llvm.longjmp"); - - // This program doesn't have longjmp and setjmp calls. - if ((!LongJmp || LongJmp->use_empty()) && - (!SetJmp || SetJmp->use_empty())) return false; - - // Initialize some values and functions we'll need to transform the - // setjmp/longjmp functions. - doInitialization(M); - - if (SetJmp) { - for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end(); - B != E; ++B) { - BasicBlock* BB = cast<Instruction>(*B)->getParent(); - for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks), - E = df_ext_end(BB, DFSBlocks); I != E; ++I) - /* empty */; - } - - while (!SetJmp->use_empty()) { - assert(isa<CallInst>(SetJmp->use_back()) && - "User of setjmp intrinsic not a call?"); - TransformSetJmpCall(cast<CallInst>(SetJmp->use_back())); - Changed = true; - } - } - - if (LongJmp) - while (!LongJmp->use_empty()) { - assert(isa<CallInst>(LongJmp->use_back()) && - "User of longjmp intrinsic not a call?"); - TransformLongJmpCall(cast<CallInst>(LongJmp->use_back())); - Changed = true; - } - - // Now go through the affected functions and convert calls and invokes - // to new invokes... - for (std::map<Function*, AllocaInst*>::iterator - B = SJMap.begin(), E = SJMap.end(); B != E; ++B) { - Function* F = B->first; - for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB) - for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) { - visit(*IB++); - if (IB != BB->end() && IB->getParent() != BB) - break; // The next instruction got moved to a different block! - } - } - - DFSBlocks.clear(); - SJMap.clear(); - RethrowBBMap.clear(); - PrelimBBMap.clear(); - SwitchValMap.clear(); - SetJmpIDMap.clear(); - - return Changed; -} - -// doInitialization - For the lower long/setjmp pass, this ensures that a -// module contains a declaration for the intrisic functions we are going -// to call to convert longjmp and setjmp calls. -// -// This function is always successful, unless it isn't. -bool LowerSetJmp::doInitialization(Module& M) -{ - const Type *SBPTy = Type::getInt8PtrTy(M.getContext()); - const Type *SBPPTy = PointerType::getUnqual(SBPTy); - - // N.B. 
See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for - // a description of the following library functions. - - // void __llvm_sjljeh_init_setjmpmap(void**) - InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap", - Type::getVoidTy(M.getContext()), - SBPPTy, (Type *)0); - // void __llvm_sjljeh_destroy_setjmpmap(void**) - DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap", - Type::getVoidTy(M.getContext()), - SBPPTy, (Type *)0); - - // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned) - AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map", - Type::getVoidTy(M.getContext()), - SBPPTy, SBPTy, - Type::getInt32Ty(M.getContext()), - (Type *)0); - - // void __llvm_sjljeh_throw_longjmp(int*, int) - ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp", - Type::getVoidTy(M.getContext()), SBPTy, - Type::getInt32Ty(M.getContext()), - (Type *)0); - - // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **) - TryCatchLJ = - M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception", - Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0); - - // bool __llvm_sjljeh_is_longjmp_exception() - IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception", - Type::getInt1Ty(M.getContext()), - (Type *)0); - - // int __llvm_sjljeh_get_longjmp_value() - GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value", - Type::getInt32Ty(M.getContext()), - (Type *)0); - return true; -} - -// IsTransformableFunction - Return true if the function name isn't one -// of the ones we don't want transformed. Currently, don't transform any -// "llvm.{setjmp,longjmp}" functions and none of the setjmp/longjmp error -// handling functions (beginning with __llvm_sjljeh_...they don't throw -// exceptions). -bool LowerSetJmp::IsTransformableFunction(StringRef Name) { - return !Name.startswith("__llvm_sjljeh_"); -} - -// TransformLongJmpCall - Transform a longjmp call into a call to the -// internal __llvm_sjljeh_throw_longjmp function. It then takes care of -// throwing the exception for us. -void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) -{ - const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext()); - - // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the - // same parameters as "longjmp", except that the buffer is cast to a - // char*. It returns "void", so it doesn't need to replace any of - // Inst's uses and doesn't get a name. - CastInst* CI = - new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst); - Value *Args[] = { CI, Inst->getArgOperand(1) }; - CallInst::Create(ThrowLongJmp, Args, "", Inst); - - SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; - - // If the function has a setjmp call in it (they are transformed first) - // we should branch to the basic block that determines if this longjmp - // is applicable here. Otherwise, issue an unwind. - if (SVP.first) - BranchInst::Create(SVP.first->getParent(), Inst); - else - new UnwindInst(Inst->getContext(), Inst); - - // Remove all insts after the branch/unwind inst. Go from back to front to - // avoid replaceAllUsesWith if possible. - BasicBlock *BB = Inst->getParent(); - Instruction *Removed; - do { - Removed = &BB->back(); - // If the removed instructions have any users, replace them now. 
- if (!Removed->use_empty()) - Removed->replaceAllUsesWith(UndefValue::get(Removed->getType())); - Removed->eraseFromParent(); - } while (Removed != Inst); - - ++LongJmpsTransformed; -} - -// GetSetJmpMap - Retrieve (create and initialize, if necessary) the -// setjmp map. This map is going to hold information about which setjmps -// were called (each setjmp gets its own number) and with which buffer it -// was called. There can be only one! -AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func) -{ - if (SJMap[Func]) return SJMap[Func]; - - // Insert the setjmp map initialization before the first instruction in - // the function. - Instruction* Inst = Func->getEntryBlock().begin(); - assert(Inst && "Couldn't find even ONE instruction in entry block!"); - - // Fill in the alloca and call to initialize the SJ map. - const Type *SBPTy = - Type::getInt8PtrTy(Func->getContext()); - AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst); - CallInst::Create(InitSJMap, Map, "", Inst); - return SJMap[Func] = Map; -} - -// GetRethrowBB - Only one rethrow basic block is needed per function. -// If this is a longjmp exception but not handled in this block, this BB -// performs the rethrow. -BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func) -{ - if (RethrowBBMap[Func]) return RethrowBBMap[Func]; - - // The basic block we're going to jump to if we need to rethrow the - // exception. - BasicBlock* Rethrow = - BasicBlock::Create(Func->getContext(), "RethrowExcept", Func); - - // Fill in the "Rethrow" BB with a call to rethrow the exception. This - // is the last instruction in the BB since at this point the runtime - // should exit this function and go to the next function. - new UnwindInst(Func->getContext(), Rethrow); - return RethrowBBMap[Func] = Rethrow; -} - -// GetSJSwitch - Return the switch statement that controls which handler -// (if any) gets called and the value returned to that handler. -LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, - BasicBlock* Rethrow) -{ - if (SwitchValMap[Func].first) return SwitchValMap[Func]; - - BasicBlock* LongJmpPre = - BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func); - - // Keep track of the preliminary basic block for some of the other - // transformations. - PrelimBBMap[Func] = LongJmpPre; - - // Grab the exception. - CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre); - - // The "decision basic block" gets the number associated with the - // setjmp call returning to switch on and the value returned by - // longjmp. - BasicBlock* DecisionBB = - BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func); - - BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre); - - // Fill in the "decision" basic block. - CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB); - CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum", - DecisionBB); - - SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB); - return SwitchValMap[Func] = SwitchValuePair(SI, LJVal); -} - -// TransformSetJmpCall - The setjmp call is a bit trickier to transform. -// We're going to convert all setjmp calls to nops. Then all "call" and -// "invoke" instructions in the function are converted to "invoke" where -// the "except" branch is used when returning from a longjmp call. -void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) -{ - BasicBlock* ABlock = Inst->getParent(); - Function* Func = ABlock->getParent(); - - // Add this setjmp to the setjmp map. 
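// (Run-time picture, roughly, in C terms; ids are handed out per function
// by SetJmpIDMap:
//    void *SJMap;                             /* the GetSetJmpMap alloca  */
//    __llvm_sjljeh_init_setjmpmap(&SJMap);    /* once, on function entry  */
//    __llvm_sjljeh_add_setjmp_to_map(&SJMap, (void*)buf, id);
// so the runtime can later match a thrown longjmp against this buffer.)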
- const Type* SBPTy = - Type::getInt8PtrTy(Inst->getContext()); - CastInst* BufPtr = - new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst); - Value *Args[] = { - GetSetJmpMap(Func), BufPtr, - ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++) - }; - CallInst::Create(AddSJToMap, Args, "", Inst); - - // We are guaranteed that there are no values live across basic blocks - // (because we are "not in SSA form" yet), but there can still be values live - // in basic blocks. Because of this, splitting the setjmp block can cause - // values above the setjmp to not dominate uses which are after the setjmp - // call. For all of these occasions, we must spill the value to the stack. - // - std::set<Instruction*> InstrsAfterCall; - - // The call is probably very close to the end of the basic block, for the - // common usage pattern of: 'if (setjmp(...))', so keep track of the - // instructions after the call. - for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end(); - I != E; ++I) - InstrsAfterCall.insert(I); - - for (BasicBlock::iterator II = ABlock->begin(); - II != BasicBlock::iterator(Inst); ++II) - // Loop over all of the uses of instruction. If any of them are after the - // call, "spill" the value to the stack. - for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (cast<Instruction>(U)->getParent() != ABlock || - InstrsAfterCall.count(cast<Instruction>(U))) { - DemoteRegToStack(*II); - break; - } - } - InstrsAfterCall.clear(); - - // Change the setjmp call into a branch statement. We'll remove the - // setjmp call in a little bit. No worries. - BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst); - assert(SetJmpContBlock && "Couldn't split setjmp BB!!"); - - SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont"); - - // Add the SetJmpContBlock to the set of blocks reachable from a setjmp. - DFSBlocks.insert(SetJmpContBlock); - - // This PHI node will be in the new block created from the - // splitBasicBlock call. - PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2, - "SetJmpReturn", Inst); - - // Coming from a call to setjmp, the return is 0. - PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())), - ABlock); - - // Add the case for this setjmp's number... - SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func)); - SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()), - SetJmpIDMap[Func] - 1), - SetJmpContBlock); - - // Value coming from the handling of the exception. - PHI->addIncoming(SVP.second, SVP.second->getParent()); - - // Replace all uses of this instruction with the PHI node created by - // the eradication of setjmp. - Inst->replaceAllUsesWith(PHI); - Inst->eraseFromParent(); - - ++SetJmpsTransformed; -} - -// visitCallInst - This converts all LLVM call instructions into invoke -// instructions. The except part of the invoke goes to the "LongJmpBlkPre" -// that grabs the exception and proceeds to determine if it's a longjmp -// exception or not. -void LowerSetJmp::visitCallInst(CallInst& CI) -{ - if (CI.getCalledFunction()) - if (!IsTransformableFunction(CI.getCalledFunction()->getName()) || - CI.getCalledFunction()->isIntrinsic()) return; - - BasicBlock* OldBB = CI.getParent(); - - // If not reachable from a setjmp call, don't transform. 
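// (DFSBlocks is populated in runOnModule by the deliberately empty walk
//    for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks),
//           E = df_ext_end(BB, DFSBlocks); I != E; ++I)
//      /* empty */;
// started at every block containing a setjmp; df_ext_iterator inserts each
// visited block into the set as a side effect, so the count() test below
// asks "could a longjmp return through this block?".)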
- if (!DFSBlocks.count(OldBB)) return; - - BasicBlock* NewBB = OldBB->splitBasicBlock(CI); - assert(NewBB && "Couldn't split BB of \"call\" instruction!!"); - DFSBlocks.insert(NewBB); - NewBB->setName("Call2Invoke"); - - Function* Func = OldBB->getParent(); - - // Construct the new "invoke" instruction. - TerminatorInst* Term = OldBB->getTerminator(); - CallSite CS(&CI); - std::vector<Value*> Params(CS.arg_begin(), CS.arg_end()); - InvokeInst* II = - InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], - Params, CI.getName(), Term); - II->setCallingConv(CI.getCallingConv()); - II->setAttributes(CI.getAttributes()); - - // Replace the old call inst with the invoke inst and remove the call. - CI.replaceAllUsesWith(II); - CI.eraseFromParent(); - - // The old terminator is useless now that we have the invoke inst. - Term->eraseFromParent(); - ++CallsTransformed; -} - -// visitInvokeInst - Converting the "invoke" instruction is fairly -// straight-forward. The old exception part is replaced by a query asking -// if this is a longjmp exception. If it is, then it goes to the longjmp -// exception blocks. Otherwise, control is passed the old exception. -void LowerSetJmp::visitInvokeInst(InvokeInst& II) -{ - if (II.getCalledFunction()) - if (!IsTransformableFunction(II.getCalledFunction()->getName()) || - II.getCalledFunction()->isIntrinsic()) return; - - BasicBlock* BB = II.getParent(); - - // If not reachable from a setjmp call, don't transform. - if (!DFSBlocks.count(BB)) return; - - BasicBlock* ExceptBB = II.getUnwindDest(); - - Function* Func = BB->getParent(); - BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(), - "InvokeExcept", Func); - - // If this is a longjmp exception, then branch to the preliminary BB of - // the longjmp exception handling. Otherwise, go to the old exception. - CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept", - NewExceptBB); - - BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB); - - II.setUnwindDest(NewExceptBB); - ++InvokesTransformed; -} - -// visitReturnInst - We want to destroy the setjmp map upon exit from the -// function. -void LowerSetJmp::visitReturnInst(ReturnInst &RI) { - Function* Func = RI.getParent()->getParent(); - CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI); -} - -// visitUnwindInst - We want to destroy the setjmp map upon exit from the -// function. -void LowerSetJmp::visitUnwindInst(UnwindInst &UI) { - Function* Func = UI.getParent()->getParent(); - CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI); -} - -ModulePass *llvm::createLowerSetJmpPass() { - return new LowerSetJmp(); -} - diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 7796d05..0b01c38 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -76,7 +76,7 @@ STATISTIC(NumDoubleWeak, "Number of new functions created"); /// functions that will compare equal, without looking at the instructions /// inside the function. static unsigned profileFunction(const Function *F) { - const FunctionType *FTy = F->getFunctionType(); + FunctionType *FTy = F->getFunctionType(); FoldingSetNodeID ID; ID.AddInteger(F->size()); @@ -185,7 +185,7 @@ private: } /// Compare two Types, treating all pointer types as equal. - bool isEquivalentType(const Type *Ty1, const Type *Ty2) const; + bool isEquivalentType(Type *Ty1, Type *Ty2) const; // The two functions undergoing comparison. 
const Function *F1, *F2; @@ -200,8 +200,8 @@ private: // Any two pointers in the same address space are equivalent, intptr_t and // pointers are equivalent. Otherwise, standard type equivalence rules apply. -bool FunctionComparator::isEquivalentType(const Type *Ty1, - const Type *Ty2) const { +bool FunctionComparator::isEquivalentType(Type *Ty1, + Type *Ty2) const { if (Ty1 == Ty2) return true; if (Ty1->getTypeID() != Ty2->getTypeID()) { @@ -233,14 +233,14 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, return true; case Type::PointerTyID: { - const PointerType *PTy1 = cast<PointerType>(Ty1); - const PointerType *PTy2 = cast<PointerType>(Ty2); + PointerType *PTy1 = cast<PointerType>(Ty1); + PointerType *PTy2 = cast<PointerType>(Ty2); return PTy1->getAddressSpace() == PTy2->getAddressSpace(); } case Type::StructTyID: { - const StructType *STy1 = cast<StructType>(Ty1); - const StructType *STy2 = cast<StructType>(Ty2); + StructType *STy1 = cast<StructType>(Ty1); + StructType *STy2 = cast<StructType>(Ty2); if (STy1->getNumElements() != STy2->getNumElements()) return false; @@ -255,8 +255,8 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, } case Type::FunctionTyID: { - const FunctionType *FTy1 = cast<FunctionType>(Ty1); - const FunctionType *FTy2 = cast<FunctionType>(Ty2); + FunctionType *FTy1 = cast<FunctionType>(Ty1); + FunctionType *FTy2 = cast<FunctionType>(Ty2); if (FTy1->getNumParams() != FTy2->getNumParams() || FTy1->isVarArg() != FTy2->isVarArg()) return false; @@ -272,8 +272,8 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, } case Type::ArrayTyID: { - const ArrayType *ATy1 = cast<ArrayType>(Ty1); - const ArrayType *ATy2 = cast<ArrayType>(Ty2); + ArrayType *ATy1 = cast<ArrayType>(Ty1); + ArrayType *ATy2 = cast<ArrayType>(Ty2); return ATy1->getNumElements() == ATy2->getNumElements() && isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); } @@ -305,10 +305,14 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, // Check special state that is a part of some instructions. 
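// (The ordering/scope comparisons added below are what keep the pass safe
// on atomics: in C++11 terms, the bodies of
//    int a(std::atomic<int> &X) { return X.load(std::memory_order_acquire); }
//    int b(std::atomic<int> &X) { return X.load(std::memory_order_relaxed); }
// differ only in ordering, and merging them would silently change the
// synchronization callers observe.)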
if (const LoadInst *LI = dyn_cast<LoadInst>(I1)) return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() && - LI->getAlignment() == cast<LoadInst>(I2)->getAlignment(); + LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() && + LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() && + LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope(); if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && - SI->getAlignment() == cast<StoreInst>(I2)->getAlignment(); + SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() && + SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() && + SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope(); if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(I1)) @@ -317,22 +321,22 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { - if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) - return false; - for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i) - if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i]) - return false; - return true; - } - if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) { - if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices()) - return false; - for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i) - if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i]) - return false; - return true; - } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) + return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices(); + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) + return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices(); + if (const FenceInst *FI = dyn_cast<FenceInst>(I1)) + return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() && + FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope(); + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1)) + return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() && + CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() && + CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope(); + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1)) + return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() && + RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() && + RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() && + RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope(); return true; } @@ -346,9 +350,9 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end()); SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end()); uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), - Indices1.data(), Indices1.size()); + Indices1); uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), - Indices2.data(), Indices2.size()); + Indices2); return Offset1 == Offset2; } @@ -725,7 +729,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { SmallVector<Value *, 16> Args; unsigned i = 0; - const 
FunctionType *FFTy = F->getFunctionType(); + FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i))); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp new file mode 100644 index 0000000..8fdfd72 --- /dev/null +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -0,0 +1,343 @@ +//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. +// +//===----------------------------------------------------------------------===// + + +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#include "llvm-c/Transforms/PassManagerBuilder.h" + +#include "llvm/PassManager.h" +#include "llvm/DefaultPasses.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +PassManagerBuilder::PassManagerBuilder() { + OptLevel = 2; + SizeLevel = 0; + LibraryInfo = 0; + Inliner = 0; + DisableSimplifyLibCalls = false; + DisableUnitAtATime = false; + DisableUnrollLoops = false; +} + +PassManagerBuilder::~PassManagerBuilder() { + delete LibraryInfo; + delete Inliner; +} + +/// Set of global extensions, automatically added as part of the standard set. +static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, + PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions; + +void PassManagerBuilder::addGlobalExtension( + PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + GlobalExtensions->push_back(std::make_pair(Ty, Fn)); +} + +void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { + Extensions.push_back(std::make_pair(Ty, Fn)); +} + +void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, + PassManagerBase &PM) const { + for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i) + if ((*GlobalExtensions)[i].first == ETy) + (*GlobalExtensions)[i].second(*this, PM); + for (unsigned i = 0, e = Extensions.size(); i != e; ++i) + if (Extensions[i].first == ETy) + Extensions[i].second(*this, PM); +} + +void +PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const { + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); +} + +void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) { + addExtensionsToPM(EP_EarlyAsPossible, FPM); + + // Add LibraryInfo if we have some. 
+ if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo)); + + if (OptLevel == 0) return; + + addInitialAliasAnalysisPasses(FPM); + + FPM.add(createCFGSimplificationPass()); + FPM.add(createScalarReplAggregatesPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); +} + +void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { + // If all optimizations are disabled, just run the always-inline pass. + if (OptLevel == 0) { + if (Inliner) { + MPM.add(Inliner); + Inliner = 0; + } + return; + } + + // Add LibraryInfo if we have some. + if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (!DisableUnitAtATime) { + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + } + + // Start of CallGraph SCC passes. + if (!DisableUnitAtATime) + MPM.add(createPruneEHPass()); // Remove dead EH info + if (Inliner) { + MPM.add(Inliner); + Inliner = 0; + } + if (!DisableUnitAtATime) + MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + // Start of function pass. + // Break up aggregate allocas, using SSAUpdater. + MPM.add(createScalarReplAggregatesPass(-1, false)); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + if (!DisableSimplifyLibCalls) + MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + MPM.add(createLoopRotatePass()); // Rotate Loop + MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createInstructionCombiningPass()); + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); // Unroll small loops + addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + + if (OptLevel > 1) + MPM.add(createGVNPass()); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + MPM.add(createInstructionCombiningPass()); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + + addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Clean up after everything. 
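// (Client-side shape, for reference; the names and the inliner threshold
// here are illustrative:
//    PassManagerBuilder Builder;
//    Builder.OptLevel = 2;                          // roughly -O2
//    Builder.Inliner = createFunctionInliningPass(225);
//    Builder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, AddFn);
//    Builder.populateFunctionPassManager(FPM);
//    Builder.populateModulePassManager(MPM);        // consumes Inliner
// where AddFn is any void(const PassManagerBuilder &, PassManagerBase &).)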
+ + if (!DisableUnitAtATime) { + // FIXME: We shouldn't bother with this anymore. + MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O3 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + if (OptLevel > 2) + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + + if (OptLevel > 1) + MPM.add(createConstantMergePass()); // Merge dup global constants + } +} + +void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, + bool Internalize, + bool RunInliner) { + // Provide AliasAnalysis services for optimizations. + addInitialAliasAnalysisPasses(PM); + + // Now that composite has been compiled, scan through the module, looking + // for a main function. If main is defined, mark all other functions + // internal. + if (Internalize) + PM.add(createInternalizePass(true)); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + + // Now that we internalized some globals, see if we can hack on them! + PM.add(createGlobalOptimizerPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant. + PM.add(createConstantMergePass()); + + // Remove unused arguments from functions. + PM.add(createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + PM.add(createInstructionCombiningPass()); + + // Inline small functions + if (RunInliner) + PM.add(createFunctionInliningPass()); + + PM.add(createPruneEHPass()); // Remove dead EH info. + + // Optimize globals again if we ran the inliner. + if (RunInliner) + PM.add(createGlobalOptimizerPass()); + PM.add(createGlobalDCEPass()); // Remove dead functions. + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + PM.add(createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + PM.add(createInstructionCombiningPass()); + PM.add(createJumpThreadingPass()); + // Break up allocas + PM.add(createScalarReplAggregatesPass()); + + // Run a few AA driven optimizations here and now, to cleanup the code. + PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createGlobalsModRefPass()); // IP alias analysis. + + PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createGVNPass()); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. + // Nuke dead stores. + PM.add(createDeadStoreEliminationPass()); + + // Cleanup and simplify the code after the scalar optimizations. + PM.add(createInstructionCombiningPass()); + + PM.add(createJumpThreadingPass()); + + // Delete basic blocks, which optimization passes may have killed. + PM.add(createCFGSimplificationPass()); + + // Now that we have optimized the program, discard unreachable functions. 
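// (Corresponding client code, schematically:
//    PassManager PM;
//    PassManagerBuilder Builder;
//    Builder.populateLTOPassManager(PM, /*Internalize=*/true,
//                                   /*RunInliner=*/true);
//    PM.run(M);)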
+ PM.add(createGlobalDCEPass()); +} + +LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void) { + PassManagerBuilder *PMB = new PassManagerBuilder(); + return wrap(PMB); +} + +void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { + PassManagerBuilder *Builder = unwrap(PMB); + delete Builder; +} + +void +LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, + unsigned OptLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->OptLevel = OptLevel; +} + +void +LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, + unsigned SizeLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->SizeLevel = SizeLevel; +} + +void +LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableUnitAtATime = Value; +} + +void +LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableUnrollLoops = Value; +} + +void +LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableSimplifyLibCalls = Value; +} + +void +LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, + unsigned Threshold) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->Inliner = createFunctionInliningPass(Threshold); +} + +void +LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM); + Builder->populateFunctionPassManager(*FPM); +} + +void +LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + PassManagerBase *MPM = unwrap(PM); + Builder->populateModulePassManager(*MPM); +} + +void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM, + bool Internalize, + bool RunInliner) { + PassManagerBuilder *Builder = unwrap(PMB); + PassManagerBase *LPM = unwrap(PM); + Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); +} + diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index b7e63dc..cbb80f0 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -101,8 +101,9 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { // Check to see if this function performs an unwind or calls an // unwinding function. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) { - // Uses unwind! + if (CheckUnwind && (isa<UnwindInst>(BB->getTerminator()) || + isa<ResumeInst>(BB->getTerminator()))) { + // Uses unwind / resume! 
SCCMightUnwind = true; } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) { SCCMightReturn = true; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 0fbaff1..b5caa9a 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -180,7 +180,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) { for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *STy = StructTypes[i]; - if (STy->isAnonymous() || STy->getName().empty()) continue; + if (STy->isLiteral() || STy->getName().empty()) continue; if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg")) continue; diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index d070ccc..a46d5ad 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -13,3 +13,11 @@ add_llvm_library(LLVMInstCombine InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) + +add_llvm_library_dependencies(LLVMInstCombine + LLVMAnalysis + LLVMCore + LLVMSupport + LLVMTarget + LLVMTransformUtils + ) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 8257d6b..3808278 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,6 +11,7 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" @@ -103,7 +104,7 @@ public: // Instruction *visitAdd(BinaryOperator &I); Instruction *visitFAdd(BinaryOperator &I); - Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); + Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty); Instruction *visitSub(BinaryOperator &I); Instruction *visitFSub(BinaryOperator &I); Instruction *visitMul(BinaryOperator &I); @@ -192,15 +193,16 @@ public: Instruction *visitExtractElementInst(ExtractElementInst &EI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); Instruction *visitExtractValueInst(ExtractValueInst &EV); + Instruction *visitLandingPadInst(LandingPadInst &LI); // visitInstruction - Specify what to return for unhandled instructions... Instruction *visitInstruction(Instruction &I) { return 0; } private: - bool ShouldChangeType(const Type *From, const Type *To) const; + bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V) const; - const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, + Type *FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); @@ -209,12 +211,13 @@ private: /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. 
bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + Type *Ty); Instruction *visitCallSite(CallSite CS); Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); @@ -357,7 +360,7 @@ private: Instruction *SimplifyMemSet(MemSetInst *MI); - Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); + Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); }; diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c36a955..d10046c 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -188,7 +188,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateMul(LHS, AddOne(C2)); // A+B --> A|B iff A and B have no bits set in common. - if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { + if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); @@ -401,7 +401,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { Value *InstCombiner::EmitGEPOffset(User *GEP) { TargetData &TD = *getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); + Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); Value *Result = Constant::getNullValue(IntPtrTy); // If the GEP is inbounds, we know that none of the addressing operations will @@ -420,7 +420,7 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { if (OpC->isZero()) continue; // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); if (Size) @@ -460,7 +460,7 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. 
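/// (Concrete payoff, e.g. for
///      ptrdiff_t indexOf(int *Base, size_t I) { return &Base[I] - Base; }
/// the sub of the two ptrtoints folds to the gep offset I*sizeof(int), and
/// the front end's 'sdiv exact' by sizeof(int) then cancels it to plain I.)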
/// Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, - const Type *Ty) { + Type *Ty) { assert(TD && "Must have target data info for this"); // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 64ea36f..5e0bfe8 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1174,30 +1174,31 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { ((A == C && B == D) || (A == D && B == C))) return BinaryOperator::CreateXor(A, B); - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1) { // (A^B)&A -> A&(A^B) - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } else if (B == Op1) { // (A^B)&B -> B&(B^A) - cast<BinaryOperator>(Op0)->swapOperands(); - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); + // A&(A^B) => A & ~B + { + Value *tmpOp0 = Op0; + Value *tmpOp1 = Op1; + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1 || B == Op1 ) { + tmpOp1 = Op0; + tmpOp0 = Op1; + // Simplify below + } } - } - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_Value(B)))) { - if (B == Op0) { // B&(A^B) -> B&(B^A) - cast<BinaryOperator>(Op1)->swapOperands(); - std::swap(A, B); + if (tmpOp1->hasOneUse() && + match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == tmpOp0) { + std::swap(A, B); + } + // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if + // A is originally -1 (or a vector of -1 and undefs), then we enter + // an endless loop. By checking that A is non-constant we ensure that + // we will never get to the loop. + if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); } - // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if - // A is originally -1 (or a vector of -1 and undefs), then we enter - // an endless loop. By checking that A is non-constant we ensure that - // we will never get to the loop. - if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); } // (A&((~A)|B)) -> A&B @@ -1224,7 +1225,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntOrIntVectorTy()) { @@ -2008,7 +2009,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { CastInst *Op1C = dyn_cast<CastInst>(Op1); if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? 
- const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntOrIntVectorTy()) { Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); @@ -2227,14 +2228,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (A == Op1) // (B|A)^B == (A|B)^B std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); + return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); if (B == Op1 && // (B&A)^A == ~B & A !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); + return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1); } } } @@ -2288,7 +2289,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? - const Type *SrcTy = Op0C->getOperand(0)->getType(); + Type *SrcTy = Op0C->getOperand(0)->getType(); if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 537f2b3..c7b3ff8 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -22,8 +21,8 @@ using namespace llvm; /// getPromotedType - Return the specified type promoted as it would be to pass /// though a va_arg area. -static const Type *getPromotedType(const Type *Ty) { - if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { +static Type *getPromotedType(Type *Ty) { + if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { if (ITy->getBitWidth() < 32) return Type::getInt32Ty(Ty->getContext()); } @@ -64,7 +63,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned DstAddrSp = cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); - const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); + IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); @@ -76,18 +75,18 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // integer datatype. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); if (StrippedDest != MI->getArgOperand(0)) { - const Type *SrcETy = cast<PointerType>(StrippedDest->getType()) + Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. 
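// (Source-level picture: with 'struct S { double D; };', a
//    memcpy(Dst, Src, sizeof(S))
// becomes a single 64-bit integer load and store once this loop has peeled
// the {{{double}}} / [1 x double] wrappers down to the scalar.)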
while (!SrcETy->isSingleValueType()) { - if (const StructType *STy = dyn_cast<StructType>(SrcETy)) { + if (StructType *STy = dyn_cast<StructType>(SrcETy)) { if (STy->getNumElements() == 1) SrcETy = STy->getElementType(0); else break; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { if (ATy->getNumElements() == 1) SrcETy = ATy->getElementType(); else @@ -142,7 +141,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. + Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. Value *Dest = MI->getDest(); unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); @@ -250,7 +249,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // We need target data for just about everything so depend on it. if (!TD) break; - const Type *ReturnTy = CI.getType(); + Type *ReturnTy = CI.getType(); uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL; // Get to the real allocated thing and offset as fast as possible. @@ -266,8 +265,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Get the current byte offset into the thing. Use the original // operand in case we're looking through a bitcast. SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); - Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), - Ops.data(), Ops.size()); + Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); Op1 = GEP->getPointerOperand()->stripPointerCasts(); @@ -300,7 +298,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } else if (CallInst *MI = extractMallocCall(Op1)) { // Get allocation size. - const Type* MallocType = getMallocAllocatedType(MI); + Type* MallocType = getMallocAllocatedType(MI); if (MallocType && MallocType->isSized()) if (Value *NElems = getMallocArraySize(MI, TD, true)) if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) @@ -355,7 +353,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. - const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); // FIXME: Try to simplify vectors of integers. if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); @@ -374,7 +372,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ctlz: { // If all bits above the first known one are known zero, // this value is constant. - const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); // FIXME: Try to simplify vectors of integers. 
if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); @@ -392,7 +390,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::uadd_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt Mask = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0); @@ -416,7 +414,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(LHS->getType()), ConstantInt::getTrue(II->getContext()) }; - const StructType *ST = cast<StructType>(II->getType()); + StructType *ST = cast<StructType>(II->getType()); Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } @@ -430,7 +428,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(LHS->getType()), ConstantInt::getFalse(II->getContext()) }; - const StructType *ST = cast<StructType>(II->getType()); + StructType *ST = cast<StructType>(II->getType()); Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } @@ -559,7 +557,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) { - const Type *OpPtrTy = + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); @@ -570,7 +568,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { - const Type *OpPtrTy = + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); return new StoreInst(II->getArgOperand(1), Ptr); @@ -656,15 +654,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (ExtractedElts[Idx] == 0) { ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, - ConstantInt::get(Type::getInt32Ty(II->getContext()), - Idx&15, false), "tmp"); + Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + Builder->getInt32(Idx&15)); } // Insert this value into the result vector. Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - ConstantInt::get(Type::getInt32Ty(II->getContext()), - i, false), "tmp"); + Builder->getInt32(i)); } return CastInst::Create(Instruction::BitCast, Result, CI.getType()); } @@ -733,9 +729,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } - // If the stack restore is in a return/unwind block and if there are no - // allocas or calls between the restore and the return, nuke the restore. - if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI))) + // If the stack restore is in a return, resume, or unwind block and if there + // are no allocas or calls between the restore and the return, nuke the + // restore. 
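// (Such restores usually bracket variable-length array scopes, e.g.
//    void f(unsigned N) { char Buf[N]; use(Buf, N); }
// where the stackrestore for Buf's scope lands directly in front of the
// 'ret'; leaving the function restores the stack anyway, so it is dead.)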
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) || + isa<UnwindInst>(TI))) return EraseInstFromFunction(CI); break; } @@ -765,9 +763,9 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, if (!CS.paramHasAttr(ix, Attribute::ByVal)) return true; - const Type* SrcTy = + Type* SrcTy = cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); - const Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); + Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) return false; if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) @@ -820,6 +818,83 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { return Simplifier.NewInstruction; } +static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { + // Strip off at most one level of pointer casts, looking for an alloca. This + // is good enough in practice and simpler than handling any number of casts. + Value *Underlying = TrampMem->stripPointerCasts(); + if (Underlying != TrampMem && + (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem)) + return 0; + if (!isa<AllocaInst>(Underlying)) + return 0; + + IntrinsicInst *InitTrampoline = 0; + for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end(); + I != E; I++) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I); + if (!II) + return 0; + if (II->getIntrinsicID() == Intrinsic::init_trampoline) { + if (InitTrampoline) + // More than one init_trampoline writes to this value. Give up. + return 0; + InitTrampoline = II; + continue; + } + if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) + // Allow any number of calls to adjust.trampoline. + continue; + return 0; + } + + // No call to init.trampoline found. + if (!InitTrampoline) + return 0; + + // Check that the alloca is being used in the expected way. + if (InitTrampoline->getOperand(0) != TrampMem) + return 0; + + return InitTrampoline; +} + +static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, + Value *TrampMem) { + // Visit all the previous instructions in the basic block, and try to find a + // init.trampoline which has a direct path to the adjust.trampoline. + for (BasicBlock::iterator I = AdjustTramp, + E = AdjustTramp->getParent()->begin(); I != E; ) { + Instruction *Inst = --I; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::init_trampoline && + II->getOperand(0) == TrampMem) + return II; + if (Inst->mayWriteToMemory()) + return 0; + } + return 0; +} + +// Given a call to llvm.adjust.trampoline, find and return the corresponding +// call to llvm.init.trampoline if the call to the trampoline can be optimized +// to a direct call to a function. Otherwise return NULL. +// +static IntrinsicInst *FindInitTrampoline(Value *Callee) { + Callee = Callee->stripPointerCasts(); + IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); + if (!AdjustTramp || + AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) + return 0; + + Value *TrampMem = AdjustTramp->getOperand(0); + + if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem)) + return IT; + if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) + return IT; + return 0; +} + // visitCallSite - Improvements for call and invoke instructions. 
// Instruction *InstCombiner::visitCallSite(CallSite CS) { @@ -879,13 +954,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { return EraseInstFromFunction(*CS.getInstruction()); } - if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee)) - if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0))) - if (In->getIntrinsicID() == Intrinsic::init_trampoline) - return transformCallThroughTrampoline(CS); + if (IntrinsicInst *II = FindInitTrampoline(Callee)) + return transformCallThroughTrampoline(CS, II); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); if (FTy->isVarArg()) { int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); // See if we can optimize any arguments passed through the varargs area of @@ -934,9 +1007,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // would cause a type conversion of one of our arguments, change this call to // be a direct call with arguments casted to the appropriate types. // - const FunctionType *FT = Callee->getFunctionType(); - const Type *OldRetTy = Caller->getType(); - const Type *NewRetTy = FT->getReturnType(); + FunctionType *FT = Callee->getFunctionType(); + Type *OldRetTy = Caller->getType(); + Type *NewRetTy = FT->getReturnType(); if (NewRetTy->isStructTy()) return false; // TODO: Handle multiple return values. @@ -982,8 +1055,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallSite::arg_iterator AI = CS.arg_begin(); for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); - const Type *ActTy = (*AI)->getType(); + Type *ParamTy = FT->getParamType(i); + Type *ActTy = (*AI)->getType(); if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. @@ -995,11 +1068,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) { - const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); + PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) return false; - const Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); + Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); if (TD->getTypeAllocSize(CurElTy) != TD->getTypeAllocSize(ParamPTy->getElementType())) return false; @@ -1023,7 +1096,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the callee is just a declaration, don't change the varargsness of the // call. We don't want to introduce a varargs call where one doesn't // already exist. 
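// (The classic producer is unprototyped C, where the call site disagrees
// with the callee's real type and goes through a constant bitcast:
//    call i32 bitcast (i32 (i64)* @callee to i32 (i32)*)(i32 %x)
// is rewritten below into 'zext i32 %x to i64' plus a direct call to
// @callee; the casts here are built with unsigned semantics.)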
- const PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); + PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg()) return false; } @@ -1062,13 +1135,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); + Type *ParamTy = FT->getParamType(i); if ((*AI)->getType() == ParamTy) { Args.push_back(*AI); } else { Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, ParamTy, false); - Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); + Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy)); } // Add any parameter attributes. @@ -1089,12 +1162,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } else { // Add all of the arguments in their promoted form to the arg list. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { - const Type *PTy = getPromotedType((*AI)->getType()); + Type *PTy = getPromotedType((*AI)->getType()); if (PTy != (*AI)->getType()) { // Must promote to pass through va_arg area! Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, PTy, false); - Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp")); + Args.push_back(Builder->CreateCast(opcode, *AI, PTy)); } else { Args.push_back(*AI); } @@ -1138,13 +1211,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!NV->getType()->isVoidTy()) { Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, OldRetTy, false); - NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); + NV = NC = CastInst::Create(opcode, NC, OldRetTy); NC->setDebugLoc(Caller->getDebugLoc()); // If this is an invoke instruction, we should insert it after the first // non-phi, instruction in the normal successor block. if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { - BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI(); + BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt(); InsertNewInstBefore(NC, *I); } else { // Otherwise, it's a call, just insert cast right after the call. @@ -1163,13 +1236,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return true; } -// transformCallThroughTrampoline - Turn a call to a function created by the -// init_trampoline intrinsic into a direct call to the underlying function. +// transformCallThroughTrampoline - Turn a call to a function created by +// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the +// underlying function. 
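// (Schematically, the IR being matched after the intrinsic split:
//    call void @llvm.init.trampoline(i8* %tramp,
//                                    i8* bitcast (... @f to i8*), i8* %chain)
//    %p  = call i8* @llvm.adjust.trampoline(i8* %tramp)
//    %fp = bitcast i8* %p to i32 (i32)*
//    call i32 %fp(i32 42)
// which becomes 'call i32 @f(i8* nest %chain, i32 42)'. GCC-style nested
// functions are the usual source of this pattern.)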
// -Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { +Instruction * +InstCombiner::transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp) { Value *Callee = CS.getCalledValue(); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); const AttrListPtr &Attrs = CS.getAttributes(); // If the call already has the 'nest' attribute somewhere then give up - @@ -1177,12 +1253,12 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { if (Attrs.hasAttrSomewhere(Attribute::Nest)) return 0; - IntrinsicInst *Tramp = - cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); + assert(Tramp && + "transformCallThroughTrampoline called with incorrect CallSite."); Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); - const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); - const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); + PointerType *NestFPTy = cast<PointerType>(NestF->getType()); + FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); const AttrListPtr &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 82c734e..f10e48a 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -79,14 +80,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // This requires TargetData to get the alloca alignment and size information. if (!TD) return 0; - const PointerType *PTy = cast<PointerType>(CI.getType()); + PointerType *PTy = cast<PointerType>(CI.getType()); BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); // Get the type really allocated and the type casted to. - const Type *AllocElTy = AI.getAllocatedType(); - const Type *CastElTy = PTy->getElementType(); + Type *AllocElTy = AI.getAllocatedType(); + Type *CastElTy = PTy->getElementType(); if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); @@ -121,13 +122,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, } else { Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale); // Insert before the alloca, not before the cast. - Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); + Amt = AllocaBuilder.CreateMul(Amt, NumElements); } if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); - Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); + Amt = AllocaBuilder.CreateAdd(Amt, Off); } AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); @@ -151,7 +152,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, /// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. 
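/// (Canonical case: for 'int f(long A, long B) { return (int)(A + B); }',
/// CanEvaluateTruncated accepts the add, and this routine rebuilds it as
/// an i32 add of the truncated operands, eliminating the wide arithmetic.)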
-Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -229,12 +230,12 @@ static Instruction::CastOps isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction - const Type *DstTy, ///< The target type for the second cast instruction + Type *DstTy, ///< The target type for the second cast instruction TargetData *TD ///< The target data for pointer size ) { - const Type *SrcTy = CI->getOperand(0)->getType(); // A from above - const Type *MidTy = CI->getType(); // B from above + Type *SrcTy = CI->getOperand(0)->getType(); // A from above + Type *MidTy = CI->getType(); // B from above // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); @@ -260,7 +261,7 @@ isEliminableCastPair( /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, - const Type *Ty) { + Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; @@ -324,7 +325,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateTruncated(Value *V, const Type *Ty) { +static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa<Constant>(V)) return true; @@ -332,7 +333,7 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) { Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - const Type *OrigTy = V->getType(); + Type *OrigTy = V->getType(); // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. @@ -435,7 +436,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return &CI; Value *Src = CI.getOperand(0); - const Type *DestTy = CI.getType(), *SrcTy = Src->getType(); + Type *DestTy = CI.getType(), *SrcTy = Src->getType(); // Attempt to truncate the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -456,7 +457,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector. if (DestTy->getScalarSizeInBits() == 1) { Constant *One = ConstantInt::get(Src->getType(), 1); - Src = Builder->CreateAnd(Src, One, "tmp"); + Src = Builder->CreateAnd(Src, One); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } @@ -518,7 +519,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, In->getType()->getScalarSizeInBits()-1); In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp"); + In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); @@ -572,7 +573,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if ((Op1CV != 0) == isNE) { // Toggle the low bit. 
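The ".lobit" pattern built above materializes a sign test as a logical shift right by bitwidth-1. A standalone sketch of the scalar identity, assuming 32-bit integers:

#include <cassert>
#include <cstdint>

// zext(icmp slt x, 0) is the same as lshr x, 31.
int main() {
  for (int32_t i = -5; i <= 5; ++i) {
    uint32_t x = (uint32_t)i;
    uint32_t lobit  = x >> 31;             // lshr x, 31
    uint32_t viaCmp = (i < 0) ? 1u : 0u;   // zext(icmp slt x, 0)
    assert(lobit == viaCmp);
  }
  return 0;
}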
Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, "tmp"); + In = Builder->CreateXor(In, One); } if (CI.getType() == In->getType()) @@ -586,7 +587,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // It is also profitable to transform icmp eq into not(xor(A, B)) because that // may lead to additional simplifications. if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { - if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { + if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { uint32_t BitWidth = ITy->getBitWidth(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); @@ -644,7 +645,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, /// clear the top bits anyway, doing this has no extra cost. /// /// This function works on both vectors and scalars. -static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) { +static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa<Constant>(V)) return true; @@ -758,7 +759,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return &CI; Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -820,7 +821,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { AndValue)); } if (SrcSize > DstSize) { - Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp"); + Value *Trunc = Builder->CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), @@ -867,7 +868,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *TI0 = TI->getOperand(0); if (TI0->getType() == CI.getType()) { Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp"); + Value *NewAnd = Builder->CreateAnd(TI0, ZC); return BinaryOperator::CreateXor(NewAnd, ZC); } } @@ -900,7 +901,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Op0->getType()->getScalarSizeInBits()-1); Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); if (Pred == ICmpInst::ICMP_SGT) In = Builder->CreateNot(In, In->getName()+".not"); @@ -965,10 +966,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { } // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. - if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && Op0->getType() == CI.getType()) { - const Type *EltTy = VTy->getElementType(); + Type *EltTy = VTy->getElementType(); // splat the shift constant to a constant vector. Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); @@ -988,7 +989,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { /// /// This function works on both vectors and scalars. 
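The "(x <s 0) ? -1 : 0 --> ashr x, 31" fold above relies on the arithmetic shift smearing the sign bit across the whole word. A standalone sketch, written with fully defined unsigned operations (0 minus the sign bit) instead of a signed shift, to stay portable:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t i = -3; i <= 3; ++i) {
    uint32_t x = (uint32_t)i;
    uint32_t ashr = 0u - (x >> 31);                      // 0 or all-ones
    uint32_t sel  = (i < 0) ? 0xFFFFFFFFu : 0u;          // the select form
    assert(ashr == sel);
  }
  return 0;
}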
/// -static bool CanEvaluateSExtd(Value *V, const Type *Ty) { +static bool CanEvaluateSExtd(Value *V, Type *Ty) { assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() && "Can't sign extend type to a smaller type"); // If this is a constant, it can be trivially promoted. @@ -1063,7 +1064,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { return &CI; Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the @@ -1192,7 +1193,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - const Type *SrcTy = OpI->getType(); + Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); if (LHSTrunc->getType() != SrcTy && @@ -1306,13 +1307,13 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (CI.getOperand(0)->getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), "tmp"); + TD->getIntPtrType(CI.getContext())); return new IntToPtrInst(P, CI.getType()); } if (CI.getOperand(0)->getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { Value *P = Builder->CreateZExt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), "tmp"); + TD->getIntPtrType(CI.getContext())); return new IntToPtrInst(P, CI.getType()); } } @@ -1351,7 +1352,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); - const Type *GEPIdxTy = + Type *GEPIdxTy = cast<PointerType>(OrigBase->getType())->getElementType(); SmallVector<Value*, 8> NewIndices; if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) { @@ -1359,9 +1360,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // and bitcast the result. This eliminates one bitcast, potentially // two. Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, - NewIndices.begin(), NewIndices.end()) : - Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); + Builder->CreateInBoundsGEP(OrigBase, NewIndices) : + Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); if (isa<BitCastInst>(CI)) @@ -1382,14 +1382,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { if (TD) { if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); + TD->getIntPtrType(CI.getContext())); return new TruncInst(P, CI.getType()); } if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); + TD->getIntPtrType(CI.getContext())); return new ZExtInst(P, CI.getType()); } } @@ -1402,12 +1400,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { /// replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. 
-static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, +static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. - const VectorType *SrcTy = cast<VectorType>(InVal->getType()); + VectorType *SrcTy = cast<VectorType>(InVal->getType()); if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the @@ -1427,7 +1425,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, // size of the input. SmallVector<Constant*, 16> ShuffleMask; Value *V2; - const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low @@ -1453,11 +1451,11 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask)); } -static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) { +static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) { return Value % Ty->getPrimitiveSizeInBits() == 0; } -static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) { +static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { return Value / Ty->getPrimitiveSizeInBits(); } @@ -1471,7 +1469,7 @@ static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) { /// filling in Elements with the elements found here. static bool CollectInsertionElements(Value *V, unsigned ElementIndex, SmallVectorImpl<Value*> &Elements, - const Type *VecEltTy) { + Type *VecEltTy) { // Undef values never contribute useful bits to the result. if (isa<UndefValue>(V)) return true; @@ -1508,7 +1506,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(), C->getType()->getPrimitiveSizeInBits())); unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); - const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); + Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); for (unsigned i = 0; i != NumElts; ++i) { Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), @@ -1572,7 +1570,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, /// Into two insertelements that do "buildvector{%inc, %inc5}". static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { - const VectorType *DestVecTy = cast<VectorType>(CI.getType()); + VectorType *DestVecTy = cast<VectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); @@ -1599,7 +1597,7 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ Value *Src = CI.getOperand(0); - const Type *DestTy = CI.getType(); + Type *DestTy = CI.getType(); // If this is a bitcast from int to float, check to see if the int is an // extraction from a vector. 
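CollectInsertionElements above decomposes a wide integer constant into element-sized pieces with repeated lshr and trunc. A standalone sketch of that decomposition (the constant and piece width are illustrative):

#include <cassert>
#include <cstdint>

// Piece i is (C >> i*ElementSize) truncated; element 0 is the least
// significant piece.
int main() {
  uint64_t C = 0x1122334455667788ULL;
  uint16_t Piece[4];
  for (unsigned i = 0; i != 4; ++i)
    Piece[i] = (uint16_t)(C >> (i * 16));   // lshr + trunc
  assert(Piece[0] == 0x7788 && Piece[3] == 0x1122);
  return 0;
}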
@@ -1607,7 +1605,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ // bitcast(trunc(bitcast(somevector))) if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && isa<VectorType>(VecInput->getType())) { - const VectorType *VecTy = cast<VectorType>(VecInput->getType()); + VectorType *VecTy = cast<VectorType>(VecInput->getType()); unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { @@ -1628,7 +1626,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), m_ConstantInt(ShAmt)))) && isa<VectorType>(VecInput->getType())) { - const VectorType *VecTy = cast<VectorType>(VecInput->getType()); + VectorType *VecTy = cast<VectorType>(VecInput->getType()); unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && ShAmt->getZExtValue() % DestWidth == 0) { @@ -1651,18 +1649,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. Value *Src = CI.getOperand(0); - const Type *SrcTy = Src->getType(); - const Type *DestTy = CI.getType(); + Type *SrcTy = Src->getType(); + Type *DestTy = CI.getType(); // Get rid of casts from one type to the same type. These are useless and can // be replaced by the operand. if (DestTy == Src->getType()) return ReplaceInstUsesWith(CI, Src); - if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { - const PointerType *SrcPTy = cast<PointerType>(SrcTy); - const Type *DstElTy = DstPTy->getElementType(); - const Type *SrcElTy = SrcPTy->getElementType(); + if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) { + PointerType *SrcPTy = cast<PointerType>(SrcTy); + Type *DstElTy = DstPTy->getElementType(); + Type *SrcElTy = SrcPTy->getElementType(); // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. @@ -1693,7 +1691,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. 
if (SrcElTy == DstElTy) { SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt); - return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end()); + return GetElementPtrInst::CreateInBounds(Src, Idxs); } } @@ -1702,7 +1700,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) return I; - if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { + if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, @@ -1731,7 +1729,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } - if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { + if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { Value *Elem = Builder->CreateExtractElement(Src, diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index c78760b..bb1cbfa 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" @@ -56,7 +57,7 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1, Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getAdd(In1, In2); - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasAddOverflow(ExtractElement(Result, Idx), @@ -78,7 +79,7 @@ static bool HasSubOverflow(ConstantInt *Result, bool IsSigned) { if (!IsSigned) return Result->getValue().ugt(In1->getValue()); - + if (In2->isNegative()) return Result->getValue().slt(In1->getValue()); @@ -91,7 +92,7 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1, Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getSub(In1, In2); - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasSubOverflow(ExtractElement(Result, Idx), @@ -128,7 +129,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, // True if LHS u> RHS and RHS == high-bit-mask - 1 TrueIfSigned = true; return RHS->isMaxValue(true); - case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; return RHS->getValue().isSignBit(); @@ -143,7 +144,7 @@ static bool isHighOnes(const ConstantInt *CI) { return (~CI->getValue() + 1).isPowerOf2(); } -/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a +/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a /// set of known zero and one bits, compute the maximum and minimum values that /// could have the specified known zero and known one bits, returning them in /// min/max. 
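The min/max computation described above sets every unknown bit to 0 for the minimum and to 1 for the maximum. A standalone sketch of the unsigned case, with hypothetical known-bit masks:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t KnownZero = 0xFFFF0000u;  // high half known to be 0
  uint32_t KnownOne  = 0x00000008u;  // bit 3 known to be 1
  uint32_t Unknown   = ~(KnownZero | KnownOne);
  uint32_t Min = KnownOne;           // unknown bits -> 0
  uint32_t Max = KnownOne | Unknown; // unknown bits -> 1
  assert(Min == 0x8u && Max == 0xFFFFu);
  return 0;
}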
@@ -160,7 +161,7 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, // bit if it is unknown. Min = KnownOne; Max = KnownOne|UnknownBits; - + if (UnknownBits.isNegative()) { // Sign bit is unknown Min.setBit(Min.getBitWidth()-1); Max.clearBit(Max.getBitWidth()-1); @@ -179,7 +180,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, KnownZero.getBitWidth() == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); APInt UnknownBits = ~(KnownZero|KnownOne); - + // The minimum value is when the unknown bits are all zeros. Min = KnownOne; // The maximum value is when the unknown bits are all ones. @@ -201,10 +202,10 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. if (!GEP->isInBounds() && TD == 0) return 0; - + ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer()); if (Init == 0 || Init->getNumOperands() > 1024) return 0; - + // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. // @@ -219,31 +220,31 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // type they index. Collect the indices. This is typically for arrays of // structs. SmallVector<unsigned, 4> LaterIndices; - - const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); + + Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (Idx == 0) return 0; // Variable index. - + uint64_t IdxVal = Idx->getZExtValue(); if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. - - if (const StructType *STy = dyn_cast<StructType>(EltTy)) + + if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); - else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { + else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { if (IdxVal >= ATy->getNumElements()) return 0; EltTy = ATy->getElementType(); } else { return 0; // Unknown type. } - + LaterIndices.push_back(IdxVal); } - + enum { Overdefined = -3, Undefined = -2 }; // Variables for our state machines. - + // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form // "i == 47 | i == 87", where 47 is the first index the condition is true for, // and 87 is the second (and last) index. FirstTrueElement is -2 when @@ -254,7 +255,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the // form "i != 47 & i != 87". Same state transitions as for true elements. int FirstFalseElement = Undefined, SecondFalseElement = Undefined; - + /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these /// define a state machine that triggers for ranges of values that the index /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. @@ -262,25 +263,25 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// index in the range (inclusive). We use -2 for undefined here because we /// use relative comparisons and don't want 0-1 to match -1. int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; - + // MagicBitvector - This is a magic bitvector where we set a bit if the // comparison is true for element 'i'. 
If there are 64 elements or less in // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; - - + + // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { Constant *Elt = Init->getOperand(i); - + // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); - + // If the element is masked, handle it. if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); - + // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, CompareRHS, TD); @@ -294,15 +295,15 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, FalseRangeEnd = i; continue; } - + // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. if (!isa<ConstantInt>(C)) return 0; - + // Otherwise, we know if the comparison is true or false for this element, // update our state machines. bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); - + // State machine for single/double/range index comparison. if (IsTrueForElt) { // Update the TrueElement state machine. @@ -314,7 +315,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, SecondTrueElement = i; else SecondTrueElement = Overdefined; - + // Update range state machine. if (TrueRangeEnd == (int)i-1) TrueRangeEnd = i; @@ -331,7 +332,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, SecondFalseElement = i; else SecondFalseElement = Overdefined; - + // Update range state machine. if (FalseRangeEnd == (int)i-1) FalseRangeEnd = i; @@ -339,12 +340,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, FalseRangeEnd = Overdefined; } } - - + + // If this element is in range, update our magic bitvector. if (i < 64 && IsTrueForElt) MagicBitvector |= 1ULL << i; - + // If all of our states become overdefined, bail out early. Since the // predicate is expensive, only check it every 8 elements. This is only // really useful for really huge arrays. @@ -364,20 +365,20 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, if (!GEP->isInBounds() && Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); - + // If the comparison is only true for one or two elements, emit direct // comparisons. if (SecondTrueElement != Overdefined) { // None true -> false. if (FirstTrueElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext())); - + Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); - + // True for one element -> 'i == 47'. if (SecondTrueElement == Undefined) return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); - + // True for two elements -> 'i == 47 | i == 72'. Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); @@ -391,36 +392,36 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // None false -> true. if (FirstFalseElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext())); - + Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); // False for one element -> 'i != 47'. 
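A standalone sketch of the one/two-true-element case above, where a load-and-compare against a constant table collapses to comparisons on the index itself (the table contents are made up for illustration):

#include <cassert>

// Tbl[i] == 9 is true only at indices 1 and 3, so the load folds to
// "i == 1 | i == 3".
int main() {
  static const int Tbl[] = {4, 9, 7, 9, 2};
  for (unsigned i = 0; i != 5; ++i) {
    bool viaLoad = (Tbl[i] == 9);
    bool viaIdx  = (i == 1) || (i == 3);
    assert(viaLoad == viaIdx);
  }
  return 0;
}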
if (SecondFalseElement == Undefined) return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); - + // False for two elements -> 'i != 47 & i != 72'. Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } - + // If the comparison can be replaced with a range comparison for the elements // where it is true, emit the range check. if (TrueRangeEnd != Overdefined) { assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); - + // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). if (FirstTrueElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), TrueRangeEnd-FirstTrueElement+1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } - + // False range check. if (FalseRangeEnd != Overdefined) { assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); @@ -429,19 +430,19 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), FalseRangeEnd-FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } - - + + // If a 32-bit or 64-bit magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 if (Init->getNumOperands() <= 32 || (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { - const Type *Ty; + Type *Ty; if (Init->getNumOperands() <= 32) Ty = Type::getInt32Ty(Init->getContext()); else @@ -451,7 +452,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } - + return 0; } @@ -465,11 +466,11 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// to generate the first by knowing that pointer arithmetic doesn't overflow. /// /// If we can't emit an optimized form for this expression, this returns null. -/// +/// static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { TargetData &TD = *IC.getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - + // Check to see if this gep only has a single variable index. If so, and if // any constant indices are a multiple of its scale, then we can compute this // in terms of the scale of the variable index. For example, if the GEP @@ -481,9 +482,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); @@ -494,33 +495,33 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { break; } } - + // If there are no variable indices, we must have a constant offset, just // evaluate it the general way. 
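A standalone sketch of the magic-bitvector replacement above: bit i of the constant records the comparison result for element i, so the load disappears entirely (the table contents are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  static const int Tbl[] = {4, 9, 7, 9, 2, 9};
  uint64_t Magic = 0;                        // built at compile time
  for (unsigned i = 0; i != 6; ++i)
    if (Tbl[i] == 9) Magic |= 1ULL << i;
  // The replacement form: ((magic_cst >> i) & 1) != 0.
  for (unsigned i = 0; i != 6; ++i)
    assert(((Magic >> i) & 1) == (uint64_t)(Tbl[i] == 9));
  return 0;
}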
if (i == e) return 0; - + Value *VariableIdx = GEP->getOperand(i); // Determine the scale factor of the variable element. For example, this is // 4 if the variable index is into an array of i32. uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); - + // Verify that there are no other variable indices. If so, emit the hard way. for (++i, ++GTI; i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (!CI) return 0; - + // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); Offset += Size*CI->getSExtValue(); } } - + // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. @@ -530,19 +531,19 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; } - + // Otherwise, there is an index. The computation we will do will be modulo // the pointer size, so get it. uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - + Offset &= PtrSizeMask; VariableScale &= PtrSizeMask; - + // To do this transformation, any constant index must be a multiple of the // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a @@ -550,9 +551,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { int64_t NewOffs = Offset / (int64_t)VariableScale; if (Offset != NewOffs*(int64_t)VariableScale) return 0; - + // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); @@ -576,7 +577,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // know pointers can't overflow since the gep is inbounds. See if we can // output an optimized form. Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); - + // If not, synthesize the offset the hard way. if (Offset == 0) Offset = EmitGEPOffset(GEPLHS); @@ -686,7 +687,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, bool isTrue = ICmpInst::isTrueWhenEqual(Pred); return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); } - + // (X+4) == X -> false. if (Pred == ICmpInst::ICMP_EQ) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); @@ -698,22 +699,22 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, // so the values can never be equal. Similarly for all other "or equals" // operators. 
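The divisibility check above is what lets "12 + 4*i" be evaluated as "3 + i" in units of the scale. A standalone sketch of that rewrite and its precondition:

#include <cassert>

int main() {
  const long Offset = 12, Scale = 4;
  long NewOffs = Offset / Scale;
  assert(Offset == NewOffs * Scale);   // 12 is a multiple of 4: fold is legal
  for (long i = 0; i != 8; ++i)
    assert(Offset + Scale * i == Scale * (NewOffs + i));
  // "10 + 3*i" would fail the multiple-of check (10 % 3 != 0) and bail out.
  return 0;
}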
- + // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - Value *R = + Value *R = ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); return new ICmpInst(ICmpInst::ICMP_UGT, X, R); } - + // (X+1) >u X --> X <u (0-1) --> X != 255 // (X+2) >u X --> X <u (0-2) --> X <u 254 // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - + unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); ConstantInt *SMax = ConstantInt::get(X->getContext(), APInt::getSignedMaxValue(BitWidth)); @@ -726,14 +727,14 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - + // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 - + assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); @@ -745,14 +746,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS) { ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); const APInt &CmpRHSV = CmpRHS->getValue(); - - // FIXME: If the operand types don't match the type of the divide + + // FIXME: If the operand types don't match the type of the divide // then don't attempt this transform. The code below doesn't have the // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) <s C2 produces different + // vice versa). This is because (x /s C1) <s C2 produces different // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even - // (x /u C1) <u C2. Simply casting the operands and result won't - // work. :( The if statement below tests that condition and bails + // (x /u C1) <u C2. Simply casting the operands and result won't + // work. :( The if statement below tests that condition and bails // if it finds it. bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) @@ -768,14 +769,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, } // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. + // as in the LHS instruction that we're folding. 
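A standalone sketch of the "(X+1) <u X" family above, shown at 8 bits where MAXUINT is 255:

#include <cassert>
#include <cstdint>

// The sum wraps exactly when X is the maximum value, so the add folds
// away: (X+1) <u X  <=>  X == 255.
int main() {
  uint8_t samples[] = {0, 1, 127, 254, 255};
  for (uint8_t x : samples) {
    bool lhs = (uint8_t)(x + 1) < x;
    bool rhs = (x == 255);              // X >u (MAXUINT-1)
    assert(lhs == rhs);
  }
  return 0;
}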
bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; @@ -785,9 +786,9 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, /// If the division is known to be exact, then there is no remainder from the /// divide, so the covered range size is unit, otherwise it is the divisor. ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; - + // Figure out the interval that is being checked. For example, a comparison - // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). // Compute this interval based on the constants involved and the signedness of // the compare/divide. This computes a half-open interval, keeping track of // whether either value in the interval overflows. After analysis each @@ -805,7 +806,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // to the same result value. HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); } - + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) @@ -848,7 +849,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, if (!HiOverflow) HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); } - + // Dividing by a negative swaps the condition. LT <-> GT Pred = ICmpInst::getSwappedPredicate(Pred); } @@ -901,7 +902,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, ConstantInt *ShAmt) { const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); - + // Check that the shift amount is in range. If not, don't perform // undefined shifts. When the shift is visited it will be // simplified. @@ -909,48 +910,48 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); if (ShAmtVal >= TypeBits || ShAmtVal == 0) return 0; - + if (!ICI.isEquality()) { // If we have an unsigned comparison and an ashr, we can't simplify this. // Similarly for signed comparisons with lshr. if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) return 0; - + // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv // by a power of 2. Since we already have logic to simplify these, // transform to div and then simplify the resultant comparison. if (Shr->getOpcode() == Instruction::AShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1)) return 0; - + // Revisit the shift (to delete it). Worklist.Add(Shr); - + Constant *DivCst = ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); - + Value *Tmp = Shr->getOpcode() == Instruction::AShr ? Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); - + ICI.setOperand(0, Tmp); - + // If the builder folded the binop, just return it. BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); if (TheDiv == 0) return &ICI; - + // Otherwise, fold this div/compare. 
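A standalone sketch of the divide-to-range-check idea above, using "X /u 5 == 2" so the covered half-open interval is [10, 15):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x != 40; ++x) {
    bool viaDiv   = (x / 5u == 2u);
    bool viaRange = (x - 10u) < 5u;   // (X - Lo) <u RangeSize
    assert(viaDiv == viaRange);
  }
  return 0;
}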
assert(TheDiv->getOpcode() == Instruction::SDiv || TheDiv->getOpcode() == Instruction::UDiv); - + Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); assert(Res && "This div/cst should have folded!"); return Res; } - - + + // If we are comparing against bits always shifted out, the // comparison cannot succeed. APInt Comp = CmpRHSV << ShAmtVal; @@ -959,25 +960,25 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, Comp = Comp.lshr(ShAmtVal); else Comp = Comp.ashr(ShAmtVal); - + if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } - + // Otherwise, check to see if the bits shifted out are known to be zero. // If so, we can compare against the unshifted value: // (X & 4) >> 1 == 2 --> (X & 4) == 4. if (Shr->hasOneUse() && Shr->isExact()) return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); - + if (Shr->hasOneUse()) { // Otherwise strength reduce the shift into an and. APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); Constant *Mask = ConstantInt::get(ICI.getContext(), Val); - + Value *And = Builder->CreateAnd(Shr->getOperand(0), Mask, Shr->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); @@ -992,7 +993,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHSI, ConstantInt *RHS) { const APInt &RHSV = RHS->getValue(); - + switch (LHSI->getOpcode()) { case Instruction::Trunc: if (ICI.isEquality() && LHSI->hasOneUse()) { @@ -1003,7 +1004,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); - + // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { // Pull in the high bits from known-ones set. @@ -1014,7 +1015,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } break; - + case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { // If this is a comparison that tests the signbit (X < 0) or (x > -1), @@ -1022,7 +1023,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { Value *CompareVal = LHSI->getOperand(0); - + // If the sign bit of the XorCST is not set, there is no change to // the operation, just stop using the Xor. if (!XorCST->isNegative()) { @@ -1030,13 +1031,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Worklist.Add(LHSI); return &ICI; } - + // Was the old condition true if the operand is positive? bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; - + // If so, the new one isn't. 
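A standalone sketch of strength-reducing a shift in an equality, as done above with the high-bits mask (the shift amount and constant are illustrative):

#include <cassert>
#include <cstdint>

// Comparing X >>u 3 against 5 only constrains the high bits, so it is
// the same as masking off the low 3 bits and comparing against 5 << 3.
int main() {
  for (uint32_t x = 0; x != 128; ++x) {
    bool viaShr = ((x >> 3) == 5u);
    bool viaAnd = ((x & ~7u) == (5u << 3));
    assert(viaShr == viaAnd);
  }
  return 0;
}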
isTrueIfPositive ^= true; - + if (isTrueIfPositive) return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS)); @@ -1075,13 +1076,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && LHSI->getOperand(0)->hasOneUse()) { ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1)); - + // If the LHS is an AND of a truncating cast, we can widen the // and/compare to be the input width without changing the value // produced, eliminating a cast. if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. + // have its sign bit set or if it is an equality comparison. // Extending a relational comparison when we're checking the sign // bit would not work. if (ICI.isEquality() || @@ -1098,7 +1099,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If the LHS is an AND of a zext, and we have an equality compare, we can // shrink the and/compare to the smaller type, eliminating the cast. if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { - const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); + IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); // Make sure we don't compare the upper bits, SimplifyDemandedBits // should fold the icmp to true/false in that case. if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { @@ -1118,12 +1119,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); if (Shift && !Shift->isShift()) Shift = 0; - + ConstantInt *ShAmt; ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. - + Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. + Type *AndTy = AndCST->getType(); // Type of the and. + // We can fold this as long as we can't shift unknown bits // into the mask. This can only happen with signed shift // rights, as they sign-extend. @@ -1134,20 +1135,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // of the bits shifted in could be tested after the mask. uint32_t TyBits = Ty->getPrimitiveSizeInBits(); int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); - + uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); - if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & + if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & AndCST->getValue()) == 0) CanFold = true; } - + if (CanFold) { Constant *NewCst; if (Shift->getOpcode() == Instruction::Shl) NewCst = ConstantExpr::getLShr(RHS, ShAmt); else NewCst = ConstantExpr::getShl(RHS, ShAmt); - + // Check to see if we are shifting out any of the bits being // compared. if (ConstantExpr::get(Shift->getOpcode(), @@ -1175,7 +1176,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } } - + // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is // preferable because it allows the C<<Y expression to be hoisted out // of a loop if Y is invariant and X is not. @@ -1185,21 +1186,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Compute C << Y. 
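A standalone sketch of the "((X >> Y) & C) == 0 --> (X & (C << Y)) == 0" rewrite computed above, which is what makes C << Y hoistable out of a loop when Y is invariant:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 42;   // illustrative mask
  for (uint32_t y = 0; y != 8; ++y)
    for (uint32_t x = 0; x != 1024; ++x) {
      bool before = ((x >> y) & C) == 0;
      bool after  = (x & (C << y)) == 0;
      assert(before == after);
    }
  return 0;
}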
Value *NS; if (Shift->getOpcode() == Instruction::LShr) { - NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp"); + NS = Builder->CreateShl(AndCST, Shift->getOperand(1)); } else { // Insert a logical shift. - NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp"); + NS = Builder->CreateLShr(AndCST, Shift->getOperand(1)); } - + // Compute X & (C << Y). - Value *NewAnd = + Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); - + ICI.setOperand(0, NewAnd); return &ICI; } } - + // Try to optimize things like "A[i]&42 == 0" to index computations. if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { if (GetElementPtrInst *GEP = @@ -1234,19 +1235,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!ShAmt) break; - + uint32_t TypeBits = RHSV.getBitWidth(); - + // Check that the shift amount is in range. If not, don't perform // undefined shifts. When the shift is visited it will be // simplified. if (ShAmt->uge(TypeBits)) break; - + if (ICI.isEquality()) { // If we are comparing against bits always shifted out, the // comparison cannot succeed. @@ -1259,34 +1260,34 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } - + // If the shift is NUW, then it is just shifting out zeros, no need for an // AND. if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantExpr::getLShr(RHS, ShAmt)); - + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); Constant *Mask = - ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, + ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - + Value *And = Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, ConstantExpr::getLShr(RHS, ShAmt)); } } - + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. bool TrueIfSigned = false; if (LHSI->hasOneUse() && isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { // (X << 31) <s 0 --> (X&1) != 0 Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), - APInt::getOneBitSet(TypeBits, + APInt::getOneBitSet(TypeBits, TypeBits-ShAmt->getZExtValue()-1)); Value *And = Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); @@ -1295,7 +1296,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) case Instruction::AShr: { // Handle equality comparisons of shift-by-constant. @@ -1312,13 +1313,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + case Instruction::SDiv: case Instruction::UDiv: // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember + // Fold this div into the comparison, producing a range check. + // Determine, based on the divide type, what the range is being + // checked. 
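A standalone sketch of the sign-bit fold above, "(X << 31) <s 0 --> (X & 1) != 0", with the sign test written as an explicit mask to stay portable:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x != 16; ++x) {
    bool viaShl = ((x << 31) & 0x80000000u) != 0;  // sign bit of X << 31
    bool viaAnd = (x & 1u) != 0;
    assert(viaShl == viaAnd);
  }
  return 0;
}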
If there is an overflow on the low or high side, remember // it, otherwise compute the range [low, hi) bounding the new value. // See: InsertRangeTest above for the kinds of replacements possible. if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) @@ -1357,12 +1358,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. if (ICI.isEquality()) { bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and + + // If the first operand is (add|sub|and|or|xor|rem) with a constant, and // the second operand is a constant, simplify a bit. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { switch (BO->getOpcode()) { @@ -1389,7 +1390,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - + if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); if (Value *NegVal = dyn_castNegVal(BOp0)) @@ -1432,11 +1433,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Constant *NotCI = ConstantExpr::getNot(RHS); if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); } break; - + case Instruction::And: if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { // If bits are being compared against that are and'd out, then the @@ -1445,7 +1446,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); - + // If we have ((X & C) == C), turn it into ((X & C) != 0). if (RHS == BOC && RHSV.isPowerOf2()) return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : @@ -1460,16 +1461,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); Constant *Zero = Constant::getNullValue(X->getType()); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(pred, X, Zero); } - + // ((X & ~7) == 0) --> X < 8 if (RHSV == 0 && isHighOnes(BOC)) { Value *X = BO->getOperand(0); Constant *NegX = ConstantExpr::getNeg(BOC); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(pred, X, NegX); } @@ -1517,11 +1518,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); Value *LHSCIOp = LHSCI->getOperand(0); - const Type *SrcTy = LHSCIOp->getType(); - const Type *DestTy = LHSCI->getType(); + Type *SrcTy = LHSCIOp->getType(); + Type *DestTy = LHSCI->getType(); Value *RHSCIOp; - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. 
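A standalone sketch of the isHighOnes fold above, "(X & ~7) == 0 --> X <u 8": clearing the mask means every bit above the low three is zero.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x != 64; ++x) {
    bool viaAnd = ((x & ~7u) == 0);
    bool viaCmp = (x < 8u);
    assert(viaAnd == viaCmp);
  }
  return 0;
}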
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && TD->getPointerSizeInBits() == @@ -1539,7 +1540,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (RHSOp) return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); } - + // The code below only handles extension cast instructions, so far. // Enforce this. if (LHSCI->getOpcode() != Instruction::ZExt && @@ -1552,9 +1553,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { // Not an extension from the same type? RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) + if (RHSCIOp->getType() != LHSCIOp->getType()) return 0; - + // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. if (CI->getOpcode() != LHSCI->getOpcode()) @@ -1599,7 +1600,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); } - // The re-extended constant changed so the constant cannot be represented + // The re-extended constant changed so the constant cannot be represented // in the shorter type. Consequently, we cannot emit a simple comparison. // All the cases that fold to true or false will have already been handled // by SimplifyICmpInst, so only deal with the tricky case. @@ -1637,26 +1638,26 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // llvm.sadd.with.overflow. To do this, we have to replace the original add // with a narrower add, and discard the add-with-constant that is part of the // range check (if we can't eliminate it, this isn't profitable). - + // In order to eliminate the add-with-constant, the compare can be its only // use. Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); if (!AddWithCst->hasOneUse()) return 0; - + // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. if (!CI2->getValue().isPowerOf2()) return 0; unsigned NewWidth = CI2->getValue().countTrailingZeros(); if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; - + // The width of the new add formed is 1 more than the bias. ++NewWidth; - + // Check to see that CI1 is an all-ones value with NewWidth bits. if (CI1->getBitWidth() == NewWidth || CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) return 0; - - // In order to replace the original add with a narrower + + // In order to replace the original add with a narrower // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant // and truncates that discard the high bits of the add. Verify that this is // the case. @@ -1664,7 +1665,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end(); UI != E; ++UI) { if (*UI == AddWithCst) continue; - + // Only accept truncates for now. We would really like a nice recursive // predicate like SimplifyDemandedBits, but which goes downwards the use-def // chain to see which bits of a value are actually demanded. If the @@ -1674,32 +1675,32 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, if (TI == 0 || TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; } - + // If the pattern matches, truncate the inputs to the narrower type and // use the sadd_with_overflow intrinsic to efficiently compute both the // result and the overflow bit. 
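The narrowing above recognizes the biased-compare overflow idiom. A standalone sketch of why "(sum + 128) >u 255" detects signed 8-bit overflow of an addition carried out in 32 bits (the sample values are illustrative):

#include <cassert>
#include <cstdint>

// The bias of 128 maps the legal result range [-128, 127] onto
// [0, 255]; anything outside that window overflowed.
int main() {
  int32_t samples[] = {-128, -1, 0, 1, 100, 127};
  for (int32_t a : samples)
    for (int32_t b : samples) {
      int32_t sum = a + b;
      bool viaBias   = (uint32_t)(sum + 128) > 255u;
      bool overflows = (sum < -128) || (sum > 127);
      assert(viaBias == overflows);
    }
  return 0;
}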
Module *M = I.getParent()->getParent()->getParent(); - + Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, NewType); InstCombiner::BuilderTy *Builder = IC.Builder; - + // Put the new code above the original add, in case there are any uses of the // add between the add and the compare. Builder->SetInsertPoint(OrigAdd); - + Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); - + // The inner add was the result of the narrow add, zero extended to the // wider type. Replace it with the result computed by the intrinsic. IC.ReplaceInstUsesWith(*OrigAdd, ZExt); - + // The original icmp gets replaced with the overflow value. return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } @@ -1709,13 +1710,13 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, // Don't bother doing this transformation for pointers, don't do it for // vectors. if (!isa<IntegerType>(OrigAddV->getType())) return 0; - + // If the add is a constant expr, then we don't bother transforming it. Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); if (OrigAdd == 0) return 0; - + Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); - + // Put the new code above the original add, in case there are any uses of the // add between the add and the compare. InstCombiner::BuilderTy *Builder = IC.Builder; @@ -1740,13 +1741,13 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth, bool isSignCheck) { if (isSignCheck) return APInt::getSignBit(BitWidth); - + ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); if (!CI) return APInt::getAllOnesValue(BitWidth); const APInt &RHS = CI->getValue(); - + switch (I.getPredicate()) { - // For a UGT comparison, we don't care about any bits that + // For a UGT comparison, we don't care about any bits that // correspond to the trailing ones of the comparand. The value of these // bits doesn't impact the outcome of the comparison, because any value // greater than the RHS must differ in a bit higher than these due to carry. @@ -1755,7 +1756,7 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); return ~lowBitsSet; } - + // Similarly, for a ULT comparison, we don't care about the trailing zeros. // Any value less than the RHS must differ in a higher bit because of carries. case ICmpInst::ICMP_ULT: { @@ -1763,17 +1764,17 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); return ~lowBitsSet; } - + default: return APInt::getAllOnesValue(BitWidth); } - + } Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
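[Editorial illustration of the idiom ProcessUGT_ADDCST_ADD just finished matching: for an i8-sized result computed in a wider type, the unsigned range check "(sum + 128) >u 255" fires exactly when the narrow signed add would overflow. A standalone exhaustive check, not LLVM code:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int a = -128; a <= 127; ++a)
      for (int b = -128; b <= 127; ++b) {
        int sum = a + b;  // the add performed in the wider type
        bool idiom = (uint32_t)(sum + 128) > 255u;  // (sum+128) >u 255
        bool overflow = sum < -128 || sum > 127;    // i8 signed overflow
        assert(idiom == overflow);
      }
    return 0;
  }
]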
@@ -1782,11 +1783,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { std::swap(Op0, Op1); Changed = true; } - + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - - const Type *Ty = Op0->getType(); + + Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations if (Ty->isIntegerTy(1)) { @@ -1835,13 +1836,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BitWidth = Ty->getScalarSizeInBits(); else if (TD) // Pointers require TD info to get their size. BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - + bool isSignBit = false; // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { Value *A = 0, *B = 0; - + // Match the following pattern, which is a common idiom when writing // overflow-safe integer arithmetic function. The source performs an // addition in wider type, and explicitly checks for overflow using @@ -1849,9 +1850,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // sadd_with_overflow intrinsic. // // TODO: This could probably be generalized to handle other overflow-safe - // operations if we worked out the formulas to compute the appropriate + // operations if we worked out the formulas to compute the appropriate // magic constants. - // + // // sum = a + b // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 { @@ -1861,14 +1862,14 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) return Res; } - + // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) if (I.isEquality() && CI->isZero() && match(Op0, m_Sub(m_Value(A), m_Value(B)))) { // (icmp cond A B) if cond is equality return new ICmpInst(I.getPredicate(), A, B); } - + // If we have an icmp le or icmp ge instruction, turn it into the // appropriate icmp lt or icmp gt instruction. This allows us to rely on // them being folded in the code below. The SimplifyICmpInst code has @@ -1892,7 +1893,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_SGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); } - + // If this comparison is a normal comparison, it demands all // bits, if it is a sign bit comparison, it only demands the sign bit. bool UnusedBit; @@ -1948,7 +1949,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - + // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. @@ -1960,7 +1961,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; - + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) == 0" into "x != 3". Value *X = 0; @@ -1969,7 +1970,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_NE, X, ConstantInt::get(X->getType(), CmpVal)); } - + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, // then turn "((8 >>u x)&1) == 0" into "x != 3". 
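[Editorial check of the fold quoted above: (1 << x) has exactly one set bit, so masking it with 8 is non-zero only when x == 3. Standalone, not LLVM code:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t x = 0; x < 32; ++x)
      // "((1 << x) & 8) == 0" is the same test as "x != 3".
      assert((((1u << x) & 8u) == 0) == (x != 3));
    return 0;
  }
]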
const APInt *CI; @@ -1979,13 +1980,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { ConstantInt::get(X->getType(), CI->countTrailingZeros())); } - + break; } case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); - + // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. @@ -1997,7 +1998,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; - + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) != 0" into "x == 3". Value *X = 0; @@ -2006,7 +2007,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_EQ, X, ConstantInt::get(X->getType(), CmpVal)); } - + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, // then turn "((8 >>u x)&1) != 0" into "x == 3". const APInt *CI; @@ -2016,7 +2017,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { ConstantInt::get(X->getType(), CI->countTrailingZeros())); } - + break; } case ICmpInst::ICMP_ULT: @@ -2137,9 +2138,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // See if we are doing a comparison between a constant and an instruction that // can be folded into the comparison. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - // Since the RHS is a ConstantInt (CI), if the left hand side is an - // instruction, see if that instruction also has constants so that the - // instruction can be folded into the icmp + // Since the RHS is a ConstantInt (CI), if the left hand side is an + // instruction, see if that instruction also has constants so that the + // instruction can be folded into the icmp if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) return Res; @@ -2194,7 +2195,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == + TD->getIntPtrType(RHSC->getContext()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); @@ -2227,8 +2228,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // values. If the ptr->ptr cast can be stripped off both arguments, we do so // now. if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { - if (Op0->getType()->isPointerTy() && - (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { + if (Op0->getType()->isPointerTy() && + (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { // We keep moving the cast from the left operand over to the right // operand, where it can often be eliminated completely. Op0 = CI->getOperand(0); @@ -2250,7 +2251,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), Op0, Op1); } } - + if (isa<CastInst>(Op0)) { // Handle the special case of: icmp (cast bool to X), <cst> // This comes up when you have code like @@ -2384,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } - + if (CI->isMaxValue(true)) { ICmpInst::Predicate Pred = I.isSigned() ? 
I.getUnsignedPredicate() @@ -2404,7 +2405,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Mask = -1 >> count-trailing-zeros(Cst). if (!CI->isZero() && !CI->isOne()) { const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(I.getContext(), + ConstantInt *Mask = ConstantInt::get(I.getContext(), APInt::getLowBitsSet(AP.getBitWidth(), AP.getBitWidth() - AP.countTrailingZeros())); @@ -2438,7 +2439,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } } - + { Value *A, *B; // ~x < ~y --> y < x // ~x < cst --> ~cst < x @@ -2452,11 +2453,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // (a+b) <u a --> llvm.uadd.with.overflow. // (a+b) <u b --> llvm.uadd.with.overflow. if (I.getPredicate() == ICmpInst::ICMP_ULT && - match(Op0, m_Add(m_Value(A), m_Value(B))) && + match(Op0, m_Add(m_Value(A), m_Value(B))) && (Op1 == A || Op1 == B)) if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) return R; - + // a >u (a+b) --> llvm.uadd.with.overflow. // b >u (a+b) --> llvm.uadd.with.overflow. if (I.getPredicate() == ICmpInst::ICMP_UGT && @@ -2465,7 +2466,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) return R; } - + if (I.isEquality()) { Value *A, *B, *C, *D; @@ -2483,10 +2484,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { Constant *NC = ConstantInt::get(I.getContext(), C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC, "tmp"); + Value *Xor = Builder->CreateXor(C, NC); return new ICmpInst(I.getPredicate(), A, Xor); } - + // A^B == A^D -> B == D if (A == C) return new ICmpInst(I.getPredicate(), B, D); if (A == D) return new ICmpInst(I.getPredicate(), B, C); @@ -2494,7 +2495,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (B == D) return new ICmpInst(I.getPredicate(), A, C); } } - + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { // A == (A^B) -> B == 0 @@ -2504,10 +2505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { Value *X = 0, *Y = 0, *Z = 0; - + if (A == C) { X = B; Y = D; Z = A; } else if (A == D) { @@ -2517,16 +2518,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } else if (B == D) { X = A; Y = C; Z = B; } - + if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y, "tmp"); - Op1 = Builder->CreateAnd(Op1, Z, "tmp"); + Op1 = Builder->CreateXor(X, Y); + Op1 = Builder->CreateAnd(Op1, Z); I.setOperand(0, Op1); I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; } } - + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to // "icmp (and X, mask), cst" uint64_t ShAmt = 0; @@ -2539,21 +2540,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // when it exposes other optimizations. 
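[Editorial illustration of the "(a+b) <u a" idiom matched above: with wrapping unsigned arithmetic, the sum compares below an operand exactly when the addition carried out of the type, which is why it can become llvm.uadd.with.overflow. Exhaustive at 8 bits, not LLVM code:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned a = 0; a <= 0xFF; ++a)
      for (unsigned b = 0; b <= 0xFF; ++b) {
        uint8_t sum = (uint8_t)(a + b);  // wrapping 8-bit add
        bool idiom = sum < (uint8_t)a;   // (a+b) <u a
        bool carry = a + b > 0xFF;       // the add really overflowed
        assert(idiom == carry);
      }
    return 0;
  }
]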
!A->hasOneUse()) { unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); - + if (ShAmt < ASize) { APInt MaskV = APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); MaskV <<= ShAmt; - + APInt CmpV = Cst1->getValue().zext(ASize); CmpV <<= ShAmt; - + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); } } } - + { Value *X; ConstantInt *Cst; // icmp X+Cst, X @@ -2579,31 +2580,31 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Constant *RHSC) { if (!isa<ConstantFP>(RHSC)) return 0; const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); - + // Get the width of the mantissa. We don't want to hack on conversions that // might lose information from the integer, e.g. "i64 -> float" int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); if (MantissaWidth == -1) return 0; // Unknown. - + // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); - + // If this is a uitofp instruction, we need an extra bit to hold the sign. bool LHSUnsigned = isa<UIToFPInst>(LHSI); if (LHSUnsigned) ++InputSize; - + // If the conversion would lose info, don't hack on this. if ((int)InputSize > MantissaWidth) return 0; - + // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is // not a NAN (it would have been previously simplified). assert(!RHS.isNaN() && "NaN comparison not already folded!"); - + ICmpInst::Predicate Pred; switch (I.getPredicate()) { default: llvm_unreachable("Unexpected predicate!"); @@ -2636,15 +2637,15 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case FCmpInst::FCMP_UNO: return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } - - const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); - + + IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); + // Now we know that the APFloat is a normal number, zero or inf. - + // See if the FP constant is too large for the integer. For example, // comparing an i8 to 300.0. unsigned IntWidth = IntTy->getScalarSizeInBits(); - + if (!LHSUnsigned) { // If the RHS value is > SignedMax, fold the comparison. This handles +INF // and large values. @@ -2670,7 +2671,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } } - + if (!LHSUnsigned) { // See if the RHS value is < SignedMin. APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); @@ -2766,7 +2767,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { bool Changed = false; - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
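[Editorial note on the guard in FoldFCmp_IntToFP_Cst above: the fold only proceeds when the integer operand fits in the mantissa of the FP type. A standalone demonstration with IEEE-754 single precision (24 significand bits), not part of the patch:

  #include <cassert>
  #include <cstdint>
  #include <limits>

  int main() {
    static_assert(std::numeric_limits<float>::digits == 24,
                  "assumes IEEE-754 single precision");
    // Any 16-bit value survives the int -> float -> int round trip ...
    for (uint32_t i = 0; i <= 0xFFFF; ++i)
      assert((uint32_t)(float)i == i);
    // ... but a full 32-bit value generally does not (this one rounds to
    // 2^32), which is why the fold bails out when InputSize exceeds
    // MantissaWidth.
    uint64_t big = 0xFFFFFFFFull;
    assert((uint64_t)(float)big != big);
    return 0;
  }
]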
@@ -2776,7 +2777,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); @@ -2792,7 +2793,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { I.setPredicate(FCmpInst::FCMP_UNO); I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; - + case FCmpInst::FCMP_ORD: // True if ordered (no nans) case FCmpInst::FCMP_OEQ: // True if ordered and equal case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal @@ -2803,7 +2804,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return &I; } } - + // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast<Constant>(Op1)) { if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) @@ -2836,10 +2837,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { APFloat F = RHSF->getValueAPF(); F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); - // Avoid lossy conversions and denormals. + // Avoid lossy conversions and denormals. Zero is a special case + // that's OK to convert. + APFloat Fabs = F; + Fabs.clearSign(); if (!Lossy && - F.compare(APFloat::getSmallestNormalized(*Sem)) != - APFloat::cmpLessThan) + ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) || Fabs.isZero())) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), ConstantFP::get(RHSC->getContext(), F)); break; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index f499290..7446a51 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -26,7 +26,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. 
if (TD) { - const Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); @@ -38,7 +38,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - const Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); @@ -58,8 +58,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { Idx[0] = NullIdx; Idx[1] = NullIdx; Instruction *GEP = - GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, - New->getName()+".sub"); + GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub"); InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original @@ -92,28 +91,28 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, User *CI = cast<User>(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); - const PointerType *DestTy = cast<PointerType>(CI->getType()); - const Type *DestPTy = DestTy->getElementType(); - if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { + PointerType *DestTy = cast<PointerType>(CI->getType()); + Type *DestPTy = DestTy->getElementType(); + if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { // If the address spaces don't match, don't eliminate the cast. if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) return 0; - const Type *SrcPTy = SrcTy->getElementType(); + Type *SrcPTy = SrcTy->getElementType(); if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. - if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) + if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); Idxs[1] = Idxs[0]; - CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); } @@ -133,6 +132,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); + NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } @@ -163,8 +163,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - // None of the following transforms are legal for volatile loads. - if (LI.isVolatile()) return 0; + // None of the following transforms are legal for volatile/atomic loads. + // FIXME: Some of it is okay for atomic loads; needs refactoring. 
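[Editorial gloss, not patch content: the isSimple() predicate this change switches to subsumes the old volatile-only check. In this revision of the API a load or store is "simple" when it is neither volatile nor atomic; a sketch of the predicate's meaning, with illustrative field names that are not LLVM's:

  // Only "simple" accesses may be forwarded, merged, or deleted by the
  // transforms that follow.
  struct MemAccessFlags {
    bool IsVolatile;
    bool IsAtomic;  // any ordering other than non-atomic
    bool isSimple() const { return !IsVolatile && !IsAtomic; }
  };
]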
+ if (!LI.isSimple()) return 0; // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, @@ -256,11 +257,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { User *CI = cast<User>(SI.getOperand(1)); Value *CastOp = CI->getOperand(0); - const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); - const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); + Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); + PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - const Type *SrcPTy = SrcTy->getElementType(); + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; @@ -280,12 +281,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { NewGEPIndices.push_back(Zero); while (1) { - if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) { + if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ break; NewGEPIndices.push_back(Zero); SrcPTy = STy->getElementType(0); - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { NewGEPIndices.push_back(Zero); SrcPTy = ATy->getElementType(); } else { @@ -314,8 +315,8 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Value *NewCast; Value *SIOp0 = SI.getOperand(0); Instruction::CastOps opcode = Instruction::BitCast; - const Type* CastSrcTy = SIOp0->getType(); - const Type* CastDstTy = SrcPTy; + Type* CastSrcTy = SIOp0->getType(); + Type* CastDstTy = SrcPTy; if (CastDstTy->isPointerTy()) { if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; @@ -327,8 +328,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) - CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), - NewGEPIndices.end()); + CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); @@ -370,21 +370,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); - // If the RHS is an alloca with a single use, zapify the store, making the - // alloca dead. - if (!SI.isVolatile()) { - if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) - return EraseInstFromFunction(SI); - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { - if (isa<AllocaInst>(GEP->getOperand(0))) { - if (GEP->getOperand(0)->hasOneUse()) - return EraseInstFromFunction(SI); - } - } - } - } - // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = @@ -400,6 +385,23 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { SI.setAlignment(EffectiveStoreAlign); } + // Don't hack volatile/atomic stores. + // FIXME: Some bits are legal for atomic stores; needs refactoring. + if (!SI.isSimple()) return 0; + + // If the RHS is an alloca with a single use, zapify the store, making the + // alloca dead. 
+ if (Ptr->hasOneUse()) { + if (isa<AllocaInst>(Ptr)) + return EraseInstFromFunction(SI); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { + if (isa<AllocaInst>(GEP->getOperand(0))) { + if (GEP->getOperand(0)->hasOneUse()) + return EraseInstFromFunction(SI); + } + } + } + // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. @@ -417,8 +419,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), - SI.getOperand(1))) { + if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + SI.getOperand(1))) { ++NumDeadStore; ++BBI; EraseInstFromFunction(*PrevSI); @@ -432,7 +434,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - !SI.isVolatile()) + LI->isSimple()) return EraseInstFromFunction(SI); // Otherwise, this is a load from some other location. Stores before it @@ -444,9 +446,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; } - - - if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { @@ -549,11 +548,11 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; --BBI; } - // If this isn't a store, isn't a store to the same location, or if the - // alignments differ, bail out. + // If this isn't a store, isn't a store to the same location, or is not the + // right kind of store, bail out. OtherStore = dyn_cast<StoreInst>(BBI); if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) + !SI.isSameOperationAs(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the @@ -569,7 +568,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Check to see if we find the matching store. if ((OtherStore = dyn_cast<StoreInst>(BBI))) { if (OtherStore->getOperand(1) != SI.getOperand(1) || - OtherStore->getAlignment() != SI.getAlignment()) + !SI.isSameOperationAs(OtherStore)) return false; break; } @@ -601,10 +600,12 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Advance to a place where it is safe to insert the new store and // insert it. 
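[A source-level picture of what SimplifyStoreAtEndOfBlock is doing here — editorial illustration only; the pass works on IR, and the isSameOperationAs change above additionally requires the two stores to agree on volatility, alignment, and atomic ordering rather than on alignment alone:

  void before(bool c, int *p, int a, int b) {
    if (c) *p = a;   // store in the 'then' block
    else   *p = b;   // store in the 'else' block
  }

  void after(bool c, int *p, int a, int b) {
    *p = c ? a : b;  // the two stores merged into one in the join block
  }
]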
- BBI = DestBB->getFirstNonPHI(); + BBI = DestBB->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), - OtherStore->isVolatile(), - SI.getAlignment()); + SI.isVolatile(), + SI.getAlignment(), + SI.getOrdering(), + SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(OtherStore->getDebugLoc()); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 630a6fe..7f48125 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -38,7 +38,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { m_Value(B))) && // The "1" can be any value known to be a power of 2. isPowerOfTwo(PowerOf2, IC.getTargetData())) { - A = IC.Builder->CreateSub(A, B, "tmp"); + A = IC.Builder->CreateSub(A, B); return IC.Builder->CreateShl(PowerOf2, A); } @@ -131,7 +131,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { { Value *X; ConstantInt *C1; if (Op0->hasOneUse() && match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) { - Value *Add = Builder->CreateMul(X, CI, "tmp"); + Value *Add = Builder->CreateMul(X, CI); return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI)); } } @@ -244,7 +244,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (BoolCast) { Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), - BoolCast, "tmp"); + BoolCast); return BinaryOperator::CreateAnd(V, OtherOp); } } @@ -421,7 +421,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { /// dyn_castZExtVal - Checks if V is a zext or constant that can /// be truncated to Ty without losing bits. -static Value *dyn_castZExtVal(Value *V, const Type *Ty) { +static Value *dyn_castZExtVal(Value *V, Type *Ty) { if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) { if (Z->getSrcTy() == Ty) return Z->getOperand(0); @@ -466,8 +466,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { { const APInt *CI; Value *N; if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) { if (*CI != 1) - N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()), - "tmp"); + N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2())); if (I.isExact()) return BinaryOperator::CreateExactLShr(Op0, N); return BinaryOperator::CreateLShr(Op0, N); @@ -630,7 +629,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) if (match(Op1, m_Shl(m_Power2(), m_Value()))) { Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(Op1, N1, "tmp"); + Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 3777340..664546c 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -28,8 +28,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Value *LHSVal = FirstInst->getOperand(0); Value *RHSVal = FirstInst->getOperand(1); - const Type *LHSType = LHSVal->getType(); - const Type *RHSType = RHSVal->getType(); + Type *LHSType = LHSVal->getType(); + Type *RHSType = RHSVal->getType(); bool isNUW = false, isNSW = false, isExact = false; if (OverflowingBinaryOperator *BO = @@ -229,8 +229,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { Value *Base = FixedOperands[0]; GetElementPtrInst *NewGEP = - 
GetElementPtrInst::Create(Base, FixedOperands.begin()+1, - FixedOperands.end()); + GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); if (AllInBounds) NewGEP->setIsInBounds(); NewGEP->setDebugLoc(FirstInst->getDebugLoc()); return NewGEP; @@ -287,7 +286,12 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); - + + // FIXME: This is overconservative; this transform is allowed in some cases + // for atomic operations. + if (FirstLI->isAtomic()) + return 0; + // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. We currently // don't sink loads when some have their alignment specified and some don't. @@ -397,7 +401,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // the same type or "+42") we can pull the operation through the PHI, reducing // code size and simplifying code. Constant *ConstantOp = 0; - const Type *CastSrcTy = 0; + Type *CastSrcTy = 0; bool isNUW = false, isNSW = false, isExact = false; if (isa<CastInst>(FirstInst)) { @@ -572,7 +576,7 @@ struct LoweredPHIRecord { unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) + LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} // Ctor form used by DenseMap. @@ -701,7 +705,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { unsigned PHIId = PHIUsers[UserI].PHIId; PHINode *PN = PHIsToSlice[PHIId]; unsigned Offset = PHIUsers[UserI].Shift; - const Type *Ty = PHIUsers[UserI].Inst->getType(); + Type *Ty = PHIUsers[UserI].Inst->getType(); PHINode *EltPHI; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5733c20..91e60a4 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" using namespace llvm; using namespace PatternMatch; @@ -323,9 +324,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, } // All operands were constants, fold it. - if (ConstOps.size() == I->getNumOperands()) + if (ConstOps.size() == I->getNumOperands()) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - ConstOps.data(), ConstOps.size(), TD); + ConstOps, TD); + } } return 0; @@ -363,7 +369,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: { // These transformations only work for selects over integers. - const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType()); + IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType()); if (!SelectTy) break; @@ -443,7 +449,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // FIXME: Type and constness constraints could be lifted, but we have to // watch code size carefully. We should consider xor instead of // sub/add when we decide to do that. 
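[Editorial illustration of the select folds built on SimplifyWithOpReplaced, which the hunk above extends to constant-fold loads from constant pointers: substituting the compared operands into one arm and simplifying can prove the two arms equal, so the select disappears.

  // Both selects fold away: substituting x -> 0 into one arm under the
  // guard of the comparison makes the arms provably equal.
  int fold_eq(int x) { return x == 0 ? 0 : x; }  // simplifies to: return x;
  int fold_ne(int x) { return x != 0 ? x : 0; }  // simplifies to: return x;
]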
- if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { + if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { ConstantInt *C1 = NULL, *C2 = NULL; @@ -476,10 +482,16 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) + return ReplaceInstUsesWith(SI, FalseVal); } else if (Pred == ICmpInst::ICMP_NE) { if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + return ReplaceInstUsesWith(SI, TrueVal); } // NOTE: if we wanted to, this is where to detect integer MIN/MAX diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 811f949..6d85add 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -207,11 +208,12 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, return I; case Instruction::Shl: { - unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); // We only accept shifts-by-a-constant in CanEvaluateShifted. - ConstantInt *CI = cast<ConstantInt>(I->getOperand(1)); - + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). if (isLeftShift) { // If this is oversized composite shift, then unsigned shifts get 0. @@ -219,7 +221,9 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, if (NewShAmt >= TypeWidth) return Constant::getNullValue(I->getType()); - I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt)); + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); return I; } @@ -227,11 +231,11 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // zeros. if (CI->getValue() == NumBits) { APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits)); - V = IC.Builder->CreateAnd(I->getOperand(0), - ConstantInt::get(I->getContext(), Mask)); + V = IC.Builder->CreateAnd(BO->getOperand(0), + ConstantInt::get(BO->getContext(), Mask)); if (Instruction *VI = dyn_cast<Instruction>(V)) { - VI->moveBefore(I); - VI->takeName(I); + VI->moveBefore(BO); + VI->takeName(BO); } return V; } @@ -239,23 +243,27 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that // the and won't be needed. 
assert(CI->getZExtValue() > NumBits); - I->setOperand(1, ConstantInt::get(I->getType(), - CI->getZExtValue() - NumBits)); - return I; + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setHasNoUnsignedWrap(false); + BO->setHasNoSignedWrap(false); + return BO; } case Instruction::LShr: { - unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + BinaryOperator *BO = cast<BinaryOperator>(I); + unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); // We only accept shifts-by-a-constant in CanEvaluateShifted. - ConstantInt *CI = cast<ConstantInt>(I->getOperand(1)); + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) { // If this is oversized composite shift, then unsigned shifts get 0. unsigned NewShAmt = NumBits+CI->getZExtValue(); if (NewShAmt >= TypeWidth) - return Constant::getNullValue(I->getType()); + return Constant::getNullValue(BO->getType()); - I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt)); + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); + BO->setIsExact(false); return I; } @@ -264,7 +272,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, if (CI->getValue() == NumBits) { APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits)); V = IC.Builder->CreateAnd(I->getOperand(0), - ConstantInt::get(I->getContext(), Mask)); + ConstantInt::get(BO->getContext(), Mask)); if (Instruction *VI = dyn_cast<Instruction>(V)) { VI->moveBefore(I); VI->takeName(I); @@ -275,9 +283,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that // the and won't be needed. assert(CI->getZExtValue() > NumBits); - I->setOperand(1, ConstantInt::get(I->getType(), - CI->getZExtValue() - NumBits)); - return I; + BO->setOperand(1, ConstantInt::get(BO->getType(), + CI->getZExtValue() - NumBits)); + BO->setIsExact(false); + return BO; } case Instruction::Select: @@ -528,7 +537,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. 
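[The shl-of-shl case in GetShiftedValue above folds two constant shifts into one; the diff also clears the nuw/nsw flags because they were proven for the original shift amount, not the combined one. The underlying identity, checked exhaustively at 8 bits — standalone illustration, not LLVM code:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned v = 0; v <= 0xFF; ++v)
      for (unsigned c1 = 0; c1 < 8; ++c1)
        for (unsigned c2 = 0; c1 + c2 < 8; ++c2)
          // shl(c1) followed by shl(c2) equals one shl(c1+c2), with bits
          // truncated to the 8-bit type width at each step.
          assert((uint8_t)((uint8_t)(v << c1) << c2) ==
                 (uint8_t)(v << (c1 + c2)));
    return 0;
  }
]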
- const IntegerType *Ty = cast<IntegerType>(I.getType()); + IntegerType *Ty = cast<IntegerType>(I.getType()); // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8fea8eb..5cd9a4b 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -103,7 +103,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(V != 0 && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); - const Type *VTy = V->getType(); + Type *VTy = V->getType(); assert((TD || !VTy->isPointerTy()) && "SimplifyDemandedBits needs to know bit widths!"); assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && @@ -325,8 +325,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { Constant *AndC = Constant::getIntegerValue(VTy, ~RHSKnownOne & DemandedMask); - Instruction *And = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); return InsertNewInstWith(And, *I); } } @@ -351,14 +350,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Constant *AndC = ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); - Instruction *NewAnd = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); InsertNewInstWith(NewAnd, *I); Constant *XorC = ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); - Instruction *NewXor = - BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); + Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); return InsertNewInstWith(NewXor, *I); } @@ -404,8 +401,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) return 0; // vector->int or fp->int? - if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { - if (const VectorType *SrcVTy = + if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { + if (VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { if (DstVTy->getNumElements() != SrcVTy->getNumElements()) // Don't touch a bitcast between vectors of different element counts. 
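[Editorial note on the demanded-bits rewrites above: bits of a constant lying outside the demanded mask can never influence the result, so the constant may be narrowed. A standalone exhaustive check of the xor instance, not LLVM code:

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint8_t M = 0x0F;  // the demanded bits
    for (unsigned x = 0; x <= 0xFF; ++x)
      for (unsigned c = 0; c <= 0xFF; ++c)
        // Clearing the non-demanded bits of the xor constant is
        // invisible through the mask M.
        assert(((x ^ c) & M) == ((x ^ (c & M)) & M));
    return 0;
  }
]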
@@ -826,7 +823,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts = 0; if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; @@ -855,7 +852,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (DemandedElts.isAllOnesValue()) return 0; - const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Zero = Constant::getNullValue(EltTy); Constant *Undef = UndefValue::get(EltTy); std::vector<Constant*> Elts; @@ -962,6 +959,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { UndefElts.setBit(i); + } else if (!DemandedElts[i]) { + NewUndefElts = true; + UndefElts.setBit(i); } else if (MaskVal < LHSVWidth) { if (UndefElts4[MaskVal]) { NewUndefElts = true; @@ -992,7 +992,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } case Instruction::BitCast: { // Vector->vector casts only. - const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); + VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); if (!VTy) break; unsigned InVWidth = VTy->getNumElements(); APInt InputDemandedElts(InVWidth, 0); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index ad6a8d0..154267c 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -77,7 +77,7 @@ static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) { /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); - const VectorType *PTy = cast<VectorType>(V->getType()); + VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. return UndefValue::get(PTy->getElementType()); @@ -175,7 +175,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { - if (const VectorType *VT = + if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) @@ -225,7 +225,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { SrcIdx -= LHSWidth; Src = SVI->getOperand(1); } - const Type *Int32Ty = Type::getInt32Ty(EI.getContext()); + Type *Int32Ty = Type::getInt32Ty(EI.getContext()); return ExtractElementInst::Create(Src, ConstantInt::get(Int32Ty, SrcIdx, false)); @@ -555,7 +555,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // shuffle mask, do the replacement. 
if (isSplat || NewMask == LHSMask || NewMask == Mask) { std::vector<Constant*> Elts; - const Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { if (NewMask[i] < 0) { Elts.push_back(UndefValue::get(Int32Ty)); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index ab98ef9..92874b9 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -46,8 +46,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm-c/Initialization.h" #include <algorithm> #include <climits> @@ -83,7 +85,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { /// ShouldChangeType - Return true if it is desirable to convert a computation /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. -bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { +bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); // If we don't have TD, we don't know if the source/dest are legal. @@ -107,6 +109,43 @@ bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { return true; } +// Return true, if No Signed Wrap should be maintained for I. +// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", +// where both B and C should be ConstantInts, results in a constant that does +// not overflow. This function only handles the Add and Sub opcodes. For +// all other opcodes, the function conservatively returns false. +static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { + OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I); + if (!OBO || !OBO->hasNoSignedWrap()) { + return false; + } + + // We reason about Add and Sub Only. + Instruction::BinaryOps Opcode = I.getOpcode(); + if (Opcode != Instruction::Add && + Opcode != Instruction::Sub) { + return false; + } + + ConstantInt *CB = dyn_cast<ConstantInt>(B); + ConstantInt *CC = dyn_cast<ConstantInt>(C); + + if (!CB || !CC) { + return false; + } + + const APInt &BVal = CB->getValue(); + const APInt &CVal = CC->getValue(); + bool Overflow = false; + + if (Opcode == Instruction::Add) { + BVal.sadd_ov(CVal, Overflow); + } else { + BVal.ssub_ov(CVal, Overflow); + } + + return !Overflow; +} /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative: @@ -158,7 +197,16 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + if (MaintainNoSignedWrap(I, B, C) && + (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { + // Note: this is only valid because SimplifyBinOp doesn't look at + // the operands to Op0. 
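[Editorial gloss on the new MaintainNoSignedWrap helper above: nsw survives the reassociation only when the folded constant operation itself cannot overflow, which is what the sadd_ov/ssub_ov probes decide. The same test written with a checked-arithmetic builtin — assumes gcc/clang, illustration only:

  #include <cstdint>

  // Mirrors the Add case of MaintainNoSignedWrap: keep nsw only if B + C
  // is representable, i.e. the constant fold cannot signed-overflow.
  static bool addKeepsNSW(int32_t B, int32_t C) {
    int32_t R;
    return !__builtin_add_overflow(B, C, &R);
  }
]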
+ I.clearSubclassOptionalData(); + I.setHasNoSignedWrap(true); + } else { + I.clearSubclassOptionalData(); + } + Changed = true; ++NumReassoc; continue; @@ -240,7 +288,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Constant *C2 = cast<Constant>(Op1->getOperand(1)); Constant *Folded = ConstantExpr::get(Opcode, C1, C2); - Instruction *New = BinaryOperator::Create(Opcode, A, B); + BinaryOperator *New = BinaryOperator::Create(Opcode, A, B); InsertNewInstWith(New, I); New->takeName(Op1); I.setOperand(0, New); @@ -248,6 +296,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. I.clearSubclassOptionalData(); + Changed = true; continue; } @@ -516,8 +565,8 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // If it's a bitcast involving vectors, make sure it has the same number of // elements on both sides. if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) { - const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy()); - const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); + VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy()); + VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); // Verify that either both or neither are vectors. if ((SrcTy == NULL) != (DestTy == NULL)) return 0; @@ -654,7 +703,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } } else { CastInst *CI = cast<CastInst>(&I); - const Type *RetTy = CI->getType(); + Type *RetTy = CI->getType(); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) @@ -680,7 +729,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// or not there is a sequence of GEP indices into the type that will land us at /// the specified offset. If so, fill them into NewIndices and return the /// resultant element type, otherwise return null. -const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices) { if (!TD) return 0; if (!Ty->isSized()) return 0; @@ -688,7 +737,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, // Start with the index over the outer type. 
Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -711,7 +760,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) return 0; - if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD->getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); @@ -722,7 +771,7 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); @@ -737,12 +786,20 @@ const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset, return Ty; } - +static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { + // If this GEP has only 0 indices, it is the same pointer as + // Src. If Src is not a trivial GEP too, don't combine + // the indices. + if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && + !Src.hasOneUse()) + return false; + return true; +} Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) + if (Value *V = SimplifyGEPInst(Ops, TD)) return ReplaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -751,13 +808,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // by multiples of a zero size type with zero. if (TD) { bool MadeChange = false; - const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; ++I, ++GTI) { // Skip indices into struct types. - const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); + SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); if (!SeqTy) continue; // If the element type has zero size then any index over it is equivalent @@ -785,21 +842,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // getelementptr instructions into a single instruction. // if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { - - // If this GEP has only 0 indices, it is the same pointer as - // Src. If Src is not a trivial GEP too, don't combine - // the indices. - if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() && - !Src->hasOneUse()) + if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) return 0; // Note that if our source is a gep chain itself that we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. 
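[The gep-of-gep merge being set up here is ordinary pointer-arithmetic reassociation. In C terms, as an editorial illustration:

  #include <cassert>

  int main() {
    int buf[16] = {0};
    int *a = buf + 3;      // gep buf, 3
    int *b = a + 5;        // gep (gep buf, 3), 5
    assert(b == buf + 8);  // the merged form: gep buf, 8
    return 0;
  }
]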
- // - if (GetElementPtrInst *SrcGEP = - dyn_cast<GetElementPtrInst>(Src->getOperand(0))) - if (SrcGEP->getNumOperands() == 2) + if (GEPOperator *SrcGEP = + dyn_cast<GEPOperator>(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) return 0; // Wait until our source is folded to completion. SmallVector<Value*, 8> Indices; @@ -851,15 +902,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!Indices.empty()) return (GEP.isInBounds() && Src->isInBounds()) ? - GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), - Indices.end(), GEP.getName()) : - GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), - Indices.end(), GEP.getName()); + GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices, + GEP.getName()) : + GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); } // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); if (StrippedPtr != PtrOp && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { @@ -875,21 +925,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { - const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); - if (const ArrayType *CATy = + PointerType *CPTy = cast<PointerType>(PtrOp->getType()); + if (ArrayType *CATy = dyn_cast<ArrayType>(CPTy->getElementType())) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end()); GetElementPtrInst *Res = - GetElementPtrInst::Create(StrippedPtr, Idx.begin(), - Idx.end(), GEP.getName()); + GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); return Res; } - if (const ArrayType *XATy = + if (ArrayType *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { @@ -907,8 +956,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Transform things like: // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast - const Type *SrcElTy = StrippedPtrTy->getElementType(); - const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); + Type *SrcElTy = StrippedPtrTy->getElementType(); + Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { @@ -916,8 +965,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); Idx[1] = GEP.getOperand(1); Value *NewGEP = GEP.isInBounds() ? 
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()); + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return new BitCastInst(NewGEP, GEP.getType()); } @@ -975,8 +1024,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); Idx[1] = NewIdx; Value *NewGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()): - Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()); + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()): + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return new BitCastInst(NewGEP, GEP.getType()); } @@ -1023,14 +1072,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector<Value*, 8> NewIndices; - const Type *InTy = + Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); if (FindElementAtOffset(InTy, Offset, NewIndices)) { Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()); + Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : + Builder->CreateGEP(BCI->getOperand(0), NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -1045,15 +1092,43 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { -static bool IsOnlyNullComparedAndFreed(const Value &V) { - for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end(); +static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users, + int Depth = 0) { + if (Depth == 8) + return false; + + for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { - const User *U = *UI; - if (isFreeCall(U)) + User *U = *UI; + if (isFreeCall(U)) { + Users.push_back(U); continue; - if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U)) - if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) + } + if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { + if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) { + Users.push_back(ICI); + continue; + } + } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) { + Users.push_back(BCI); + continue; + } + } + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) { + Users.push_back(GEPI); + continue; + } + } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + Users.push_back(II); continue; + } + } return false; } return true; @@ -1063,25 +1138,20 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. 
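Aside: the rewritten IsOnlyNullComparedAndFreed above now also walks through bitcasts, GEPs and lifetime intrinsics while collecting users, and visitMalloc below deletes everything it collected. The source-level effect, roughly (plain C++ analogue, not the pass itself):

    #include <cassert>
    #include <cstdlib>

    // A malloc whose result is only null-checked and freed has no other
    // observable use, so the allocation, the free and the compare can all
    // be deleted; the equality test folds to "not null".
    static int before() {
      void *P = std::malloc(16);   // never loaded from or stored through
      int IsNull = (P == nullptr); // use #1: null comparison
      std::free(P);                // use #2: free
      return IsNull;
    }

    static int after() {
      return 0; // icmp eq folds to false, per C->isFalseWhenEqual()
    }

    int main() {
      assert(before() == after()); // equal whenever malloc(16) succeeds
      return 0;
    }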
- if (IsOnlyNullComparedAndFreed(MI)) { - for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end(); - UI != UE;) { - // We can assume that every remaining use is a free call or an icmp eq/ne - // to null, so the cast is safe. - Instruction *I = cast<Instruction>(*UI); - - // Early increment here, as we're about to get rid of the user. - ++UI; - - if (isFreeCall(I)) { - EraseInstFromFunction(*cast<CallInst>(I)); - continue; + SmallVector<WeakVH, 64> Users; + if (IsOnlyNullComparedAndFreed(&MI, Users)) { + for (unsigned i = 0, e = Users.size(); i != e; ++i) { + Instruction *I = cast_or_null<Instruction>(&*Users[i]); + if (!I) continue; + + if (ICmpInst *C = dyn_cast<ICmpInst>(I)) { + ReplaceInstUsesWith(*C, + ConstantInt::get(Type::getInt1Ty(C->getContext()), + C->isFalseWhenEqual())); + } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { + ReplaceInstUsesWith(*I, UndefValue::get(I->getType())); } - // Again, the cast is safe. - ICmpInst *C = cast<ICmpInst>(I); - ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), - C->isFalseWhenEqual())); - EraseInstFromFunction(*C); + EraseInstFromFunction(*I); } return EraseInstFromFunction(MI); } @@ -1120,8 +1190,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { !isa<Constant>(X)) { // Swap Destinations and condition... BI.setCondition(X); - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); return &BI; } @@ -1136,8 +1205,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); // Swap Destinations and condition. - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); Worklist.Add(Cond); return &BI; } @@ -1153,8 +1221,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); // Swap Destinations and condition. - BI.setSuccessor(0, FalseDest); - BI.setSuccessor(1, TrueDest); + BI.swapSuccessors(); Worklist.Add(Cond); return &BI; } @@ -1168,11 +1235,17 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { if (I->getOpcode() == Instruction::Add) if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' - for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) - SI.setOperand(i, - ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), - AddRHS)); - SI.setOperand(0, I->getOperand(0)); + unsigned NumCases = SI.getNumCases(); + // Skip the first item since that's the default case. 
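The case-value rewrite here ('switch (X+4) case 1:' becoming 'switch (X) case -3:') is easy to sanity-check in source form before reading the rewritten loop below:

    #include <cassert>

    static int dispatchBefore(int X) {
      switch (X + 4) { case 1: return 10; default: return 0; }
    }

    // Same dispatch with the add folded into the case value, which is what
    // the loop below does to every case of the SwitchInst.
    static int dispatchAfter(int X) {
      switch (X) { case 1 - 4: return 10; default: return 0; }
    }

    int main() {
      for (int X = -10; X <= 10; ++X)
        assert(dispatchBefore(X) == dispatchAfter(X));
      return 0;
    }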
+ for (unsigned i = 1; i < NumCases; ++i) { + ConstantInt* CaseVal = SI.getCaseValue(i); + Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal), + AddRHS); + assert(isa<ConstantInt>(NewCaseVal) && + "Result of expression should be constant"); + SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal)); + } + SI.setCondition(I->getOperand(0)); Worklist.Add(I); return &SI; } @@ -1242,7 +1315,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), - ArrayRef<unsigned>(insi, inse)); + makeArrayRef(insi, inse)); } if (insi == inse) // The insert list is a prefix of the extract list @@ -1254,7 +1327,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // with // %E extractvalue { i32 } { i32 42 }, 0 return ExtractValueInst::Create(IV->getInsertedValueOperand(), - ArrayRef<unsigned>(exti, exte)); + makeArrayRef(exti, exte)); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which @@ -1310,7 +1383,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // load from a GEP. This reduces the size of the load. // FIXME: If a load is used only by extractvalue instructions then this // could be done regardless of having multiple uses. - if (!L->isVolatile() && L->hasOneUse()) { + if (L->isSimple() && L->hasOneUse()) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector<Value*, 4> Indices; // Prefix an i32 0 since we need the first element. @@ -1322,8 +1395,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // We need to insert these at the location of the old load, not at that of // the extractvalue. Builder->SetInsertPoint(L->getParent(), L); - Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), - Indices.begin(), Indices.end()); + Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); // Returning the load directly will cause the main loop to insert it in // the wrong spot, so use ReplaceInstUsesWith(). return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); @@ -1339,6 +1411,342 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return 0; } +enum Personality_Type { + Unknown_Personality, + GNU_Ada_Personality, + GNU_CXX_Personality +}; + +/// RecognizePersonality - See if the given exception handling personality +/// function is one that we understand. If so, return a description of it; +/// otherwise return Unknown_Personality. +static Personality_Type RecognizePersonality(Value *Pers) { + Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); + if (!F) + return Unknown_Personality; + return StringSwitch<Personality_Type>(F->getName()) + .Case("__gnat_eh_personality", GNU_Ada_Personality) + .Case("__gxx_personality_v0", GNU_CXX_Personality) + .Default(Unknown_Personality); +} + +/// isCatchAll - Return 'true' if the given typeinfo will match anything. +static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) { + switch (Personality) { + case Unknown_Personality: + return false; + case GNU_Ada_Personality: + // While __gnat_all_others_value will match any Ada exception, it doesn't + // match foreign exceptions (or didn't, before gcc-4.7). 
+ return false; + case GNU_CXX_Personality: + return TypeInfo->isNullValue(); + } + llvm_unreachable("Unknown personality!"); +} + +static bool shorter_filter(const Value *LHS, const Value *RHS) { + return + cast<ArrayType>(LHS->getType())->getNumElements() + < + cast<ArrayType>(RHS->getType())->getNumElements(); +} + +Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { + // The logic here should be correct for any real-world personality function. + // However if that turns out not to be true, the offending logic can always + // be conditioned on the personality function, like the catch-all logic is. + Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn()); + + // Simplify the list of clauses, eg by removing repeated catch clauses + // (these are often created by inlining). + bool MakeNewInstruction = false; // If true, recreate using the following: + SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction; + bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. + + SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. + for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { + bool isLastClause = i + 1 == e; + if (LI.isCatch(i)) { + // A catch clause. + Value *CatchClause = LI.getClause(i); + Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts()); + + // If we already saw this clause, there is no point in having a second + // copy of it. + if (AlreadyCaught.insert(TypeInfo)) { + // This catch clause was not already seen. + NewClauses.push_back(CatchClause); + } else { + // Repeated catch clause - drop the redundant copy. + MakeNewInstruction = true; + } + + // If this is a catch-all then there is no point in keeping any following + // clauses or marking the landingpad as having a cleanup. + if (isCatchAll(Personality, TypeInfo)) { + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + } else { + // A filter clause. If any of the filter elements were already caught + // then they can be dropped from the filter. It is tempting to try to + // exploit the filter further by saying that any typeinfo that does not + // occur in the filter can't be caught later (and thus can be dropped). + // However this would be wrong, since typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some + // class derived from it). + assert(LI.isFilter(i) && "Unsupported landingpad clause!"); + Value *FilterClause = LI.getClause(i); + ArrayType *FilterType = cast<ArrayType>(FilterClause->getType()); + unsigned NumTypeInfos = FilterType->getNumElements(); + + // An empty filter catches everything, so there is no point in keeping any + // following clauses or marking the landingpad as having a cleanup. By + // dealing with this case here the following code is made a bit simpler. + if (!NumTypeInfos) { + NewClauses.push_back(FilterClause); + if (!isLastClause) + MakeNewInstruction = true; + CleanupFlag = false; + break; + } + + bool MakeNewFilter = false; // If true, make a new filter. + SmallVector<Constant *, 16> NewFilterElts; // New elements. + if (isa<ConstantAggregateZero>(FilterClause)) { + // Not an empty filter - it contains at least one null typeinfo. + assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); + Constant *TypeInfo = + Constant::getNullValue(FilterType->getElementType()); + // If this typeinfo is a catch-all then the filter can never match. 
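Backing up to the top of this hunk: the AlreadyCaught set is what drops repeated catch clauses, which inlining creates constantly. A compilable model of that step, with std::set standing in for SmallPtrSet and plain pointers for typeinfos; the catch-all handling described in the comment above resumes right after this aside:

    #include <cassert>
    #include <set>
    #include <vector>

    using TypeInfo = const void *; // stand-in for a stripped typeinfo constant

    // Keep the first occurrence of each typeinfo; report whether any
    // duplicate was dropped (what forces rebuilding the landingpad).
    static bool dedupCatches(std::vector<TypeInfo> &Clauses) {
      std::set<TypeInfo> Seen;
      std::vector<TypeInfo> Kept;
      for (TypeInfo TI : Clauses)
        if (Seen.insert(TI).second) // first sighting: keep it
          Kept.push_back(TI);
      bool Changed = Kept.size() != Clauses.size();
      Clauses.swap(Kept);
      return Changed;
    }

    int main() {
      int A, B;
      std::vector<TypeInfo> Clauses = {&A, &B, &A};
      assert(dedupCatches(Clauses) && Clauses.size() == 2);
      return 0;
    }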
+ if (isCatchAll(Personality, TypeInfo)) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // There is no point in having multiple copies of this typeinfo, so + // discard all but the first copy if there is more than one. + NewFilterElts.push_back(TypeInfo); + if (NumTypeInfos > 1) + MakeNewFilter = true; + } else { + ConstantArray *Filter = cast<ConstantArray>(FilterClause); + SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. + NewFilterElts.reserve(NumTypeInfos); + + // Remove any filter elements that were already caught or that already + // occurred in the filter. While there, see if any of the elements are + // catch-alls. If so, the filter can be discarded. + bool SawCatchAll = false; + for (unsigned j = 0; j != NumTypeInfos; ++j) { + Value *Elt = Filter->getOperand(j); + Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts()); + if (isCatchAll(Personality, TypeInfo)) { + // This element is a catch-all. Bail out, noting this fact. + SawCatchAll = true; + break; + } + if (AlreadyCaught.count(TypeInfo)) + // Already caught by an earlier clause, so having it in the filter + // is pointless. + continue; + // There is no point in having multiple copies of the same typeinfo in + // a filter, so only add it if we didn't already. + if (SeenInFilter.insert(TypeInfo)) + NewFilterElts.push_back(cast<Constant>(Elt)); + } + // A filter containing a catch-all cannot match anything by definition. + if (SawCatchAll) { + // Throw the filter away. + MakeNewInstruction = true; + continue; + } + + // If we dropped something from the filter, make a new one. + if (NewFilterElts.size() < NumTypeInfos) + MakeNewFilter = true; + } + if (MakeNewFilter) { + FilterType = ArrayType::get(FilterType->getElementType(), + NewFilterElts.size()); + FilterClause = ConstantArray::get(FilterType, NewFilterElts); + MakeNewInstruction = true; + } + + NewClauses.push_back(FilterClause); + + // If the new filter is empty then it will catch everything so there is + // no point in keeping any following clauses or marking the landingpad + // as having a cleanup. The case of the original filter being empty was + // already handled above. + if (MakeNewFilter && !NewFilterElts.size()) { + assert(MakeNewInstruction && "New filter but not a new instruction!"); + CleanupFlag = false; + break; + } + } + } + + // If several filters occur in a row then reorder them so that the shortest + // filters come first (those with the smallest number of elements). This is + // advantageous because shorter filters are more likely to match, speeding up + // unwinding, but mostly because it increases the effectiveness of the other + // filter optimizations below. + for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { + unsigned j; + // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. + for (j = i; j != e; ++j) + if (!isa<ArrayType>(NewClauses[j]->getType())) + break; + + // Check whether the filters are already sorted by length. We need to know + // if sorting them is actually going to do anything so that we only make a + // new landingpad instruction if it does. + for (unsigned k = i; k + 1 < j; ++k) + if (shorter_filter(NewClauses[k+1], NewClauses[k])) { + // Not sorted, so sort the filters now. Doing an unstable sort would be + // correct too but reordering filters pointlessly might confuse users. + std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, + shorter_filter); + MakeNewInstruction = true; + break; + } + + // Look for the next batch of filters. 
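An aside on the reordering just above: shorter_filter is an ordinary strict-weak-order comparator over filter lengths, and the stable sort is deliberate so that equal-length filters keep their original order. A runnable analogue, with vector sizes standing in for getNumElements():

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      // Element counts stand in for cast<ArrayType>(...)->getNumElements().
      std::vector<std::vector<int>> Filters = {{1, 2, 3}, {4}, {5, 6}};
      std::stable_sort(Filters.begin(), Filters.end(),
                       [](const std::vector<int> &L, const std::vector<int> &R) {
                         return L.size() < R.size(); // shorter_filter analogue
                       });
      assert(Filters.front().size() == 1 && Filters.back().size() == 3);
      return 0;
    }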
+ i = j + 1; + } + + // If typeinfos matched if and only if equal, then the elements of a filter L + // that occurs later than a filter F could be replaced by the intersection of + // the elements of F and L. In reality two typeinfos can match without being + // equal (for example if one represents a C++ class, and the other some class + // derived from it) so it would be wrong to perform this transform in general. + // However the transform is correct and useful if F is a subset of L. In that + // case L can be replaced by F, and thus removed altogether since repeating a + // filter is pointless. So here we look at all pairs of filters F and L where + // L follows F in the list of clauses, and remove L if every element of F is + // an element of L. This can occur when inlining C++ functions with exception + // specifications. + for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { + // Examine each filter in turn. + Value *Filter = NewClauses[i]; + ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType()); + if (!FTy) + // Not a filter - skip it. + continue; + unsigned FElts = FTy->getNumElements(); + // Examine each filter following this one. Doing this backwards means that + // we don't have to worry about filters disappearing under us when removed. + for (unsigned j = NewClauses.size() - 1; j != i; --j) { + Value *LFilter = NewClauses[j]; + ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType()); + if (!LTy) + // Not a filter - skip it. + continue; + // If Filter is a subset of LFilter, i.e. every element of Filter is also + // an element of LFilter, then discard LFilter. + SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j; + // If Filter is empty then it is a subset of LFilter. + if (!FElts) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + // Move on to the next filter. + continue; + } + unsigned LElts = LTy->getNumElements(); + // If Filter is longer than LFilter then it cannot be a subset of it. + if (FElts > LElts) + // Move on to the next filter. + continue; + // At this point we know that LFilter has at least one element. + if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros. + // Filter is a subset of LFilter iff Filter contains only zeros (as we + // already know that Filter is not longer than LFilter). + if (isa<ConstantAggregateZero>(Filter)) { + assert(FElts <= LElts && "Should have handled this case earlier!"); + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + continue; + } + ConstantArray *LArray = cast<ConstantArray>(LFilter); + if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros. + // Since Filter is non-empty and contains only zeros, it is a subset of + // LFilter iff LFilter contains a zero. + assert(FElts > 0 && "Should have eliminated the empty filter earlier!"); + for (unsigned l = 0; l != LElts; ++l) + if (LArray->getOperand(l)->isNullValue()) { + // LFilter contains a zero - discard it. + NewClauses.erase(J); + MakeNewInstruction = true; + break; + } + // Move on to the next filter. + continue; + } + // At this point we know that both filters are ConstantArrays. Loop over + // operands to see whether every element of Filter is also an element of + // LFilter. Since filters tend to be short this is probably faster than + // using a method that scales nicely. 
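The subset rule in the long comment above is small enough to model directly; a sketch with a hypothetical isSubset helper and plain pointers for typeinfos:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    using TypeInfo = const void *; // stand-in for stripped typeinfo pointers

    // Filter F (an earlier clause) makes a later filter L redundant when
    // every element of F also occurs in L; L can then be erased outright.
    static bool isSubset(const std::vector<TypeInfo> &F,
                         const std::vector<TypeInfo> &L) {
      for (TypeInfo FI : F) // quadratic scan; filters tend to be short
        if (std::find(L.begin(), L.end(), FI) == L.end())
          return false;
      return true;
    }

    int main() {
      int A, B, C;
      std::vector<TypeInfo> F = {&A, &B}, L = {&A, &B, &C};
      assert(isSubset(F, L));  // the later, longer L can be dropped
      assert(!isSubset(L, F)); // a longer filter never subsumes a shorter one
      return 0;
    }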
+ ConstantArray *FArray = cast<ConstantArray>(Filter); + bool AllFound = true; + for (unsigned f = 0; f != FElts; ++f) { + Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts(); + AllFound = false; + for (unsigned l = 0; l != LElts; ++l) { + Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts(); + if (LTypeInfo == FTypeInfo) { + AllFound = true; + break; + } + } + if (!AllFound) + break; + } + if (AllFound) { + // Discard LFilter. + NewClauses.erase(J); + MakeNewInstruction = true; + } + // Move on to the next filter. + } + } + + // If we changed any of the clauses, replace the old landingpad instruction + // with a new one. + if (MakeNewInstruction) { + LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), + LI.getPersonalityFn(), + NewClauses.size()); + for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) + NLI->addClause(NewClauses[i]); + // A landing pad with no clauses must have the cleanup flag set. It is + // theoretically possible, though highly unlikely, that we eliminated all + // clauses. If so, force the cleanup flag to true. + if (NewClauses.empty()) + CleanupFlag = true; + NLI->setCleanup(CleanupFlag); + return NLI; + } + + // Even if none of the clauses changed, we may nonetheless have understood + // that the cleanup flag is pointless. Clear it if so. + if (LI.isCleanup() != CleanupFlag) { + assert(!CleanupFlag && "Adding a cleanup, not removing one?!"); + LI.setCleanup(CleanupFlag); + return &LI; + } + + return 0; +} + @@ -1350,7 +1758,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { assert(I->hasOneUse() && "Invariants didn't hold!"); // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I)) + if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() || + isa<TerminatorInst>(I)) return false; // Do not sink alloca instructions out of the entry block. @@ -1367,8 +1776,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { return false; } - BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); - + BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); I->moveBefore(InsertPos); ++NumSunkInst; return true; @@ -1503,27 +1911,29 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents // the instcombine code from having to deal with some bad special cases. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (!Visited.count(BB)) { - Instruction *Term = BB->getTerminator(); - while (Term != BB->begin()) { // Remove instrs bottom-up - BasicBlock::iterator I = Term; --I; - - DEBUG(errs() << "IC: DCE: " << *I << '\n'); - // A debug intrinsic shouldn't force another iteration if we weren't - // going to do one without it. - if (!isa<DbgInfoIntrinsic>(I)) { - ++NumDeadInst; - MadeIRChange = true; - } - - // If I is not void type then replaceAllUsesWith undef. - // This allows ValueHandlers and custom metadata to adjust itself. - if (!I->getType()->isVoidTy()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - I->eraseFromParent(); + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (Visited.count(BB)) continue; + + // Delete the instructions backwards, as it has a reduced likelihood of + // having to update as many def-use and use-def chains. 
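Why the bottom-up order pays off, as a toy model: in SSA every operand is defined above its users, so erasing from the terminator upward means each instruction is already use-free when its turn comes.

    #include <cassert>
    #include <vector>

    int main() {
      // Toy block: instruction i uses the instruction at Ops[i] (-1: none),
      // so operands always sit at a smaller index than their users.
      std::vector<int> Ops = {-1, 0, 1}; // %2 uses %1, %1 uses %0
      std::vector<int> UseCount = {1, 1, 0};
      for (int I = (int)Ops.size() - 1; I >= 0; --I) {
        assert(UseCount[I] == 0); // no users remain, nothing to patch
        if (Ops[I] >= 0)
          --UseCount[Ops[I]];
      }
      return 0;
    }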
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != BB->begin()) { + // Delete the next to last instruction. + BasicBlock::iterator I = EndInst; + Instruction *Inst = --I; + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (isa<LandingPadInst>(Inst)) { + EndInst = Inst; + continue; } + if (!isa<DbgInfoIntrinsic>(Inst)) { + ++NumDeadInst; + MadeIRChange = true; + } + Inst->eraseFromParent(); } + } } while (!Worklist.isEmpty()) { @@ -1604,13 +2014,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Everything uses the new instruction now. I->replaceAllUsesWith(Result); + // Move the name to the new instruction first. + Result->takeName(I); + // Push the new instruction and any users onto the worklist. Worklist.Add(Result); Worklist.AddUsersToWorkList(*Result); - // Move the name to the new instruction first. - Result->takeName(I); - // Insert the new instruction into the basic block... BasicBlock *InstParent = I->getParent(); BasicBlock::iterator InsertPos = I; diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 5700ac8..7b3a927a 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -6,3 +6,10 @@ add_llvm_library(LLVMInstrumentation PathProfiling.cpp ProfilingUtils.cpp ) + +add_llvm_library_dependencies(LLVMInstrumentation + LLVMAnalysis + LLVMCore + LLVMSupport + LLVMTransformUtils + ) diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 1d31fcc..e8ef265 100644 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -74,7 +74,7 @@ bool EdgeProfiler::runOnModule(Module &M) { } } - const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); + Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); GlobalVariable *Counters = new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, Constant::getNullValue(ATy), "EdgeProfCounters"); diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 3f2c412..ccf7e11 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -60,11 +60,11 @@ namespace { bool runOnModule(Module &M); // Create the GCNO files for the Module based on DebugInfo. - void emitGCNO(DebugInfoFinder &DIF); + void emitGCNO(); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. - bool emitProfileArcs(DebugInfoFinder &DIF); + bool emitProfileArcs(); // Get pointers to the functions in the runtime library. Constant *getStartFileFunc(); @@ -86,8 +86,7 @@ namespace { // Add the function to write out all our counters to the global destructor // list. 
- void insertCounterWriteout(DebugInfoFinder &, - SmallVector<std::pair<GlobalVariable *, + void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &); std::string mangleName(DICompileUnit CU, std::string NewStem); @@ -110,15 +109,6 @@ ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, return new GCOVProfiler(EmitNotes, EmitData, Use402Format); } -static DISubprogram findSubprogram(DIScope Scope) { - while (!Scope.isSubprogram()) { - assert(Scope.isLexicalBlock() && - "Debug location not lexical block or subprogram"); - Scope = DILexicalBlock(Scope).getContext(); - } - return DISubprogram(Scope); -} - namespace { class GCOVRecord { protected: @@ -177,18 +167,24 @@ namespace { } uint32_t length() { + // Here 2 = 1 for string length + 1 for '0' id#. return lengthOfGCOVString(Filename) + 2 + Lines.size(); } - private: - friend class GCOVBlock; + void writeOut() { + write(0); + writeGCOVString(Filename); + for (int i = 0, e = Lines.size(); i != e; ++i) + write(Lines[i]); + } - GCOVLines(std::string Filename, raw_ostream *os) - : Filename(Filename) { + GCOVLines(StringRef F, raw_ostream *os) + : Filename(F) { this->os = os; } - std::string Filename; + private: + StringRef Filename; SmallVector<uint32_t, 32> Lines; }; @@ -197,7 +193,7 @@ namespace { // other blocks. class GCOVBlock : public GCOVRecord { public: - GCOVLines &getFile(std::string Filename) { + GCOVLines &getFile(StringRef Filename) { GCOVLines *&Lines = LinesByFile[Filename]; if (!Lines) { Lines = new GCOVLines(Filename, os); @@ -220,13 +216,8 @@ namespace { write(Len); write(Number); for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), - E = LinesByFile.end(); I != E; ++I) { - write(0); - writeGCOVString(I->second->Filename); - for (int i = 0, e = I->second->Lines.size(); i != e; ++i) { - write(I->second->Lines[i]); - } - } + E = LinesByFile.end(); I != E; ++I) I->second->writeOut(); write(0); write(0); } @@ -353,66 +344,66 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - DebugInfoFinder DIF; - DIF.processModule(M); - - if (EmitNotes) emitGCNO(DIF); - if (EmitData) return emitProfileArcs(DIF); + if (EmitNotes) emitGCNO(); + if (EmitData) return emitProfileArcs(); return false; } -void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) { +void GCOVProfiler::emitGCNO() { DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; - for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(), - E = DIF.compile_unit_end(); I != E; ++I) { - // Each compile unit gets its own .gcno file. This means that whether we run - // this pass over the original .o's as they're produced, or run it after - // LTO, we'll generate the same .gcno files.
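A note on the magic strings written in the hunk below: gcov emits the file one 32-bit word at a time, so (at least on little-endian hosts) each four-character token lands byte-reversed in the stream. The new "204" in the 4.2-format magic is therefore a deliberate fix, not a typo. A quick check:

    #include <algorithm>
    #include <cassert>
    #include <string>

    static std::string reversed(std::string S) {
      std::reverse(S.begin(), S.end());
      return S;
    }

    int main() {
      // "gcno" + version + "LLVM", each token byte-reversed on disk.
      assert(reversed("gcno") + reversed("402*") + reversed("LLVM")
             == "oncg*204MVLL"); // gcov 4.2 format
      assert(reversed("gcno") + reversed("404*") + reversed("LLVM")
             == "oncg*404MVLL"); // gcov 4.4 format
      return 0;
    }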
- - DICompileUnit CU(*I); - raw_fd_ostream *&out = GcnoFiles[CU]; - std::string ErrorInfo; - out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo, - raw_fd_ostream::F_Binary); - if (!Use402Format) - out->write("oncg*404MVLL", 12); - else - out->write("oncg*402MVLL", 12); - } - - for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), - SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { - DISubprogram SP(*SPI); - raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()]; - - Function *F = SP.getFunction(); - if (!F) continue; - GCOVFunction Func(SP, os, Use402Format); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); - TerminatorInst *TI = BB->getTerminator(); - if (int successors = TI->getNumSuccessors()) { - for (int i = 0; i != successors; ++i) { - Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (CU_Nodes) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. + + DICompileUnit CU(CU_Nodes->getOperand(i)); + raw_fd_ostream *&out = GcnoFiles[CU]; + std::string ErrorInfo; + out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo, + raw_fd_ostream::F_Binary); + if (!Use402Format) + out->write("oncg*404MVLL", 12); + else + out->write("oncg*204MVLL", 12); + + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + raw_fd_ostream *&os = GcnoFiles[CU]; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, os, Use402Format); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + } + } else if (isa<ReturnInst>(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); + } } - } else if (isa<ReturnInst>(TI)) { - Block.addEdge(Func.getReturnBlock()); - } - - uint32_t Line = 0; - for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { - const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; - if (Line == Loc.getLine()) continue; - Line = Loc.getLine(); - if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue; - - GCOVLines &Lines = Block.getFile(SP.getFilename()); - Lines.addLine(Loc.getLine()); + Func.writeOut(); } } - Func.writeOut(); } for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator @@ -424,103 +415,107 @@ void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) { } } -bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) { - if (DIF.subprogram_begin() == DIF.subprogram_end()) - return false; - - SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; - for (DebugInfoFinder::iterator SPI = 
DIF.subprogram_begin(), - SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { - DISubprogram SP(*SPI); - Function *F = SP.getFunction(); - if (!F) continue; - - unsigned Edges = 0; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - if (isa<ReturnInst>(TI)) - ++Edges; - else - Edges += TI->getNumSuccessors(); - } - - const ArrayType *CounterTy = +bool GCOVProfiler::emitProfileArcs() { + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (!CU_Nodes) return false; + + bool Result = false; + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CU(CU_Nodes->getOperand(i)); + DIArray SPs = CU.getSubprograms(); + SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + Function *F = SP.getFunction(); + if (!F) continue; + if (!Result) Result = true; + unsigned Edges = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (isa<ReturnInst>(TI)) + ++Edges; + else + Edges += TI->getNumSuccessors(); + } + + ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Edges); - GlobalVariable *Counters = + GlobalVariable *Counters = new GlobalVariable(*M, CounterTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), "__llvm_gcov_ctr", 0, false, 0); - CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); - - UniqueVector<BasicBlock *> ComplexEdgePreds; - UniqueVector<BasicBlock *> ComplexEdgeSuccs; - - unsigned Edge = 0; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors(); - if (Successors) { - IRBuilder<> Builder(TI); - - if (Successors == 1) { - Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, - Edge); - Value *Count = Builder.CreateLoad(Counter); - Count = Builder.CreateAdd(Count, - ConstantInt::get(Type::getInt64Ty(*Ctx),1)); - Builder.CreateStore(Count, Counter); - } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - Value *Sel = Builder.CreateSelect( + CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); + + UniqueVector<BasicBlock *> ComplexEdgePreds; + UniqueVector<BasicBlock *> ComplexEdgeSuccs; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors) { + IRBuilder<> Builder(TI); + + if (Successors == 1) { + Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + Value *Sel = Builder.CreateSelect( BI->getCondition(), ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); - SmallVector<Value *, 2> Idx; - Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); - Idx.push_back(Sel); - Value *Counter = Builder.CreateInBoundsGEP(Counters, - Idx.begin(), Idx.end()); - Value *Count = Builder.CreateLoad(Counter); - Count = Builder.CreateAdd(Count, - ConstantInt::get(Type::getInt64Ty(*Ctx),1)); - Builder.CreateStore(Count, Counter); - } else { - ComplexEdgePreds.insert(BB); - for (int i = 0; i != Successors; ++i) - ComplexEdgeSuccs.insert(TI->getSuccessor(i)); + SmallVector<Value *, 2> Idx; + Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); + Idx.push_back(Sel); + Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else { + ComplexEdgePreds.insert(BB); + for (int i = 0; i != Successors; ++i) + ComplexEdgeSuccs.insert(TI->getSuccessor(i)); + } + Edge += Successors; } - Edge += Successors; } - } - - if (!ComplexEdgePreds.empty()) { - GlobalVariable *EdgeTable = + + if (!ComplexEdgePreds.empty()) { + GlobalVariable *EdgeTable = buildEdgeLookupTable(F, Counters, ComplexEdgePreds, ComplexEdgeSuccs); - GlobalVariable *EdgeState = getEdgeStateValue(); - - const Type *Int32Ty = Type::getInt32Ty(*Ctx); - for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { - IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); - Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); - } - for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { - // call runtime to perform increment - IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI()); - Value *CounterPtrArray = + GlobalVariable *EdgeState = getEdgeStateValue(); + + Type *Int32Ty = Type::getInt32Ty(*Ctx); + for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { + IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); + Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); + } + for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { + // call runtime to perform increment + BasicBlock::iterator InsertPt = + ComplexEdgeSuccs[i+1]->getFirstInsertionPt(); + IRBuilder<> Builder(InsertPt); + Value *CounterPtrArray = Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, i * ComplexEdgePreds.size()); - Builder.CreateCall2(getIncrementIndirectCounterFunc(), - EdgeState, CounterPtrArray); - // clear the predecessor number - Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); + Builder.CreateCall2(getIncrementIndirectCounterFunc(), + EdgeState, CounterPtrArray); + // clear the predecessor number + Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); + } } } + insertCounterWriteout(CountersBySP); } - - insertCounterWriteout(DIF, CountersBySP); - - return true; + return Result; } // All edges with successors that aren't branches are "complex", because it @@ -535,8 +530,8 @@ GlobalVariable 
*GCOVProfiler::buildEdgeLookupTable( // read it. Threads and invoke make this untrue. // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. - const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); - const ArrayType *EdgeTableTy = ArrayType::get( + Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + ArrayType *EdgeTableTy = ArrayType::get( Int64PtrTy, Succs.size() * Preds.size()); Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; @@ -572,7 +567,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( } Constant *GCOVProfiler::getStartFileFunc() { - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_start_file", FTy); } @@ -582,7 +577,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row }; - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy); } @@ -592,7 +587,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name }; - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); } @@ -602,13 +597,13 @@ Constant *GCOVProfiler::getEmitArcsFunc() { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters }; - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } Constant *GCOVProfiler::getEndFileFunc() { - const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_end_file", FTy); } @@ -626,9 +621,8 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { } void GCOVProfiler::insertCounterWriteout( - DebugInfoFinder &DIF, SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) { - const FunctionType *WriteoutFTy = + FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage, @@ -642,29 +636,31 @@ void GCOVProfiler::insertCounterWriteout( Constant *EmitArcs = getEmitArcsFunc(); Constant *EndFile = getEndFileFunc(); - for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(), - CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) { - DICompileUnit compile_unit(*CUI); - std::string FilenameGcda = mangleName(compile_unit, "gcda"); - Builder.CreateCall(StartFile, - Builder.CreateGlobalStringPtr(FilenameGcda)); - for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (CU_Nodes) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit compile_unit(CU_Nodes->getOperand(i)); + std::string FilenameGcda = mangleName(compile_unit, "gcda"); + Builder.CreateCall(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda)); + for (SmallVector<std::pair<GlobalVariable *, 
MDNode *>, 8>::iterator I = CountersBySP.begin(), E = CountersBySP.end(); - I != E; ++I) { - DISubprogram SP(I->second); - intptr_t ident = reinterpret_cast<intptr_t>(I->second); - Builder.CreateCall2(EmitFunction, - ConstantInt::get(Type::getInt32Ty(*Ctx), ident), - Builder.CreateGlobalStringPtr(SP.getName())); - - GlobalVariable *GV = I->first; - unsigned Arcs = + I != E; ++I) { + DISubprogram SP(I->second); + intptr_t ident = reinterpret_cast<intptr_t>(I->second); + Builder.CreateCall2(EmitFunction, + ConstantInt::get(Type::getInt32Ty(*Ctx), ident), + Builder.CreateGlobalStringPtr(SP.getName())); + + GlobalVariable *GV = I->first; + unsigned Arcs = cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); - Builder.CreateCall2(EmitArcs, - ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), - Builder.CreateConstGEP2_64(GV, 0, 0)); + Builder.CreateCall2(EmitArcs, + ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), + Builder.CreateConstGEP2_64(GV, 0, 0)); + } + Builder.CreateCall(EndFile); } - Builder.CreateCall(EndFile); } Builder.CreateRetVoid(); diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index e09f882..62c21b8 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -112,8 +112,8 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // be calculated from other edge counters on reading the profile info back // in. - const Type *Int32 = Type::getInt32Ty(M.getContext()); - const ArrayType *ATy = ArrayType::get(Int32, NumEdges); + Type *Int32 = Type::getInt32Ty(M.getContext()); + ArrayType *ATy = ArrayType::get(Int32, NumEdges); GlobalVariable *Counters = new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, Constant::getNullValue(ATy), "OptEdgeProfCounters"); diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index 7541663..23915d3 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -374,7 +374,7 @@ namespace llvm { template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable, xcompile> { public: - static const StructType *get(LLVMContext& C) { + static StructType *get(LLVMContext& C) { return( StructType::get( TypeBuilder<types::i<32>, xcompile>::get(C), // type TypeBuilder<types::i<32>, xcompile>::get(C), // array size @@ -909,7 +909,7 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* pathNumber) { if(pathNumber == NULL || isa<ConstantInt>(pathNumber) || (((Instruction*)(pathNumber))->getParent()) != block) { - return(block->getFirstNonPHI()); + return(block->getFirstInsertionPt()); } else { Instruction* pathNumberInst = (Instruction*) (pathNumber); BasicBlock::iterator insertPoint; @@ -930,7 +930,7 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* // A PHINode is created in the node, and its values initialized to -1U. 
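On the getFirstNonPHI() to getFirstInsertionPt() swaps running through these hunks: the new call also steps over a landingpad, which must stay the first non-PHI instruction of its block, so instrumentation can never be inserted in front of it. A toy model (strings standing in for opcodes, not the real iterator API):

    #include <cassert>
    #include <string>
    #include <vector>

    static size_t firstInsertionPt(const std::vector<std::string> &BB) {
      size_t I = 0;
      while (I < BB.size() && (BB[I] == "phi" || BB[I] == "landingpad"))
        ++I; // skip PHIs *and* the landingpad
      return I;
    }

    int main() {
      std::vector<std::string> BB = {"phi", "landingpad", "add", "ret"};
      assert(firstInsertionPt(BB) == 2); // getFirstNonPHI would stop at 1
      return 0;
    }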
void PathProfiler::preparePHI(BLInstrumentationNode* node) { BasicBlock* block = node->getBlock(); - BasicBlock::iterator insertPoint = block->getFirstNonPHI(); + BasicBlock::iterator insertPoint = block->getFirstInsertionPt(); pred_iterator PB = pred_begin(node->getBlock()), PE = pred_end(node->getBlock()); PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), @@ -999,7 +999,7 @@ void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node, BasicBlock::iterator insertPoint; if( atBeginning ) - insertPoint = block->getFirstNonPHI(); + insertPoint = block->getFirstInsertionPt(); else insertPoint = block->getTerminator(); @@ -1029,8 +1029,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue, gepIndices[1] = incValue; GetElementPtrInst* pcPointer = - GetElementPtrInst::Create(dag->getCounterArray(), - gepIndices.begin(), gepIndices.end(), + GetElementPtrInst::Create(dag->getCounterArray(), gepIndices, "counterInc", insertPoint); // Load from the array - call it oldPC @@ -1140,7 +1139,7 @@ void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge, } BasicBlock::iterator insertPoint = atBeginning ? - instrumentNode->getBlock()->getFirstNonPHI() : + instrumentNode->getBlock()->getFirstInsertionPt() : instrumentNode->getBlock()->getTerminator(); // add information from the bottom edge, if it exists @@ -1172,7 +1171,7 @@ void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge, // Insert instrumentation if this is a normal edge else { BasicBlock::iterator insertPoint = atBeginning ? - instrumentNode->getBlock()->getFirstNonPHI() : + instrumentNode->getBlock()->getFirstInsertionPt() : instrumentNode->getBlock()->getTerminator(); if( edge->isInitialization() ) { // initialize path number @@ -1233,7 +1232,7 @@ void PathProfiler::insertInstrumentation( end = callEdges.end(); edge != end; edge++ ) { BLInstrumentationNode* node = (BLInstrumentationNode*)(*edge)->getSource(); - BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI(); + BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt(); // Find the first function call while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call ) @@ -1289,7 +1288,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit, // Should we store the information in an array or hash if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) { - const Type* t = ArrayType::get(Type::getInt32Ty(*Context), + Type* t = ArrayType::get(Type::getInt32Ty(*Context), dag.getNumberOfPaths()); dag.setCounterArray(new GlobalVariable(M, t, false, @@ -1301,7 +1300,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit, // Add to global function reference table unsigned type; - const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context); + Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context); if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) type = ProfilingArray; @@ -1315,7 +1314,7 @@ void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit, ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) : Constant::getNullValue(voidPtr); - const StructType* at = ftEntryTypeBuilder::get(*Context); + StructType* at = ftEntryTypeBuilder::get(*Context); ConstantStruct* functionEntry = (ConstantStruct*)ConstantStruct::get(at, entryArray); ftInit.push_back(functionEntry); @@ -1379,8 +1378,8 @@ bool PathProfiler::runOnModule(Module &M) { runOnFunction(ftInit, *F, M); } - const Type *t = ftEntryTypeBuilder::get(*Context); - const ArrayType* ftArrayType = 
ArrayType::get(t, ftInit.size()); + Type *t = ftEntryTypeBuilder::get(*Context); + ArrayType* ftArrayType = ArrayType::get(t, ftInit.size()); Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit); DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n"); @@ -1388,7 +1387,7 @@ bool PathProfiler::runOnModule(Module &M) { GlobalVariable* functionTable = new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage, ftInitConstant, "functionPathTable"); - const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0); + Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0); InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable, PointerType::getUnqual(eltType)); diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 445a5b6..de57cd1 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -25,9 +25,9 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, GlobalValue *Array, PointerType *arrayType) { LLVMContext &Context = MainFn->getContext(); - const Type *ArgVTy = + Type *ArgVTy = PointerType::getUnqual(Type::getInt8PtrTy(Context)); - const PointerType *UIntPtr = arrayType ? arrayType : + PointerType *UIntPtr = arrayType ? arrayType : Type::getInt32PtrTy(Context); Module &M = *MainFn->getParent(); Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context), @@ -51,8 +51,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, Constant::getNullValue(Type::getInt32Ty(Context))); unsigned NumElements = 0; if (Array) { - Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0], - GEPIndices.size()); + Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices); NumElements = cast<ArrayType>(Array->getType()->getElementType())->getNumElements(); } else { @@ -108,7 +107,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray, bool beginning) { // Insert the increment after any alloca or PHI instructions... - BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() : + BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() : BB->getTerminator(); while (isa<AllocaInst>(InsertPos)) ++InsertPos; @@ -120,7 +119,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); + ConstantExpr::getGetElementPtr(CounterArray, Indices); // Load, increment and store the value back. Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); @@ -137,7 +136,7 @@ void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { Type::getInt32Ty(Mod->getContext()), FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() }; - const StructType *GlobalDtorElemTy = + StructType *GlobalDtorElemTy = StructType::get(Mod->getContext(), GlobalDtorElems, false); // Construct the new element we'll be adding. 
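The profiling passes above all emit the same counter bump as IR: getelementptr into a global array, load, add one, store back. In source form (hypothetical fixed-size array standing in for the counter global):

    #include <cassert>
    #include <cstdint>

    static uint64_t Counters[8]; // stand-in for e.g. EdgeProfCounters

    static void incrementCounter(unsigned Idx) {
      uint64_t Old = Counters[Idx]; // LoadInst
      Counters[Idx] = Old + 1;      // Add, then StoreInst
    }

    int main() {
      incrementCounter(3);
      incrementCounter(3);
      assert(Counters[3] == 2);
      return 0;
    }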
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index a5adb5e..ba214d1 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -57,6 +57,7 @@ bool ADCE::runOnFunction(Function& F) { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isa<TerminatorInst>(I.getInstructionIterator()) || isa<DbgInfoIntrinsic>(I.getInstructionIterator()) || + isa<LandingPadInst>(I.getInstructionIterator()) || I->mayHaveSideEffects()) { alive.insert(I.getInstructionIterator()); worklist.push_back(I.getInstructionIterator()); @@ -65,7 +66,6 @@ bool ADCE::runOnFunction(Function& F) { // Propagate liveness backwards to operands. while (!worklist.empty()) { Instruction* curr = worklist.pop_back_val(); - for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end(); OI != OE; ++OI) if (Instruction* Inst = dyn_cast<Instruction>(OI)) diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index c223da6..79bcae5 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -29,6 +29,14 @@ add_llvm_library(LLVMScalarOpts SimplifyCFGPass.cpp SimplifyLibCalls.cpp Sink.cpp - TailDuplication.cpp TailRecursionElimination.cpp ) + +add_llvm_library_dependencies(LLVMScalarOpts + LLVMAnalysis + LLVMCore + LLVMInstCombine + LLVMSupport + LLVMTarget + LLVMTransformUtils + ) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 0af14ed..f8f18b2 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -58,6 +58,7 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); +STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), @@ -104,12 +105,13 @@ namespace { void EliminateMostlyEmptyBlock(BasicBlock *BB); bool OptimizeBlock(BasicBlock &BB); bool OptimizeInst(Instruction *I); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); bool OptimizeInlineAsmInst(CallInst *CS); bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); bool DupRetToEnableTailCallOpts(ReturnInst *RI); + bool PlaceDbgValues(Function &F); }; } @@ -132,6 +134,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); + // If llvm.dbg.value is far away from the value, iSel may not be able to // handle it properly. iSel will drop llvm.dbg.value if it cannot // find a node corresponding to the value.
+ EverMadeChange |= PlaceDbgValues(F); + bool MadeChange = true; while (MadeChange) { MadeChange = false; @@ -410,8 +417,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ CastInst *&InsertedCast = InsertedCasts[UserBB]; if (!InsertedCast) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); @@ -467,8 +473,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) { CmpInst *&InsertedCmp = InsertedCmps[UserBB]; if (!InsertedCmp) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); InsertedCmp = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), @@ -528,7 +533,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II && II->getIntrinsicID() == Intrinsic::objectsize) { bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); - const Type *ReturnTy = CI->getType(); + Type *ReturnTy = CI->getType(); Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); // Substituting this can cause recursive simplifications, which can @@ -551,22 +556,6 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // From here on out we're working with named functions. if (CI->getCalledFunction() == 0) return false; - // llvm.dbg.value is far away from the value then iSel may not be able - // handle it properly. iSel will drop llvm.dbg.value if it can not - // find a node corresponding to the value. - if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI)) - if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue())) - if (!VI->isTerminator() && - (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) { - DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); - DVI->removeFromParent(); - if (isa<PHINode>(VI)) - DVI->insertBefore(VI->getParent()->getFirstNonPHI()); - else - DVI->insertAfter(VI); - return true; - } - // We'll need TargetData from here on out. const TargetData *TD = TLI ? TLI->getTargetData() : 0; if (!TD) return false; @@ -724,7 +713,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { /// This method is used to optimize both load/store and inline asms with memory /// operands. bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - const Type *AccessTy) { + Type *AccessTy) { Value *Repl = Addr; // Try to collapse single-value PHI nodes. This is necessary to undo @@ -746,13 +735,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, worklist.pop_back(); // Break use-def graph loops. - if (Visited.count(V)) { + if (!Visited.insert(V)) { Consensus = 0; break; } - Visited.insert(V); - // For a PHI node, push all of its incoming values. if (PHINode *P = dyn_cast<PHINode>(V)) { for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) @@ -763,7 +750,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For non-PHIs, determine the addressing mode being computed. 
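The Visited hunk above folds a count()-then-insert() pair into a single insert(); SmallPtrSet's insert() reports whether the element was actually new, so the use-def cycle check costs one lookup instead of two. The same pattern with std::set as a stand-in:

    #include <cassert>
    #include <set>

    int main() {
      std::set<const void *> Visited;
      int V = 0;
      assert(Visited.insert(&V).second);  // first visit: keep walking
      assert(!Visited.insert(&V).second); // revisit: cycle, bail out
      return 0;
    }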
SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = - AddressingModeMatcher::Match(V, AccessTy,MemoryInst, + AddressingModeMatcher::Match(V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI); // This check is broken into two cases with very similar code to avoid using @@ -822,7 +809,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Insert this computation right after this user. Since our caller is // scanning from the top of the BB to the bottom, reuses of the expr are // guaranteed to happen later. - BasicBlock::iterator InsertPt = MemoryInst; + IRBuilder<> Builder(MemoryInst); // Now that we determined the addressing expression we want to use and know // that we have to sink it into this block. Check to see if we have already @@ -833,11 +820,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) - SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt); + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); - const Type *IntPtrTy = + Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(AccessTy->getContext()); Value *Result = 0; @@ -850,10 +837,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (AddrMode.BaseReg) { Value *V = AddrMode.BaseReg; if (V->getType()->isPointerTy()) - V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); if (V->getType() != IntPtrTy) - V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true, - "sunkaddr", InsertPt); + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); Result = V; } @@ -863,29 +849,27 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (V->getType() == IntPtrTy) { // done. } else if (V->getType()->isPointerTy()) { - V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth()) { - V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt); + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { - V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt); + V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr"); } if (AddrMode.Scale != 1) - V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy, - AddrMode.Scale), - "sunkaddr", InsertPt); + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } // Add in the BaseGV if present. 
if (AddrMode.BaseGV) { - Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr", - InsertPt); + Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } @@ -894,7 +878,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (AddrMode.BaseOffs) { Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); if (Result) - Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); + Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } @@ -902,7 +886,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (Result == 0) SunkAddr = Constant::getNullValue(Addr->getType()); else - SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt); + SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); } MemoryInst->replaceUsesOfWith(Repl, SunkAddr); @@ -1059,8 +1043,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; if (!InsertedTrunc) { - BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); - + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); } @@ -1159,3 +1142,34 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { return MadeChange; } + +// If llvm.dbg.value is far away from the value then iSel may not be able to +// handle it properly. iSel will drop llvm.dbg.value if it cannot +// find a node corresponding to the value. +bool CodeGenPrepare::PlaceDbgValues(Function &F) { + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + Instruction *PrevNonDbgInst = NULL; + for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { + Instruction *Insn = BI; ++BI; + DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); + if (!DVI) { + PrevNonDbgInst = Insn; + continue; + } + + Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); + if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { + DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); + else + DVI->insertAfter(VI); + MadeChange = true; + ++NumDbgValueMoved; + } + } + } + return MadeChange; +} diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index cb9b5be..a593d0f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -52,18 +52,18 @@ namespace { AA = &getAnalysis<AliasAnalysis>(); MD = &getAnalysis<MemoryDependenceAnalysis>(); DominatorTree &DT = getAnalysis<DominatorTree>(); - + bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) // Only check non-dead blocks. Dead blocks may have strange pointer // cycles that will confuse alias analysis. 
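  // (Unreachable blocks may legally contain instructions that use their own
  // results, e.g. a GEP whose pointer operand is itself, because dominance
  // holds vacuously there; chasing such a pointer cycle would send alias
  // analysis in circles, hence the reachability check.)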
if (DT.isReachableFromEntry(I)) Changed |= runOnBasicBlock(*I); - + AA = 0; MD = 0; return Changed; } - + bool runOnBasicBlock(BasicBlock &BB); bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); @@ -105,34 +105,34 @@ static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, SmallPtrSet<Value*, 16> *ValueSet = 0) { SmallVector<Instruction*, 32> NowDeadInsts; - + NowDeadInsts.push_back(I); --NumFastOther; - + // Before we touch this instruction, remove it from memdep! do { Instruction *DeadInst = NowDeadInsts.pop_back_val(); ++NumFastOther; - + // This instruction is dead, zap it, in stages. Start by removing it from // MemDep, which needs to know the operands and needs it to be in the // function. MD.removeInstruction(DeadInst); - + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); DeadInst->setOperand(op, 0); - + // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; - + if (Instruction *OpI = dyn_cast<Instruction>(Op)) if (isInstructionTriviallyDead(OpI)) NowDeadInsts.push_back(OpI); } - + DeadInst->eraseFromParent(); - + if (ValueSet) ValueSet->erase(DeadInst); } while (!NowDeadInsts.empty()); } @@ -159,11 +159,13 @@ static bool hasMemoryWrite(Instruction *I) { } /// getLocForWrite - Return a Location stored to by the specified instruction. +/// If isRemovable returns true, this function and getLocForRead completely +/// describe the memory operations for this instruction. static AliasAnalysis::Location getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return AA.getLocation(SI); - + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { // memcpy/memmove/memset. AliasAnalysis::Location Loc = AA.getLocationForDest(MI); @@ -174,10 +176,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { return AliasAnalysis::Location(); return Loc; } - + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); if (II == 0) return AliasAnalysis::Location(); - + switch (II->getIntrinsicID()) { default: return AliasAnalysis::Location(); // Unhandled intrinsic. case Intrinsic::init_trampoline: @@ -185,7 +187,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { // that we should use the size of the pointee type. This isn't valid for // init.trampoline, which writes more than an i8. if (AA.getTargetData() == 0) return AliasAnalysis::Location(); - + // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. return AliasAnalysis::Location(II->getArgOperand(0)); @@ -198,10 +200,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { /// getLocForRead - Return the location read by the specified "hasMemoryWrite" /// instruction if any. -static AliasAnalysis::Location +static AliasAnalysis::Location getLocForRead(Instruction *Inst, AliasAnalysis &AA) { assert(hasMemoryWrite(Inst) && "Unknown instruction case"); - + // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) @@ -213,10 +215,10 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) { /// isRemovable - If the value of this instruction and the memory it writes to /// is unused, may we delete this instruction? static bool isRemovable(Instruction *I) { - // Don't remove volatile stores. + // Don't remove volatile/atomic stores. 
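+ // (isUnordered() is strictly stronger than the old !isVolatile() test: it
+ // also rejects any atomic store with an ordering above Unordered, keeping
+ // DSE conservative in the presence of atomics.)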
if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return !SI->isVolatile(); - + return SI->isUnordered(); + IntrinsicInst *II = cast<IntrinsicInst>(I); switch (II->getIntrinsicID()) { default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate"); @@ -227,7 +229,7 @@ static bool isRemovable(Instruction *I) { case Intrinsic::init_trampoline: // Always safe to remove init_trampoline. return true; - + case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: @@ -255,16 +257,16 @@ static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) { const TargetData *TD = AA.getTargetData(); if (TD == 0) return AliasAnalysis::UnknownSize; - + if (AllocaInst *A = dyn_cast<AllocaInst>(V)) { // Get size information for the alloca if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); return AliasAnalysis::UnknownSize; } - + assert(isa<Argument>(V) && "Expected AllocaInst or Argument!"); - const PointerType *PT = cast<PointerType>(V->getType()); + PointerType *PT = cast<PointerType>(V->getType()); return TD->getTypeAllocSize(PT->getElementType()); } @@ -287,7 +289,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, AliasAnalysis &AA) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); - + // If the start pointers are the same, we just have to compare sizes to see if // the later store was larger than the earlier store. if (P1 == P2) { @@ -302,33 +304,33 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, return Later.Ptr->getType() == Earlier.Ptr->getType(); return false; } - + // Make sure that the Later size is >= the Earlier size. if (Later.Size < Earlier.Size) return false; return true; } - + // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || Earlier.Size == AliasAnalysis::UnknownSize || Later.Size <= Earlier.Size || AA.getTargetData() == 0) return false; - + // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any // other store to the same object. const TargetData &TD = *AA.getTargetData(); - + const Value *UO1 = GetUnderlyingObject(P1, &TD), *UO2 = GetUnderlyingObject(P2, &TD); - + // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. if (UO1 != UO2) return false; - + // If the "Later" store is to a recognizable object, get its size. if (isObjectPointerWithTrustworthySize(UO2)) { uint64_t ObjectSize = @@ -336,26 +338,26 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, if (ObjectSize == Later.Size) return true; } - + // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. int64_t EarlierOff = 0, LaterOff = 0; const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); - + // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) return false; // The later store completely overlaps the earlier store if: - // + // // 1. Both start at the same offset and the later one's size is greater than // or equal to the earlier one's, or // // |--earlier--| // |-- later --| - // + // // 2. 
The earlier store has an offset greater than the later offset, but which // still lies completely within the later store. // @@ -373,7 +375,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a /// memory region into an identical pointer) then it doesn't actually make its -/// input dead in the traditional sense. Consider this case: +/// input dead in the traditional sense. Consider this case: /// /// memcpy(A <- B) /// memcpy(A <- A) @@ -391,10 +393,10 @@ static bool isPossibleSelfRead(Instruction *Inst, // location read. AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA); if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction. - + // If the read and written loc obviously don't alias, it isn't a read. if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; - + // Okay, 'Inst' may copy over itself. However, we can still remove the // DepWrite instruction if we can prove that it reads from the same location // as Inst. This handles useful cases like: @@ -404,10 +406,10 @@ static bool isPossibleSelfRead(Instruction *Inst, // aliases, so removing the first memcpy is safe (assuming it writes <= # // bytes as the second one). AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA); - + if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) return false; - + // If DepWrite doesn't read memory or if we can't prove it is a must alias, // then it can't be considered dead. return true; @@ -420,43 +422,43 @@ static bool isPossibleSelfRead(Instruction *Inst, bool DSE::runOnBasicBlock(BasicBlock &BB) { bool MadeChange = false; - + // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { Instruction *Inst = BBI++; - + // Handle 'free' calls specially. if (CallInst *F = isFreeCall(Inst)) { MadeChange |= HandleFree(F); continue; } - + // If we find something that writes memory, get its memory dependence. if (!hasMemoryWrite(Inst)) continue; MemDepResult InstDep = MD->getDependency(Inst); - + // Ignore any store where we can't find a local dependence. // FIXME: cross-block DSE would be fun. :) - if (InstDep.isNonLocal() || InstDep.isUnknown()) + if (!InstDep.isDef() && !InstDep.isClobber()) continue; - + // If we're storing the same value back to a pointer that we just // loaded from, then the store can be removed. if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - SI->getOperand(0) == DepLoad && !SI->isVolatile()) { + SI->getOperand(0) == DepLoad && isRemovable(SI)) { DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); - + // DeleteDeadInstruction can delete the current instruction. Save BBI // in case we need it. WeakVH NextInst(BBI); - + DeleteDeadInstruction(SI, *MD); - + if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. @@ -467,15 +469,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { } } } - + // Figure out what location is being stored to. AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA); // If we didn't get a useful location, fail. 
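  // (The loop below walks the local dependence chain backwards: while the
  // dependence is still a def or clobber of Loc, any earlier must-aliased
  // store that Inst completely overwrites is deleted, and the walk may step
  // past a may-aliased store that cannot read Loc to look for more
  // candidates.)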
if (Loc.Ptr == 0) continue; - - while (!InstDep.isNonLocal() && !InstDep.isUnknown()) { + + while (InstDep.isDef() || InstDep.isClobber()) { // Get the memory clobbered by the instruction we depend on. MemDep will // skip any instructions that 'Loc' clearly doesn't interact with. If we // end up depending on a may- or must-aliased load, then we can't optimize @@ -496,12 +498,12 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); - + // Delete the store and now-dead instructions that feed it. DeleteDeadInstruction(DepWrite, *MD); ++NumFastStores; MadeChange = true; - + // DeleteDeadInstruction can delete the current instruction in loop // cases, reset BBI. BBI = Inst; @@ -509,7 +511,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { --BBI; break; } - + // If this is a may-aliased store that is clobbering the store value, we // can keep searching past it for another must-aliased pointer that stores // to the same location. For example, in: @@ -519,20 +521,20 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // we can remove the first store to P even though we don't know if P and Q // alias. if (DepWrite == &BB.front()) break; - + // Can't look past this instruction if it might read 'Loc'. if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref) break; - + InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB); } } - + // If this block ends in a return, unwind, or unreachable, all allocas are // dead at its end, which means stores to them are also dead. if (BB.getTerminator()->getNumSuccessors() == 0) MadeChange |= handleEndBlock(BB); - + return MadeChange; } @@ -543,18 +545,18 @@ bool DSE::HandleFree(CallInst *F) { MemDepResult Dep = MD->getDependency(F); - while (!Dep.isNonLocal() && !Dep.isUnknown()) { + while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) return MadeChange; - + Value *DepPointer = GetUnderlyingObject(getStoredPointerOperand(Dependency)); // Check for aliasing. if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) return MadeChange; - + // DCE instructions only used to calculate that store DeleteDeadInstruction(Dependency, *MD); ++NumFastStores; @@ -567,7 +569,7 @@ bool DSE::HandleFree(CallInst *F) { // free(s); Dep = MD->getDependency(F); }; - + return MadeChange; } @@ -579,28 +581,28 @@ bool DSE::HandleFree(CallInst *F) { /// ret void bool DSE::handleEndBlock(BasicBlock &BB) { bool MadeChange = false; - + // Keep track of all of the stack objects that are dead at the end of the // function. SmallPtrSet<Value*, 16> DeadStackObjects; - + // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) DeadStackObjects.insert(AI); - + // Treat byval arguments the same, stores to them are dead at the end of the // function. for (Function::arg_iterator AI = BB.getParent()->arg_begin(), AE = BB.getParent()->arg_end(); AI != AE; ++AI) if (AI->hasByValAttr()) DeadStackObjects.insert(AI); - + // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; - + // If we find a store, check to see if it points into a dead stack value. 
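  // (DeadStackObjects starts out holding every entry-block alloca and byval
  // argument, i.e. everything trivially dead once the function returns;
  // scanning bottom-up, a read of an object removes it from the set, so only
  // stores into still-dead objects get deleted.)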
if (hasMemoryWrite(BBI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts @@ -609,10 +611,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Stores to stack values are valid candidates for removal. if (DeadStackObjects.count(Pointer)) { Instruction *Dead = BBI++; - + DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " << *Dead << "\n Object: " << *Pointer << '\n'); - + // DCE instructions only used to calculate that store. DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); ++NumFastStores; @@ -620,7 +622,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } } - + // Remove any dead non-memory-mutating instructions. if (isInstructionTriviallyDead(BBI)) { Instruction *Inst = BBI++; @@ -629,55 +631,61 @@ bool DSE::handleEndBlock(BasicBlock &BB) { MadeChange = true; continue; } - + if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) { DeadStackObjects.erase(A); continue; } - + if (CallSite CS = cast<Value>(BBI)) { // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) continue; - + // If the call might load from any of our allocas, then any store above // the call is live. SmallVector<Value*, 8> LiveAllocas; for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { // See if the call site touches it. - AliasAnalysis::ModRefResult A = + AliasAnalysis::ModRefResult A = AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); - + if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) LiveAllocas.push_back(*I); } - + for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), E = LiveAllocas.end(); I != E; ++I) DeadStackObjects.erase(*I); - + // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. if (DeadStackObjects.empty()) return MadeChange; - + continue; } - + AliasAnalysis::Location LoadedLoc; - + // If we encounter a use of the pointer, it is no longer considered dead if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { + if (!L->isUnordered()) // Be conservative with atomic/volatile load + break; LoadedLoc = AA->getLocation(L); } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { LoadedLoc = AA->getLocation(V); } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { LoadedLoc = AA->getLocationForSource(MTI); - } else { - // Not a loading instruction. + } else if (!BBI->mayReadFromMemory()) { + // Instruction doesn't read memory. Note that stores that weren't removed + // above will hit this case. continue; + } else { + // Unknown inst; assume it clobbers everything. + break; } // Remove any allocas from the DeadPointer set that are loaded, as this @@ -689,7 +697,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (DeadStackObjects.empty()) break; } - + return MadeChange; } @@ -703,14 +711,14 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, // A constant can't be in the dead pointer set. if (isa<Constant>(UnderlyingPointer)) return; - + // If the kill pointer can be easily reduced to an alloca, don't bother doing // extraneous AA queries. 
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer)); return; } - + SmallVector<Value*, 16> NowLive; for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 3d3f17b..c0223d2 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -92,7 +92,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) { // Hash in all of the operands as pointers. unsigned Res = 0; for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) - Res ^= getHash(Inst->getOperand(i)) << i; + Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); if (CastInst *CI = dyn_cast<CastInst>(Inst)) Res ^= getHash(CI->getType()); @@ -185,7 +185,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) { assert(!Inst->getOperand(i)->getType()->isMetadataTy() && "Cannot value number calls with metadata operands"); - Res ^= getHash(Inst->getOperand(i)) << i; + Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); } // Mix in the opcode. @@ -357,7 +357,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If this is a non-volatile load, process it. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { // Ignore volatile loads. - if (LI->isVolatile()) { + if (!LI->isSimple()) { LastStore = 0; continue; } @@ -437,7 +437,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration)); // Remember that this was the last store we saw for DSE. - if (!SI->isVolatile()) + if (SI->isSimple()) LastStore = SI; } } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 87b7317..cbfdbcd 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -41,12 +41,16 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace PatternMatch; STATISTIC(NumGVNInstr, "Number of instructions deleted"); STATISTIC(NumGVNLoad, "Number of loads deleted"); STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); STATISTIC(NumGVNBlocks, "Number of blocks merged"); +STATISTIC(NumGVNSimpl, "Number of instructions simplified"); +STATISTIC(NumGVNEqProp, "Number of equalities propagated"); STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt<bool> EnablePRE("enable-pre", @@ -63,7 +67,7 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); namespace { struct Expression { uint32_t opcode; - const Type *type; + Type *type; SmallVector<uint32_t, 4> varargs; Expression(uint32_t o = ~2U) : opcode(o) { } @@ -548,6 +552,9 @@ namespace { void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); + unsigned replaceAllDominatedUsesWith(Value *From, Value *To, + BasicBlock *Root); + bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); }; char GVN::ID = 0; @@ -655,7 +662,7 @@ SpeculationFailure: /// CanCoerceMustAliasedValueToLoad - Return true if /// CoerceAvailableValueToLoadType will succeed. static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, - const Type *LoadTy, + Type *LoadTy, const TargetData &TD) { // If the loaded or stored value is a first class array or struct, don't try // to transform them. 
We need to be able to bitcast to integer. @@ -680,17 +687,17 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, /// /// If we can't do it, return null. static Value *CoerceAvailableValueToLoadType(Value *StoredVal, - const Type *LoadedTy, + Type *LoadedTy, Instruction *InsertPt, const TargetData &TD) { if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; // If this is already the right type, just return it. - const Type *StoredValTy = StoredVal->getType(); + Type *StoredValTy = StoredVal->getType(); - uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); - uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy); + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { @@ -704,7 +711,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } - const Type *TypeToCastTo = LoadedTy; + Type *TypeToCastTo = LoadedTy; if (TypeToCastTo->isPointerTy()) TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); @@ -743,7 +750,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, } // Truncate the integer to the right size now. - const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); + Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); if (LoadedTy == NewIntTy) @@ -765,7 +772,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, /// Check this case to see if there is anything more we can do before we give /// up. This returns -1 if we have to give up, or a byte number in the stored /// value of the piece that feeds the load. -static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, Value *WritePtr, uint64_t WriteSizeInBits, const TargetData &TD) { @@ -839,7 +846,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, /// AnalyzeLoadFromClobberingStore - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. -static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const TargetData &TD) { // Cannot handle reading from store of first-class aggregate yet. @@ -856,7 +863,7 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, /// AnalyzeLoadFromClobberingLoad - This function is called when we have a /// memdep query of a load that ends up being clobbered by another load. See if /// the other load can feed into the second load. -static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, const TargetData &TD){ // Cannot handle reading from store of first-class aggregate yet. 
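  // (Worked example with illustrative values, little-endian: for a clobbering
  // "store i32 %v" at %p and a "load i8" from %p+2, these analyses return
  // offset 2, and GetStoreValueForLoad below recovers the byte by shifting
  // %v right by 16 bits and truncating to i8.)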
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) @@ -883,7 +890,7 @@ static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, -static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, +static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemIntrinsic *MI, const TargetData &TD) { // If the mem operation is a non-constant size, we can't handle it. @@ -920,7 +927,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, llvm::Type::getInt8PtrTy(Src->getContext())); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); if (ConstantFoldLoadFromConstPtr(Src, &TD)) return Offset; @@ -934,7 +941,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, /// mustalias. Check this case to see if there is anything more we can do /// before we give up. static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, - const Type *LoadTy, + Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); @@ -946,10 +953,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (SrcVal->getType()->isPointerTy()) - SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp"); + SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx)); if (!SrcVal->getType()->isIntegerTy()) - SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8), - "tmp"); + SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); // Shift the bits to the least significant depending on endianness. unsigned ShiftAmt; @@ -959,11 +965,10 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, ShiftAmt = (StoreSize-LoadSize-Offset)*8; if (ShiftAmt) - SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp"); + SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); if (LoadSize != StoreSize) - SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8), - "tmp"); + SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8)); return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } @@ -974,7 +979,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, /// because the pointers don't mustalias. Check this case to see if there is /// anything more we can do before we give up. 
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, - const Type *LoadTy, Instruction *InsertPt, + Type *LoadTy, Instruction *InsertPt, GVN &gvn) { const TargetData &TD = *gvn.getTargetData(); // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to @@ -982,8 +987,8 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); unsigned LoadSize = TD.getTypeStoreSize(LoadTy); if (Offset+LoadSize > SrcValSize) { - assert(!SrcVal->isVolatile() && "Cannot widen volatile load!"); - assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load"); + assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); + assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); // If we have a load/load clobber and DepLI can be widened to cover this // load, then we should widen it to the next power of 2 size big enough! unsigned NewLoadSize = Offset+LoadSize; @@ -996,7 +1001,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, // memdep queries will find the new load. We can't easily remove the old // load completely because it is already in the value numbering table. IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); - const Type *DestPTy = + Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize*8); DestPTy = PointerType::get(DestPTy, cast<PointerType>(PtrVal->getType())->getAddressSpace()); @@ -1034,7 +1039,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, /// GetMemInstValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, - const Type *LoadTy, Instruction *InsertPt, + Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = LoadTy->getContext(); uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; @@ -1081,7 +1086,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, llvm::Type::getInt8PtrTy(Src->getContext())); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); return ConstantFoldLoadFromConstPtr(Src, &TD); } @@ -1154,7 +1159,7 @@ struct AvailableValueInBlock { /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const { + Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { Value *Res; if (isSimpleValue()) { Res = getSimpleValue(); @@ -1213,7 +1218,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI->getType(), LI->getName()); - const Type *LoadTy = LI->getType(); + Type *LoadTy = LI->getType(); for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { const AvailableValueInBlock &AV = ValuesPerBlock[i]; @@ -1274,7 +1279,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. 
- if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) { + if (Deps.size() == 1 + && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) + { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); @@ -1294,7 +1301,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); - if (DepInfo.isUnknown()) { + if (!DepInfo.isDef() && !DepInfo.isClobber()) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1359,7 +1366,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { continue; } - assert(DepInfo.isDef() && "Expecting def here"); + // DepInfo.isDef() here Instruction *DepInst = DepInfo.getInst(); @@ -1446,8 +1453,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) Blockers.insert(UnavailableBlocks[i]); - // Lets find first basic block with more than one predecessor. Walk backwards - // through predecessors if needed. + // Let's find the first basic block with more than one predecessor. Walk + // backwards through predecessors if needed. BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; @@ -1519,10 +1526,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { << Pred->getName() << "': " << *LI << '\n'); return false; } + + if (LoadBB->isLandingPad()) { + DEBUG(dbgs() + << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); } } + if (!NeedToSplit.empty()) { toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); return false; @@ -1660,7 +1676,7 @@ bool GVN::processLoad(LoadInst *L) { if (!MD) return false; - if (L->isVolatile()) + if (!L->isSimple()) return false; if (L->use_empty()) { @@ -1747,7 +1763,11 @@ bool GVN::processLoad(LoadInst *L) { return false; } - if (Dep.isUnknown()) { + // If it is defined in another block, try harder. + if (Dep.isNonLocal()) + return processNonLocalLoad(L); + + if (!Dep.isDef()) { DEBUG( // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; @@ -1757,12 +1777,6 @@ bool GVN::processLoad(LoadInst *L) { return false; } - // If it is defined in another block, try harder. - if (Dep.isNonLocal()) - return processNonLocalLoad(L); - - assert(Dep.isDef() && "Expecting def here"); - Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { Value *StoredVal = DepSI->getValueOperand(); @@ -1874,6 +1888,133 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { return Val; } +/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the +/// use is dominated by the given basic block. Returns the number of uses that +/// were replaced. +unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, + BasicBlock *Root) { + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE; ) { + Instruction *User = cast<Instruction>(*UI); + unsigned OpNum = UI.getOperandNo(); + ++UI; + + if (DT->dominates(Root, User->getParent())) { + User->setOperand(OpNum, To); + ++Count; + } + } + return Count; +} + +/// propagateEquality - The given values are known to be equal in every block +/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with +/// 'RHS' everywhere in the scope. Returns whether a change was made. 
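+/// For example, given "br i1 %cmp, label %t, label %f" where %cmp is
+/// "icmp eq i32 %a, 42", every use of %a dominated by %t can be rewritten to
+/// the constant 42; and if "and i1 %x, %y" is known true, then %x and %y are
+/// each known true recursively. (The IR names are illustrative.)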
+bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { + if (LHS == RHS) return false; + assert(LHS->getType() == RHS->getType() && "Equal but types differ!"); + + // Don't try to propagate equalities between constants. + if (isa<Constant>(LHS) && isa<Constant>(RHS)) + return false; + + // Make sure that any constants are on the right-hand side. In general the + // best results are obtained by placing the longest lived value on the RHS. + if (isa<Constant>(LHS)) + std::swap(LHS, RHS); + + // If neither term is constant then bail out. This is not for correctness, + // it's just that the non-constant case is much less useful: it occurs just + // as often as the constant case but handling it hardly ever results in an + // improvement. + if (!isa<Constant>(RHS)) + return false; + + // If value numbering later deduces that an instruction in the scope is equal + // to 'LHS' then ensure it will be turned into 'RHS'. + addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root); + + // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. + unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); + bool Changed = NumReplacements > 0; + NumGVNEqProp += NumReplacements; + + // Now try to deduce additional equalities from this one. For example, if the + // known equality was "(A != B)" == "false" then it follows that A and B are + // equal in the scope. Only boolean equalities with an explicit true or false + // RHS are currently supported. + if (!RHS->getType()->isIntegerTy(1)) + // Not a boolean equality - bail out. + return Changed; + ConstantInt *CI = dyn_cast<ConstantInt>(RHS); + if (!CI) + // RHS neither 'true' nor 'false' - bail out. + return Changed; + // Whether RHS equals 'true'. Otherwise it equals 'false'. + bool isKnownTrue = CI->isAllOnesValue(); + bool isKnownFalse = !isKnownTrue; + + // If "A && B" is known true then both A and B are known true. If "A || B" + // is known false then both A and B are known false. + Value *A, *B; + if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || + (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { + Changed |= propagateEquality(A, RHS, Root); + Changed |= propagateEquality(B, RHS, Root); + return Changed; + } + + // If we are propagating an equality like "(A == B)" == "true" then also + // propagate the equality A == B. + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { + // Only equality comparisons are supported. + if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || + (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) { + Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); + Changed |= propagateEquality(Op0, Op1, Root); + } + return Changed; + } + + return Changed; +} + +/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return +/// true if every path from the entry block to 'Dst' passes via this edge. In +/// particular 'Dst' must not be reachable via another edge from 'Src'. +static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, + DominatorTree *DT) { + // First off, there must not be more than one edge from Src to Dst, there + // should be exactly one. So keep track of the number of times Src occurs + // as a predecessor of Dst and fail if it's more than once. Secondly, any + // other predecessors of Dst should be dominated by Dst (see logic below). 
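+  // (E.g. in a diamond A->B, A->C, B->D, C->D, the edge B->D fails the test:
+  // D is reachable from A through C, and C is not dominated by D. For a
+  // conditional branch whose true successor T has A as its only predecessor,
+  // the edge A->T succeeds.)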
+ bool SawEdgeFromSrc = false; + for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) { + BasicBlock *Pred = *PI; + if (Pred == Src) { + // An edge from Src to Dst. + if (SawEdgeFromSrc) + // There are multiple edges from Src to Dst - fail. + return false; + SawEdgeFromSrc = true; + continue; + } + // If the predecessor is not dominated by Dst, then it must be possible to + // reach it either without passing through Src (and thus not via the edge) + // or by passing through Src but taking a different edge out of Src. Either + // way it is possible to reach Dst without passing via the edge, so fail. + if (!DT->dominates(Dst, *PI)) + return false; + } + assert(SawEdgeFromSrc && "No edge between these basic blocks!"); + + // Every path from the entry block to Dst must at some point pass to Dst from + // a predecessor that is not dominated by Dst. This predecessor can only be + // Src, since all others are dominated by Dst. As there is only one edge from + // Src to Dst, the path passes by this edge. + return true; +} /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets @@ -1891,6 +2032,7 @@ bool GVN::processInstruction(Instruction *I) { if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(I); + ++NumGVNSimpl; return true; } @@ -1903,30 +2045,45 @@ bool GVN::processInstruction(Instruction *I) { return false; } - // For conditions branches, we can perform simple conditional propagation on + // For conditional branches, we can perform simple conditional propagation on // the condition value itself. if (BranchInst *BI = dyn_cast<BranchInst>(I)) { if (!BI->isConditional() || isa<Constant>(BI->getCondition())) return false; - + Value *BranchCond = BI->getCondition(); - uint32_t CondVN = VN.lookup_or_add(BranchCond); - + BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); - - if (TrueSucc->getSinglePredecessor()) - addToLeaderTable(CondVN, - ConstantInt::getTrue(TrueSucc->getContext()), - TrueSucc); - if (FalseSucc->getSinglePredecessor()) - addToLeaderTable(CondVN, - ConstantInt::getFalse(TrueSucc->getContext()), - FalseSucc); - - return false; + BasicBlock *Parent = BI->getParent(); + bool Changed = false; + + if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT)) + Changed |= propagateEquality(BranchCond, + ConstantInt::getTrue(TrueSucc->getContext()), + TrueSucc); + + if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT)) + Changed |= propagateEquality(BranchCond, + ConstantInt::getFalse(FalseSucc->getContext()), + FalseSucc); + + return Changed; } - + + // For switches, propagate the case values into the case destinations. + if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { + Value *SwitchCond = SI->getCondition(); + BasicBlock *Parent = SI->getParent(); + bool Changed = false; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { + BasicBlock *Dst = SI->getSuccessor(i); + if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) + Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst); + } + return Changed; + } + // Instructions with void type don't return a value, so there's // no point in trying to find redundancies in them. if (I->getType()->isVoidTy()) return false; @@ -2071,6 +2228,9 @@ bool GVN::performPRE(Function &F) { // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; + // Don't perform PRE on a landing pad. 
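+    // (The landingpad instruction must stay the first non-PHI in its block,
+    // and the block is reached only over invoke unwind edges, which cannot
+    // be split, so there is no safe spot to place PRE'd computations for it.)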
+ if (CurrentBlock->isLandingPad()) continue; + for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE; ) { Instruction *CurInst = BI++; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index dee3d38..75fa011 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -11,17 +11,6 @@ // computations derived from them) into simpler forms suitable for subsequent // analysis and transformation. // -// This transformation makes the following changes to each loop with an -// identifiable induction variable: -// 1. All loops are transformed to have a SINGLE canonical induction variable -// which starts at zero and steps by one. -// 2. The canonical induction variable is guaranteed to be the first PHI node -// in the loop header block. -// 3. The canonical induction variable is guaranteed to be in a wide enough -// type so that IV expressions need not be (directly) zero-extended or -// sign-extended. -// 4. Any pointer arithmetic recurrences are raised to use array subscripts. -// // If the trip count of a loop is computable, this pass also makes the following // changes: // 1. The exit condition for the loop is canonicalized to compare the @@ -33,9 +22,6 @@ // purpose of the loop is to compute the exit value of some derived // expression, this transformation will make the loop dead. // -// This transformation should be followed by strength reduction after all of the -// desired loop transformations have been performed. -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "indvars" @@ -57,11 +43,11 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumRemoved , "Number of aux indvars removed"); @@ -69,15 +55,21 @@ STATISTIC(NumWidened , "Number of indvars widened"); STATISTIC(NumInserted , "Number of canonical indvars added"); STATISTIC(NumReplaced , "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); -STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); -STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); -STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); -static cl::opt<bool> DisableIVRewrite( - "disable-iv-rewrite", cl::Hidden, - cl::desc("Disable canonical induction variable rewriting")); +namespace llvm { + cl::opt<bool> EnableIVRewrite( + "enable-iv-rewrite", cl::Hidden, + cl::desc("Enable canonical induction variable rewriting")); + + // Trip count verification can be enabled by default under NDEBUG if we + // implement a strong expression equivalence checker in SCEV. Until then, we + // use the verify-indvars flag, which may assert in some cases. 
+ cl::opt<bool> VerifyIndvars( + "verify-indvars", cl::Hidden, + cl::desc("Verify the ScalarEvolution result after running indvars")); +} namespace { class IndVarSimplify : public LoopPass { @@ -105,12 +97,12 @@ namespace { AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - if (!DisableIVRewrite) + if (EnableIVRewrite) AU.addRequired<IVUsers>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); - if (!DisableIVRewrite) + if (EnableIVRewrite) AU.addPreserved<IVUsers>(); AU.setPreservesCFG(); } @@ -125,24 +117,14 @@ namespace { void HandleFloatingPointIV(Loop *L, PHINode *PH); void RewriteNonIntegerIVs(Loop *L); - void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); - - void SimplifyIVUsers(SCEVExpander &Rewriter); - void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter); + void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); - bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand); - void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); - void EliminateIVRemainder(BinaryOperator *Rem, - Value *IVOperand, - bool IsSigned); - - void SimplifyCongruentIVs(Loop *L); + void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); - ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - PHINode *IndVar, - SCEVExpander &Rewriter); + Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, + PHINode *IndVar, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); }; @@ -211,6 +193,36 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { return true; } +/// Determine the insertion point for this user. By default, insert immediately +/// before the user. SCEVExpander or LICM will hoist loop invariants out of the +/// loop. For PHI nodes, there may be multiple uses, so compute the nearest +/// common dominator for the incoming blocks. +static Instruction *getInsertPointForUses(Instruction *User, Value *Def, + DominatorTree *DT) { + PHINode *PHI = dyn_cast<PHINode>(User); + if (!PHI) + return User; + + Instruction *InsertPt = 0; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + if (PHI->getIncomingValue(i) != Def) + continue; + + BasicBlock *InsertBB = PHI->getIncomingBlock(i); + if (!InsertPt) { + InsertPt = InsertBB->getTerminator(); + continue; + } + InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); + InsertPt = InsertBB->getTerminator(); + } + assert(InsertPt && "Missing phi operand"); + assert((!isa<Instruction>(Def) || + DT->dominates(cast<Instruction>(Def), InsertPt)) && + "def does not dominate all uses"); + return InsertPt; +} + //===----------------------------------------------------------------------===// // RewriteNonIntegerIVs and helpers. Prefer integer IVs. //===----------------------------------------------------------------------===// @@ -337,14 +349,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // Positive and negative strides have different safety conditions. if (IncValue > 0) { // If we have a positive stride, we require the init to be less than the - // exit value and an equality or less than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) + // exit value. 
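+      // (E.g. "for (float i = Init; i <= Exit; ++i)" takes Exit-Init+1 steps;
+      // the "++Range == 0" check below rejects the case where that trip count
+      // does not fit in 32 bits, since the replacement IV is an i32.)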
+ if (InitValue >= ExitValue) return; uint32_t Range = uint32_t(ExitValue-InitValue); - if (NewPred == CmpInst::ICMP_SLE) { - // Normalize SLE -> SLT, check for infinite loop. + // Check for infinite loop, either: + // while (i <= Exit) or until (i > Exit) + if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { if (++Range == 0) return; // Range overflows. } @@ -364,14 +376,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { } else { // If we have a negative stride, we require the init to be greater than the - // exit value and an equality or greater than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) + // exit value. + if (InitValue <= ExitValue) return; uint32_t Range = uint32_t(InitValue-ExitValue); - if (NewPred == CmpInst::ICMP_SGE) { - // Normalize SGE -> SGT, check for infinite loop. + // Check for infinite loop, either: + // while (i >= Exit) or until (i < Exit) + if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { if (++Range == 0) return; // Range overflows. } @@ -390,7 +402,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { return; } - const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); + IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); @@ -429,7 +441,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // platforms. if (WeakPH) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", - PN->getParent()->getFirstNonPHI()); + PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); RecursivelyDeleteTriviallyDeadInstructions(PN); } @@ -437,6 +449,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // Add a new IVUsers entry for the newly-created integer PHI. if (IU) IU->AddUsersIfInteresting(NewPHI); + + Changed = true; } void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { @@ -582,45 +596,15 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { //===----------------------------------------------------------------------===// // Rewrite IV users based on a canonical IV. -// To be replaced by -disable-iv-rewrite. +// Only for use with -enable-iv-rewrite. //===----------------------------------------------------------------------===// -/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this -/// loop. IVUsers is treated as a worklist. Each successive simplification may -/// push more users which may themselves be candidates for simplification. -/// -/// This is the old approach to IV simplification to be replaced by -/// SimplifyIVUsersNoRewrite. -/// -void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { - // Each round of simplification involves a round of eliminating operations - // followed by a round of widening IVs. A single IVUsers worklist is used - // across all rounds. The inner loop advances the user. If widening exposes - // more uses, then another pass through the outer loop is triggered. 
- for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { - Instruction *UseInst = I->getUser(); - Value *IVOperand = I->getOperandValToReplace(); - - if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { - EliminateIVComparison(ICmp, IVOperand); - continue; - } - if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSigned = Rem->getOpcode() == Instruction::SRem; - if (IsSigned || Rem->getOpcode() == Instruction::URem) { - EliminateIVRemainder(Rem, IVOperand, IsSigned); - continue; - } - } - } -} - -// FIXME: It is an extremely bad idea to indvar substitute anything more -// complex than affine induction variables. Doing so will put expensive -// polynomial evaluations inside of the loop, and the str reduction pass -// currently can only reduce affine polynomials. For now just disable -// indvar subst on anything more complex than an affine addrec, unless -// it can be expanded to a trivial value. +/// FIXME: It is an extremely bad idea to indvar substitute anything more +/// complex than affine induction variables. Doing so will put expensive +/// polynomial evaluations inside of the loop, and the str reduction pass +/// currently can only reduce affine polynomials. For now just disable +/// indvar subst on anything more complex than an affine addrec, unless +/// it can be expanded to a trivial value. static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { // Loop-invariant values are safe. if (SE->isLoopInvariant(S, L)) return true; @@ -631,7 +615,8 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { return AR->isAffine(); // An add is safe if all its operands are safe. - if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { + if (const SCEVCommutativeExpr *Commutative + = dyn_cast<SCEVCommutativeExpr>(S)) { for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), E = Commutative->op_end(); I != E; ++I) if (!isSafe(*I, L, SE)) return false; @@ -665,7 +650,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // of different sizes. for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { Value *Op = UI->getOperandValToReplace(); - const Type *UseTy = Op->getType(); + Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); // Compute the final addrec to expand into code. @@ -692,18 +677,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // hoist loop invariants out of the loop. For PHI nodes, there may be // multiple uses, so compute the nearest common dominator for the // incoming blocks. - Instruction *InsertPt = User; - if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingValue(i) == Op) { - if (InsertPt == User) - InsertPt = PHI->getIncomingBlock(i)->getTerminator(); - else - InsertPt = - DT->findNearestCommonDominator(InsertPt->getParent(), - PHI->getIncomingBlock(i)) - ->getTerminator(); - } + Instruction *InsertPt = getInsertPointForUses(User, Op, DT); // Now expand it into actual Instructions and patch it into place. Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); @@ -747,19 +721,38 @@ namespace { // extend operations. This information is recorded by CollectExtend and // provides the input to WidenIV. struct WideIVInfo { - const Type *WidestNativeType; // Widest integer type created [sz]ext - bool IsSigned; // Was an sext user seen before a zext? 
+ PHINode *NarrowIV; + Type *WidestNativeType; // Widest integer type created [sz]ext + bool IsSigned; // Was an sext user seen before a zext? - WideIVInfo() : WidestNativeType(0), IsSigned(false) {} + WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {} + }; + + class WideIVVisitor : public IVVisitor { + ScalarEvolution *SE; + const TargetData *TD; + + public: + WideIVInfo WI; + + WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV, + const TargetData *TData) : + SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; } + + // Implement the interface used by simplifyUsersOfIV. + virtual void visitCast(CastInst *Cast); }; } -/// CollectExtend - Update information about the induction variable that is +/// visitCast - Update information about the induction variable that is /// extended by this sign or zero extend operation. This is used to determine /// the final width of the IV before actually widening it. -static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI, - ScalarEvolution *SE, const TargetData *TD) { - const Type *Ty = Cast->getType(); +void WideIVVisitor::visitCast(CastInst *Cast) { + bool IsSigned = Cast->getOpcode() == Instruction::SExt; + if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) + return; + + Type *Ty = Cast->getType(); uint64_t Width = SE->getTypeSizeInBits(Ty); if (TD && !TD->isLegalInteger(Width)) return; @@ -779,6 +772,21 @@ static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI, } namespace { + +/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the +/// WideIV that computes the same value as the Narrow IV def. This avoids +/// caching Use* pointers. +struct NarrowIVDefUse { + Instruction *NarrowDef; + Instruction *NarrowUse; + Instruction *WideDef; + + NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {} + + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): + NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} +}; + /// WidenIV - The goal of this transform is to remove sign and zero extends /// without creating any new induction variables. 
To do this, it creates a new /// phi of the wider type and redirects all users, either removing extends or @@ -787,7 +795,7 @@ namespace { class WidenIV { // Parameters PHINode *OrigPhi; - const Type *WideType; + Type *WideType; bool IsSigned; // Context @@ -803,13 +811,13 @@ class WidenIV { SmallVectorImpl<WeakVH> &DeadInsts; SmallPtrSet<Instruction*,16> Widened; - SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; + SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; public: - WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo, + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, SmallVectorImpl<WeakVH> &DI) : - OrigPhi(PN), + OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), IsSigned(WI.IsSigned), LI(LInfo), @@ -826,21 +834,42 @@ public: PHINode *CreateWideIV(SCEVExpander &Rewriter); protected: - Instruction *CloneIVUser(Instruction *NarrowUse, - Instruction *NarrowDef, - Instruction *WideDef); + Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use); + + Instruction *CloneIVUser(NarrowIVDefUse DU); const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); - Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, - Instruction *WideDef); + const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU); + + Instruction *WidenIVUse(NarrowIVDefUse DU); void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; } // anonymous namespace -static Value *getExtend( Value *NarrowOper, const Type *WideType, - bool IsSigned, IRBuilder<> &Builder) { +/// isLoopInvariant - Perform a quick domtree based check for loop invariance +/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems +/// gratuitous for this purpose. +static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) { + Instruction *Inst = dyn_cast<Instruction>(V); + if (!Inst) + return true; + + return DT->properlyDominates(Inst->getParent(), L->getHeader()); +} + +Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use) { + // Set the debug location and conservative insertion point. + IRBuilder<> Builder(Use); + // Hoist the insertion point into loop preheaders as far as possible. + for (const Loop *L = LI->getLoopFor(Use->getParent()); + L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT); + L = L->getParentLoop()) + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : Builder.CreateZExt(NarrowOper, WideType); } @@ -848,10 +877,8 @@ static Value *getExtend( Value *NarrowOper, const Type *WideType, /// CloneIVUser - Instantiate a wide operation to replace a narrow /// operation. This only needs to handle operations that can evaluate to /// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
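One piece of machinery introduced above is worth restating in isolation. WideIVVisitor::visitCast accumulates the widest legal extend of the IV, letting the first sext or zext seen fix the signedness; a simplified standalone model (plain bit counts instead of llvm::Type, hypothetical names):

struct WideInfoModel {
  unsigned WidestBits = 0; // widest legal [sz]ext seen so far (0 = none)
  bool IsSigned = false;   // was the first extend a sext?
};

// Model of visitCast's bookkeeping: ignore illegal integer widths, fix the
// signedness on the first extend, drop conflicting extends, keep the widest.
void noteExtend(WideInfoModel &WI, unsigned CastBits, bool CastIsSigned,
                bool LegalInteger) {
  if (!LegalInteger)
    return;                       // never create illegal integer types
  if (WI.WidestBits == 0) {
    WI.WidestBits = CastBits;     // first extend fixes the signedness
    WI.IsSigned = CastIsSigned;
    return;
  }
  if (WI.IsSigned != CastIsSigned)
    return;                       // mixed sext/zext users contribute nothing
  if (CastBits > WI.WidestBits)
    WI.WidestBits = CastBits;
}

CloneIVUser, whose updated definition follows, covers the complementary case where a use of the narrow IV must itself be re-instantiated at the wide type.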
-Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, - Instruction *NarrowDef, - Instruction *WideDef) { - unsigned Opcode = NarrowUse->getOpcode(); +Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { + unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: return 0; @@ -865,24 +892,23 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n"); - - IRBuilder<> Builder(NarrowUse); + DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n"); // Replace NarrowDef operands with WideDef. Otherwise, we don't know // anything about the narrow operand yet so must insert a [sz]ext. It is // probably loop invariant and will be folded or hoisted. If it actually // comes from a widened IV, it should be removed during a future call to // WidenIVUse. - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : - getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : - getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder); + Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef : + getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse); + Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef : + getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse); - BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse); + BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse); BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, NarrowBO->getName()); + IRBuilder<> Builder(DU.NarrowUse); Builder.Insert(WideBO); if (const OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(NarrowBO)) { @@ -894,45 +920,46 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, llvm_unreachable(0); } -/// HoistStep - Attempt to hoist an IV increment above a potential use. -/// -/// To successfully hoist, two criteria must be met: -/// - IncV operands dominate InsertPos and -/// - InsertPos dominates IncV -/// -/// Meeting the second condition means that we don't need to check all of IncV's -/// existing uses (it's moving up in the domtree). -/// -/// This does not yet recursively hoist the operands, although that would -/// not be difficult. -static bool HoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT) -{ - if (DT->dominates(IncV, InsertPos)) - return true; +/// No-wrap operations can transfer sign extension of their result to their +/// operands. Generate the SCEV value for the widened operation without +/// actually modifying the IR yet. If the expression after extending the +/// operands is an AddRec for this loop, return it. +const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { + // Handle the common case of add<nsw/nuw> + if (DU.NarrowUse->getOpcode() != Instruction::Add) + return 0; - if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) - return false; + // One operand (NarrowDef) has already been extended to WideDef. Now determine + // if extending the other will lead to a recurrence. + unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 
1 : 0; + assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); + + const SCEV *ExtendOperExpr = 0; + const OverflowingBinaryOperator *OBO = + cast<OverflowingBinaryOperator>(DU.NarrowUse); + if (IsSigned && OBO->hasNoSignedWrap()) + ExtendOperExpr = SE->getSignExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else if (!IsSigned && OBO->hasNoUnsignedWrap()) + ExtendOperExpr = SE->getZeroExtendExpr( + SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); + else + return 0; - if (IncV->mayHaveSideEffects()) - return false; + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>( + SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr, + IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW)); - // Attempt to hoist IncV - for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); - OI != OE; ++OI) { - Instruction *OInst = dyn_cast<Instruction>(OI); - if (OInst && !DT->dominates(OInst, InsertPos)) - return false; - } - IncV->moveBefore(InsertPos); - return true; + if (!AddRec || AddRec->getLoop() != L) + return 0; + return AddRec; } -// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' -// perspective after widening it's type? In other words, can the extend be -// safely hoisted out of the loop with SCEV reducing the value to a recurrence -// on the same loop. If so, return the sign or zero extended -// recurrence. Otherwise return NULL. +/// GetWideRecurrence - Is this instruction potentially interesting from +/// IVUsers' perspective after widening its type? In other words, can the +/// extend be safely hoisted out of the loop with SCEV reducing the value to a +/// recurrence on the same loop. If so, return the sign or zero extended +/// recurrence. Otherwise return NULL. const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { if (!SE->isSCEVable(NarrowUse->getType())) return 0; @@ -951,47 +978,45 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); if (!AddRec || AddRec->getLoop() != L) return 0; - return AddRec; } /// WidenIVUse - Determine whether an individual user of the narrow IV can be /// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, - Instruction *WideDef) { - Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser()); +Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) { // Stop traversing the def-use chain at inner-loop phis or post-loop phis. - if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L) + if (isa<PHINode>(DU.NarrowUse) && + LI->getLoopFor(DU.NarrowUse->getParent()) != L) return 0; // Our raison d'etre! Eliminate sign and zero extension. - if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) { - Value *NewDef = WideDef; - if (NarrowUse->getType() != WideType) { - unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType()); + if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) { + Value *NewDef = DU.WideDef; + if (DU.NarrowUse->getType() != WideType) { + unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); unsigned IVWidth = SE->getTypeSizeInBits(WideType); if (CastWidth < IVWidth) { // The cast isn't as wide as the IV, so insert a Trunc.
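GetExtendedOperandRecurrence above leans on a small arithmetic fact that is easy to state independently: a no-wrap add commutes with extension. A checkable C++ model under the stated precondition (illustrative only, not the pass's code):

#include <cstdint>

// If a 32-bit add is known not to wrap (the nsw flag), sign-extending its
// result equals adding the sign-extended operands in 64 bits, so the extend
// may be pushed down onto the operands.
int64_t sextOfAdd(int32_t a, int32_t b) {
  return int64_t(int32_t(a + b));   // sext(add nsw a, b); UB if it wraps
}

int64_t addOfSext(int32_t a, int32_t b) {
  return int64_t(a) + int64_t(b);   // add(sext a, sext b)
}
// Whenever a + b fits in 32 bits, sextOfAdd(a, b) == addOfSext(a, b).

The Trunc insertion picked up below handles the opposite situation, where the wide value must be narrowed back for a cast that is not as wide as the new IV.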
- IRBuilder<> Builder(NarrowDefUse); - NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType()); + IRBuilder<> Builder(DU.NarrowUse); + NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); } else { // A wider extend was hidden behind a narrower one. This may induce // another round of IV widening in which the intermediate IV becomes // dead. It should be very rare. DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi - << " not wide enough to subsume " << *NarrowUse << "\n"); - NarrowUse->replaceUsesOfWith(NarrowDef, WideDef); - NewDef = NarrowUse; + << " not wide enough to subsume " << *DU.NarrowUse << "\n"); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + NewDef = DU.NarrowUse; } } - if (NewDef != NarrowUse) { - DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse - << " replaced by " << *WideDef << "\n"); + if (NewDef != DU.NarrowUse) { + DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse + << " replaced by " << *DU.WideDef << "\n"); ++NumElimExt; - NarrowUse->replaceAllUsesWith(NewDef); - DeadInsts.push_back(NarrowUse); + DU.NarrowUse->replaceAllUsesWith(NewDef); + DeadInsts.push_back(DU.NarrowUse); } // Now that the extend is gone, we want to expose its uses for potential // further simplification. We don't need to directly inform SimplifyIVUsers @@ -1004,29 +1029,32 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, } // Does this user itself evaluate to a recurrence after widening? - const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse); + const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse); + if (!WideAddRec) { + WideAddRec = GetExtendedOperandRecurrence(DU); + } if (!WideAddRec) { // This user does not evaluate to a recurrence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - IRBuilder<> Builder(NarrowDefUse); - Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType()); - NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); + IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); return 0; } - // We assume that block terminators are not SCEVable. We wouldn't want to + // Assume block terminators cannot evaluate to a recurrence. We can't // insert a Trunc after a terminator if there happens to be a critical edge. - assert(NarrowUse != NarrowUse->getParent()->getTerminator() && + assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && "SCEV is not expected to evaluate a block terminator"); // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. Instruction *WideUse = 0; - if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) { + if (WideAddRec == WideIncExpr + && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT)) WideUse = WideInc; - } else { - WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef); + else { + WideUse = CloneIVUser(DU); if (!WideUse) return 0; } @@ -1051,13 +1079,13 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { for (Value::use_iterator UI = NarrowDef->use_begin(), UE = NarrowDef->use_end(); UI != UE; ++UI) { - Use &U = UI.getUse(); + Instruction *NarrowUse = cast<Instruction>(*UI); // Handle data flow merges and bizarre phi cycles.
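Before pushNarrowIVUsers finishes below, the traversal it feeds can be summarized: CreateWideIV drains a worklist of (NarrowDef, NarrowUse, WideDef) triples, queuing a use's own users only once it has a wide clone. A reduced sketch with standard containers (stand-in types, not the LLVM API):

#include <functional>
#include <vector>

struct Inst;                               // stand-in for llvm::Instruction
struct DefUse { Inst *NarrowDef, *NarrowUse, *WideDef; };

// Reduced sketch of CreateWideIV's driver loop: widen one def-use edge at a
// time; only after a use has a wide replacement do its own users get queued.
void processNarrowUses(Inst *OrigPhi, Inst *WidePhi,
                       std::function<void(Inst *, Inst *,
                                          std::vector<DefUse> &)> pushUsers,
                       std::function<Inst *(const DefUse &)> widenOne) {
  std::vector<DefUse> Worklist;
  pushUsers(OrigPhi, WidePhi, Worklist);
  while (!Worklist.empty()) {
    DefUse DU = Worklist.back();
    Worklist.pop_back();
    // widenOne may fail (returns null); then the edge was truncated instead.
    if (Inst *WideUse = widenOne(DU))
      pushUsers(DU.NarrowUse, WideUse, Worklist);
  }
}

The Widened set consulted below is what keeps this loop from revisiting data-flow merges and phi cycles.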
- if (!Widened.insert(cast<Instruction>(U.getUser()))) + if (!Widened.insert(NarrowUse)) continue; - NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef)); + NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef)); } } @@ -1124,23 +1152,19 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { pushNarrowIVUsers(OrigPhi, WidePhi); while (!NarrowIVUsers.empty()) { - Use *UsePtr; - Instruction *WideDef; - tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val(); - Use &NarrowDefUse = *UsePtr; + NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. - Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get()); - Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef); + Instruction *WideUse = WidenIVUse(DU); // Follow all def-use edges from the previous narrow use. if (WideUse) - pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse); + pushNarrowIVUsers(DU.NarrowUse, WideUse); // WidenIVUse may have removed the def-use edge. - if (NarrowDef->use_empty()) - DeadInsts.push_back(NarrowDef); + if (DU.NarrowDef->use_empty()) + DeadInsts.push_back(DU.NarrowDef); } return WidePhi; } @@ -1149,187 +1173,17 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Simplification of IV users based on SCEV evaluation. //===----------------------------------------------------------------------===// -void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { - unsigned IVOperIdx = 0; - ICmpInst::Predicate Pred = ICmp->getPredicate(); - if (IVOperand != ICmp->getOperand(0)) { - // Swapped - assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); - IVOperIdx = 1; - Pred = ICmpInst::getSwappedPredicate(Pred); - } - - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); - const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); - - // Simplify unnecessary loops away. - const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // If the condition is always true or always false, replace it with - // a constant value. - if (SE->isKnownPredicate(Pred, S, X)) - ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); - else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) - ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); - else - return; - - DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); - ++NumElimCmp; - Changed = true; - DeadInsts.push_back(ICmp); -} - -void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, - Value *IVOperand, - bool IsSigned) { - // We're only interested in the case where we know something about - // the numerator. - if (IVOperand != Rem->getOperand(0)) - return; - - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(Rem->getOperand(0)); - const SCEV *X = SE->getSCEV(Rem->getOperand(1)); - - // Simplify unnecessary loops away. - const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // i % n --> i if i is in [0,n). - if ((!IsSigned || SE->isKnownNonNegative(S)) && - SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - S, X)) - Rem->replaceAllUsesWith(Rem->getOperand(0)); - else { - // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). 
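The removed remainder rewrites (their logic now reaches this pass through the shared simplifyUsersOfIV entry point used by SimplifyAndExtend) rest on two division-free identities, shown here as a small checkable model:

#include <cassert>
#include <cstdint>

// For a counter known to stay in [0, n), the remainder needs no division.
uint64_t remOfCounter(uint64_t i, uint64_t n) {
  assert(i < n);
  return i;                           // i % n == i        when 0 <= i < n
}

uint64_t remOfIncrementedCounter(uint64_t i, uint64_t n) {
  assert(i < n);
  return (i + 1 == n) ? 0 : i + 1;    // (i+1) % n, as compare-and-select
}

The second form is exactly the icmp-plus-select sequence the removed code below emitted in place of the srem/urem.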
- const SCEV *LessOne = - SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); - if (IsSigned && !SE->isKnownNonNegative(LessOne)) - return; - - if (!SE->isKnownPredicate(IsSigned ? - ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - LessOne, X)) - return; - - ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, - Rem->getOperand(0), Rem->getOperand(1), - "tmp"); - SelectInst *Sel = - SelectInst::Create(ICmp, - ConstantInt::get(Rem->getType(), 0), - Rem->getOperand(0), "tmp", Rem); - Rem->replaceAllUsesWith(Sel); - } - - // Inform IVUsers about the new users. - if (IU) { - if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) - IU->AddUsersIfInteresting(I); - } - DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); - ++NumElimRem; - Changed = true; - DeadInsts.push_back(Rem); -} - -/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has -/// no observable side-effect given the range of IV values. -bool IndVarSimplify::EliminateIVUser(Instruction *UseInst, - Instruction *IVOperand) { - if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { - EliminateIVComparison(ICmp, IVOperand); - return true; - } - if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSigned = Rem->getOpcode() == Instruction::SRem; - if (IsSigned || Rem->getOpcode() == Instruction::URem) { - EliminateIVRemainder(Rem, IVOperand, IsSigned); - return true; - } - } - - // Eliminate any operation that SCEV can prove is an identity function. - if (!SE->isSCEVable(UseInst->getType()) || - (UseInst->getType() != IVOperand->getType()) || - (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) - return false; - - DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); - - UseInst->replaceAllUsesWith(IVOperand); - ++NumElimIdentity; - Changed = true; - DeadInsts.push_back(UseInst); - return true; -} - -/// pushIVUsers - Add all uses of Def to the current IV's worklist. -/// -static void pushIVUsers( - Instruction *Def, - SmallPtrSet<Instruction*,16> &Simplified, - SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { - - for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - - // Avoid infinite or exponential worklist processing. - // Also ensure unique worklist users. - // If Def is a LoopPhi, it may not be in the Simplified set, so check for - // self edges first. - if (User != Def && Simplified.insert(User)) - SimpleIVUsers.push_back(std::make_pair(User, Def)); - } -} - -/// isSimpleIVUser - Return true if this instruction generates a simple SCEV -/// expression in terms of that IV. -/// -/// This is similar to IVUsers' isInsteresting() but processes each instruction -/// non-recursively when the operand is already known to be a simpleIVUser. -/// -static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { - if (!SE->isSCEVable(I->getType())) - return false; - - // Get the symbolic expression for this instruction. - const SCEV *S = SE->getSCEV(I); - - // We assume that terminators are not SCEVable. - assert((!S || I != I->getParent()->getTerminator()) && - "can't fold terminators"); - - // Only consider affine recurrences. - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); - if (AR && AR->getLoop() == L) - return true; - - return false; -} -/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist -/// of IV users. 
Each successive simplification may push more users which may +/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV +/// users. Each successive simplification may push more users which may /// themselves be candidates for simplification. /// -/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it -/// simplifies instructions in-place during analysis. Rather than rewriting -/// induction variables bottom-up from their users, it transforms a chain of -/// IVUsers top-down, updating the IR only when it encouters a clear -/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still -/// needed, but only used to generate a new IV (phi) of wider type for sign/zero -/// extend elimination. +/// Sign/Zero extend elimination is interleaved with IV simplification. /// -/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. -/// -void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { - std::map<PHINode *, WideIVInfo> WideIVMap; +void IndVarSimplify::SimplifyAndExtend(Loop *L, + SCEVExpander &Rewriter, + LPPassManager &LPM) { + SmallVector<WideIVInfo, 8> WideIVs; SmallVector<PHINode*, 8> LoopPhis; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { @@ -1345,108 +1199,81 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { // extension. The first time SCEV attempts to normalize sign/zero extension, // the result becomes final. So for the most predictable results, we delay // evaluation of sign/zero extend evaluation until needed, and avoid running - // other SCEV based analysis prior to SimplifyIVUsersNoRewrite. + // other SCEV based analysis prior to SimplifyAndExtend. do { PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. - WideIVInfo WI; - - // Instructions processed by SimplifyIVUsers for CurrIV. - SmallPtrSet<Instruction*,16> Simplified; - - // Use-def pairs if IV users waiting to be processed for CurrIV. - SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; - - // Push users of the current LoopPhi. In rare cases, pushIVUsers may be - // called multiple times for the same LoopPhi. This is the proper thing to - // do for loop header phis that use each other. - pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + WideIVVisitor WIV(CurrIV, SE, TD); - while (!SimpleIVUsers.empty()) { - Instruction *UseInst, *Operand; - tie(UseInst, Operand) = SimpleIVUsers.pop_back_val(); - // Bypass back edges to avoid extra work. 
- if (UseInst == CurrIV) continue; + Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV); - if (EliminateIVUser(UseInst, Operand)) { - pushIVUsers(Operand, Simplified, SimpleIVUsers); - continue; - } - if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { - bool IsSigned = Cast->getOpcode() == Instruction::SExt; - if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { - CollectExtend(Cast, IsSigned, WI, SE, TD); - } - continue; - } - if (isSimpleIVUser(UseInst, L, SE)) { - pushIVUsers(UseInst, Simplified, SimpleIVUsers); - } - } - if (WI.WidestNativeType) { - WideIVMap[CurrIV] = WI; + if (WIV.WI.WidestNativeType) { + WideIVs.push_back(WIV.WI); } } while(!LoopPhis.empty()); - for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(), - E = WideIVMap.end(); I != E; ++I) { - WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts); + for (; !WideIVs.empty(); WideIVs.pop_back()) { + WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts); if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) { Changed = true; LoopPhis.push_back(WidePhi); } } - WideIVMap.clear(); } } -/// SimplifyCongruentIVs - Check for congruent phis in this loop header and -/// populate ExprToIVMap for use later. -/// -void IndVarSimplify::SimplifyCongruentIVs(Loop *L) { - DenseMap<const SCEV *, PHINode *> ExprToIVMap; - for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { - PHINode *Phi = cast<PHINode>(I); - if (!SE->isSCEVable(Phi->getType())) - continue; +//===----------------------------------------------------------------------===// +// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. +//===----------------------------------------------------------------------===// - const SCEV *S = SE->getSCEV(Phi); - DenseMap<const SCEV *, PHINode *>::const_iterator Pos; - bool Inserted; - tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi)); - if (Inserted) - continue; - PHINode *OrigPhi = Pos->second; - // Replacing the congruent phi is sufficient because acyclic redundancy - // elimination, CSE/GVN, should handle the rest. However, once SCEV proves - // that a phi is congruent, it's almost certain to be the head of an IV - // user cycle that is isomorphic with the original phi. So it's worth - // eagerly cleaning up the common case of a single IV increment. - if (BasicBlock *LatchBlock = L->getLoopLatch()) { - Instruction *OrigInc = - cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); - Instruction *IsomorphicInc = - cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); - if (OrigInc != IsomorphicInc && - SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) && - HoistStep(OrigInc, IsomorphicInc, DT)) { - DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: " - << *IsomorphicInc << '\n'); - IsomorphicInc->replaceAllUsesWith(OrigInc); - DeadInsts.push_back(IsomorphicInc); - } +/// Check for expressions that ScalarEvolution generates to compute +/// BackedgeTakenInfo. If these expressions have not been reduced, then +/// expanding them may incur additional cost (albeit in the loop preheader). +static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, + ScalarEvolution *SE) { + // If the backedge-taken count is a UDiv, it's very likely a UDiv that + // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a + // precise expression, rather than a UDiv from the user's code. If we can't + // find a UDiv in the code with some simple searching, assume the former and + // forego rewriting the loop. 
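To make the UDiv heuristic above concrete: a strided loop carries a division in its trip count that the program never wrote. A small model (hypothetical, unsigned math):

// For "for (unsigned i = 0; i < n; i += 3)" the trip count is ceil(n / 3),
// which SCEV expresses as (n + 2) /u 3, a udiv absent from the source.
// Expanding that SCEV verbatim would plant a real division in the preheader,
// which is why the code below first looks for an equivalent value already
// computed by the loop's own exit compare.
unsigned modelTripCount(unsigned n) {
  return (n + 2) / 3;                 // assumes n + 2 does not wrap
}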
+ if (isa<SCEVUDivExpr>(S)) { + ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!OrigCond) return true; + const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); + R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); + if (R != S) { + const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); + L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); + if (L != S) + return true; } - DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); - ++NumElimIV; - Phi->replaceAllUsesWith(OrigPhi); - DeadInsts.push_back(Phi); } -} -//===----------------------------------------------------------------------===// -// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. -//===----------------------------------------------------------------------===// + if (EnableIVRewrite) + return false; + + // Recurse past add expressions, which commonly occur in the + // BackedgeTakenCount. They may already exist in program code, and if not, + // they are not too expensive to rematerialize. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + if (isHighCostExpansion(*I, BI, SE)) + return true; + } + return false; + } + + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // If we haven't recognized an expensive SCEV pattern, assume it's an expression + // produced by program code. + return false; +} /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken /// count expression can be safely and cheaply expanded into an instruction @@ -1465,31 +1292,17 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { if (!BI) return false; - // Special case: If the backedge-taken count is a UDiv, it's very likely a - // UDiv that ScalarEvolution produced in order to compute a precise - // expression, rather than a UDiv from the user's code. If we can't find a - // UDiv in the code with some simple searching, assume the former and forego - // rewriting the loop. - if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { - ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!OrigCond) return false; - const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); - R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); - if (R != BackedgeTakenCount) { - const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); - L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); - if (L != BackedgeTakenCount) - return false; - } - } + if (isHighCostExpansion(BackedgeTakenCount, BI, SE)) + return false; + return true; } /// getBackedgeIVType - Get the widest type used by the loop test after peeking /// through Truncs. /// -/// TODO: Unnecessary if LFTR does not force a canonical IV. -static const Type *getBackedgeIVType(Loop *L) { +/// TODO: Unnecessary when ForceLFTR is removed. +static Type *getBackedgeIVType(Loop *L) { if (!L->getExitingBlock()) return 0; @@ -1502,7 +1315,7 @@ static const Type *getBackedgeIVType(Loop *L) { if (!Cond) return 0; - const Type *Ty = 0; + Type *Ty = 0; for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); OI != OE; ++OI) { assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); @@ -1515,12 +1328,187 @@ static const Type *getBackedgeIVType(Loop *L) { return Ty; } +/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop +/// invariant value to the phi.
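Because the canExpandBackedgeTakenCount hunk above shows only fragments, it may help to see the predicate's post-patch shape in one piece. The following is a reconstruction pieced together from those fragments, not a verbatim quote of the file:

// Reconstructed shape of canExpandBackedgeTakenCount after this change: a
// computable, non-zero count, a single exiting block ending in a branch,
// and a count that isHighCostExpansion does not veto.
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
      BackedgeTakenCount->isZero())
    return false;

  if (!L->getExitingBlock())
    return false;

  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
  if (!BI)
    return false;

  if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
    return false;
  return true;
}

getLoopPhiForCounter, defined next, supplies the counter-recognition side of the new LFTR policy.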
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { + Instruction *IncI = dyn_cast<Instruction>(IncV); + if (!IncI) + return 0; + + switch (IncI->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + break; + case Instruction::GetElementPtr: + // An IV counter must preserve its type. + if (IncI->getNumOperands() == 2) + break; + default: + return 0; + } + + PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); + if (Phi && Phi->getParent() == L->getHeader()) { + if (isLoopInvariant(IncI->getOperand(1), L, DT)) + return Phi; + return 0; + } + if (IncI->getOpcode() == Instruction::GetElementPtr) + return 0; + + // Allow add/sub to be commuted. + Phi = dyn_cast<PHINode>(IncI->getOperand(1)); + if (Phi && Phi->getParent() == L->getHeader()) { + if (isLoopInvariant(IncI->getOperand(0), L, DT)) + return Phi; + } + return 0; +} + +/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show +/// that the current exit test is already sufficiently canonical. +static bool needsLFTR(Loop *L, DominatorTree *DT) { + assert(L->getExitingBlock() && "expected loop exit"); + + BasicBlock *LatchBlock = L->getLoopLatch(); + // Don't bother with LFTR if the loop is not properly simplified. + if (!LatchBlock) + return false; + + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + assert(BI && "expected exit branch"); + + // Do LFTR to simplify the exit condition to an ICMP. + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return true; + + // Do LFTR to simplify the exit ICMP to EQ/NE + ICmpInst::Predicate Pred = Cond->getPredicate(); + if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) + return true; + + // Look for a loop invariant RHS + Value *LHS = Cond->getOperand(0); + Value *RHS = Cond->getOperand(1); + if (!isLoopInvariant(RHS, L, DT)) { + if (!isLoopInvariant(LHS, L, DT)) + return true; + std::swap(LHS, RHS); + } + // Look for a simple IV counter LHS + PHINode *Phi = dyn_cast<PHINode>(LHS); + if (!Phi) + Phi = getLoopPhiForCounter(LHS, L, DT); + + if (!Phi) + return true; + + // Do LFTR if the exit condition's IV is *not* a simple counter. + Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch()); + return Phi != getLoopPhiForCounter(IncV, L, DT); +} + +/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to +/// be rewritten) loop exit test. +static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { + int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); + Value *IncV = Phi->getIncomingValue(LatchIdx); + + for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end(); + UI != UE; ++UI) { + if (*UI != Cond && *UI != IncV) return false; + } + + for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end(); + UI != UE; ++UI) { + if (*UI != Cond && *UI != Phi) return false; + } + return true; +} + +/// FindLoopCounter - Find an affine IV in canonical form. +/// +/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount +/// +/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. +/// This is difficult in general for SCEV because of potential overflow. But we +/// could at least handle constant BECounts. +static PHINode * +FindLoopCounter(Loop *L, const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) { + // I'm not sure how BECount could be a pointer type, but we definitely don't + // want to LFTR that. 
+ if (BECount->getType()->isPointerTy()) + return 0; + + uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); + + Value *Cond = + cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition(); + + // Loop over all of the PHI nodes, looking for a simple counter. + PHINode *BestPhi = 0; + const SCEV *BestInit = 0; + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "needsLFTR should guarantee a loop latch"); + + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *Phi = cast<PHINode>(I); + if (!SE->isSCEVable(Phi->getType())) + continue; + + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); + if (!AR || AR->getLoop() != L || !AR->isAffine()) + continue; + + // AR may be a pointer type, while BECount is an integer type. + // AR may be wider than BECount. With eq/ne tests overflow is immaterial. + // AR may not be a narrower type, or we may never exit. + uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); + if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth))) + continue; + + const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); + if (!Step || !Step->isOne()) + continue; + + int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); + Value *IncV = Phi->getIncomingValue(LatchIdx); + if (getLoopPhiForCounter(IncV, L, DT) != Phi) + continue; + + const SCEV *Init = AR->getStart(); + + if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { + // Don't force a live loop counter if another IV can be used. + if (AlmostDeadIV(Phi, LatchBlock, Cond)) + continue; + + // Prefer to count-from-zero. This is a more "canonical" counter form. It + // also prefers integer to pointer IVs. + if (BestInit->isZero() != Init->isZero()) { + if (BestInit->isZero()) + continue; + } + // If two IVs both count from zero or both count from nonzero then the + // narrower is likely a dead phi that has been widened. Use the wider phi + // to allow the other to be eliminated. + if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) + continue; + } + BestPhi = Phi; + BestInit = Init; + } + return BestPhi; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. This pass is able to rewrite the exit tests of any loop where the /// SCEV analysis can determine a loop-invariant trip count of the loop, which /// is actually a much broader range than just linear tests. -ICmpInst *IndVarSimplify:: +Value *IndVarSimplify:: LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, @@ -1528,62 +1516,117 @@ LinearFunctionTestReplace(Loop *L, assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + // LFTR can ignore IV overflow and truncate to the width of + // BECount. This avoids materializing the add(zext(add)) expression. + Type *CntTy = !EnableIVRewrite ? + BackedgeTakenCount->getType() : IndVar->getType(); + + const SCEV *IVLimit = BackedgeTakenCount; + // If the exiting block is not the same as the backedge block, we must compare // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. Value *CmpIndVar; - const SCEV *RHS = BackedgeTakenCount; if (L->getExitingBlock() == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. 
// If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. - const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); const SCEV *N = - SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); - if ((isa<SCEVConstant>(N) && !N->isZero()) || - SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { - // No overflow. Cast the sum. - RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); - } else { - // Potential overflow. Cast before doing the add. - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - RHS = SE->getAddExpr(RHS, - SE->getConstant(IndVar->getType(), 1)); + SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1)); + if (CntTy == IVLimit->getType()) + IVLimit = N; + else { + const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0); + if ((isa<SCEVConstant>(N) && !N->isZero()) || + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + // No overflow. Cast the sum. + IVLimit = SE->getTruncateOrZeroExtend(N, CntTy); + } else { + // Potential overflow. Cast before doing the add. + IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); + IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1)); + } } - // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); } else { // We have to use the preincremented value... - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); + IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); CmpIndVar = IndVar; } + // For unit stride, IVLimit = Start + BECount with 2's complement overflow. + // So for non-zero start, compute the IVLimit here. + bool isPtrIV = false; + Type *CmpTy = CntTy; + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); + assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); + if (!AR->getStart()->isZero()) { + assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); + const SCEV *IVInit = AR->getStart(); + + // For pointer types, sign extend BECount in order to materialize a GEP. + // Note that without EnableIVRewrite, we never run SCEVExpander on a + // pointer type, because we must preserve the existing GEPs. Instead we + // directly generate a GEP later. + if (IVInit->getType()->isPointerTy()) { + isPtrIV = true; + CmpTy = SE->getEffectiveSCEVType(IVInit->getType()); + IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy); + } + // For integer types, truncate the IV before computing IVInit + BECount. + else { + if (SE->getTypeSizeInBits(IVInit->getType()) + > SE->getTypeSizeInBits(CmpTy)) + IVInit = SE->getTruncateExpr(IVInit, CmpTy); + + IVLimit = SE->getAddExpr(IVInit, IVLimit); + } + } // Expand the code for the iteration count. - assert(SE->isLoopInvariant(RHS, L) && + IRBuilder<> Builder(BI); + + assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); - Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI); + + // Create a gep for IVInit + IVLimit from an existing pointer base.
+ assert(isPtrIV == IndVar->getType()->isPointerTy() && + "IndVar type must match IVInit type"); + if (isPtrIV) { + Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); + assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter"); + assert(SE->getSizeOfExpr( + cast<PointerType>(IVStart->getType())->getElementType())->isOne() + && "unit stride pointer IV must be i8*"); + + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit"); + Builder.SetInsertPoint(BI); + } // Insert a new icmp_ne or icmp_eq instruction before the branch. - ICmpInst::Predicate Opcode; + ICmpInst::Predicate P; if (L->contains(BI->getSuccessor(0))) - Opcode = ICmpInst::ICMP_NE; + P = ICmpInst::ICMP_NE; else - Opcode = ICmpInst::ICMP_EQ; + P = ICmpInst::ICMP_EQ; DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" << " LHS:" << *CmpIndVar << '\n' << " op:\t" - << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" - << " RHS:\t" << *RHS << "\n"); + << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" + << " RHS:\t" << *ExitCnt << "\n" + << " Expr:\t" << *IVLimit << "\n"); + + if (SE->getTypeSizeInBits(CmpIndVar->getType()) + > SE->getTypeSizeInBits(CmpTy)) { + CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv"); + } - ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); - Cond->setDebugLoc(BI->getDebugLoc()); + Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); Value *OrigCond = BI->getCondition(); // It's tempting to use replaceAllUsesWith here to fully replace the old // comparison, but that's not immediately safe, since users of the old @@ -1612,7 +1655,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) return; - Instruction *InsertPt = ExitBlock->getFirstNonPHI(); + Instruction *InsertPt = ExitBlock->getFirstInsertionPt(); BasicBlock::iterator I = Preheader->getTerminator(); while (I != Preheader->begin()) { --I; @@ -1633,6 +1676,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { if (isa<DbgInfoIntrinsic>(I)) continue; + // Skip landingpad instructions. + if (isa<LandingPadInst>(I)) + continue; + // Don't sink static AllocaInsts out of the entry block, which would // turn them into dynamic allocas! if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) @@ -1699,7 +1746,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!L->isLoopSimplifyForm()) return false; - if (!DisableIVRewrite) + if (EnableIVRewrite) IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); @@ -1717,6 +1764,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE, "indvars"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif // Eliminate redundant IV users. // @@ -1724,9 +1774,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // attempt to avoid evaluating SCEVs for sign/zero extend operations until // other expressions involving loop IVs have been evaluated. This helps SCEV // set no-wrap flags before normalizing sign/zero extension. - if (DisableIVRewrite) { + if (!EnableIVRewrite) { Rewriter.disableCanonicalMode(); - SimplifyIVUsersNoRewrite(L, Rewriter); + SimplifyAndExtend(L, Rewriter, LPM); } // Check to see if this loop has a computable loop-invariant execution count. 
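One hazard handled by the LinearFunctionTestReplace rewrite above deserves a standalone statement: the exit limit is the backedge-taken count plus one, and that increment can wrap, which is exactly why the code widens before adding when overflow cannot be excluded. A small model (hypothetical helper):

#include <cstdint>

// Model of the "+1 may wrap" hazard: the trip count is the backedge-taken
// count plus one. If BECount is already the maximum 32-bit value, the sum
// only exists in a wider type.
bool tripCountFitsIn32(uint32_t BECount, uint64_t &TripCount) {
  TripCount = uint64_t(BECount) + 1;  // always exact in 64 bits
  return TripCount <= UINT32_MAX;     // false exactly when BECount == 2^32-1
}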
@@ -1739,25 +1789,25 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV users. - if (!DisableIVRewrite) - SimplifyIVUsers(Rewriter); + if (EnableIVRewrite) + Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts); // Eliminate redundant IV cycles. - if (DisableIVRewrite) - SimplifyCongruentIVs(L); + if (!EnableIVRewrite) + NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. - const Type *LargestType = 0; + Type *LargestType = 0; bool NeedCannIV = false; bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); - if (ExpandBECount) { + if (EnableIVRewrite && ExpandBECount) { // If we have a known trip count and a single exit block, we'll be // rewriting the loop exit test condition below, which requires a // canonical induction variable. NeedCannIV = true; - const Type *Ty = BackedgeTakenCount->getType(); - if (DisableIVRewrite) { + Type *Ty = BackedgeTakenCount->getType(); + if (!EnableIVRewrite) { // In this mode, SimplifyIVUsers may have already widened the IV used by // the backedge test and inserted a Trunc on the compare's operand. Get // the wider type to avoid creating a redundant narrow IV only used by the @@ -1769,10 +1819,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE->getTypeSizeInBits(LargestType)) LargestType = SE->getEffectiveSCEVType(Ty); } - if (!DisableIVRewrite) { + if (EnableIVRewrite) { for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { NeedCannIV = true; - const Type *Ty = + Type *Ty = SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > @@ -1811,18 +1861,16 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // the end of the pass. while (!OldCannIVs.empty()) { PHINode *OldCannIV = OldCannIVs.pop_back_val(); - OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI()); + OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt()); } } - + else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) { + IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD); + } // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - ICmpInst *NewICmp = 0; - if (ExpandBECount) { - assert(canExpandBackedgeTakenCount(L, SE) && - "canonical IV disrupted BackedgeTaken expansion"); - assert(NeedCannIV && - "LinearFunctionTestReplace requires a canonical induction variable"); + Value *NewICmp = 0; + if (ExpandBECount && IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. Instead any // pass that uses the SCEVExpander must do it. This does not work well for @@ -1837,7 +1885,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); } // Rewrite IV-derived expressions. - if (!DisableIVRewrite) + if (EnableIVRewrite) RewriteIVExpressions(L, Rewriter); // Clear the rewriter cache, because values that are in the rewriter's cache @@ -1860,12 +1908,34 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // For completeness, inform IVUsers of the IV use in the newly-created // loop exit test instruction. 
- if (NewICmp && IU) - IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); - + if (IU && NewICmp) { + ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp); + if (NewICmpInst) + IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0))); + } // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); // Check a post-condition. - assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); + assert(L->isLCSSAForm(*DT) && + "Indvars did not leave the loop in lcssa form!"); + + // Verify that LFTR and any other changes have not interfered with SCEV's + // ability to compute trip count. +#ifndef NDEBUG + if (!EnableIVRewrite && VerifyIndvars && + !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { + SE->forgetLoop(L); + const SCEV *NewBECount = SE->getBackedgeTakenCount(L); + if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < + SE->getTypeSizeInBits(NewBECount->getType())) + NewBECount = SE->getTruncateOrNoop(NewBECount, + BackedgeTakenCount->getType()); + else + BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, + NewBECount->getType()); + assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); + } +#endif + return Changed; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index b500d5b..f410af3 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -811,8 +811,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { /// important optimization that encourages jump threading, and needs to be run /// interlaced with other jump threading tasks. bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { - // Don't hack volatile loads. - if (LI->isVolatile()) return false; + // Don't hack volatile/atomic loads. + if (!LI->isSimple()) return false; // If the load is defined in a block with exactly one predecessor, it can't be // partially redundant. diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 66add6c..b79bb13 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -151,6 +151,11 @@ namespace { /// bool isSafeToExecuteUnconditionally(Instruction &I); + /// isGuaranteedToExecute - Check that the instruction is guaranteed to + /// execute. + /// + bool isGuaranteedToExecute(Instruction &I); + /// pointerInvalidatedByLoop - Return true if the body of this loop may /// store into the memory location pointed to by V. /// @@ -357,8 +362,8 @@ void LICM::HoistRegion(DomTreeNode *N) { bool LICM::canSinkOrHoistInst(Instruction &I) { // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { - if (LI->isVolatile()) - return false; // Don't hoist volatile loads! + if (!LI->isUnordered()) + return false; // Don't hoist volatile/atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. @@ -461,7 +466,7 @@ void LICM::sink(Instruction &I) { } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. - I.moveBefore(ExitBlocks[0]->getFirstNonPHI()); + I.moveBefore(ExitBlocks[0]->getFirstInsertionPt()); // This instruction is no longer in the AST for the current loop, because // we just sunk it out of the loop. If we just sunk it into an outer @@ -504,7 +509,7 @@ void LICM::sink(Instruction &I) { continue; // Insert the code after the last PHI node.
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); + BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); // If this is the first exit block processed, just move the original // instruction, otherwise clone the original instruction and insert @@ -577,6 +582,10 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { if (Inst.isSafeToSpeculativelyExecute()) return true; + return isGuaranteedToExecute(Inst); +} + +bool LICM::isGuaranteedToExecute(Instruction &Inst) { // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. @@ -635,7 +644,7 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstNonPHI(); + Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); @@ -713,34 +722,41 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // If there is a non-load/store instruction in the loop, we can't promote // it. - unsigned InstAlignment; if (LoadInst *load = dyn_cast<LoadInst>(Use)) { - assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); - InstAlignment = load->getAlignment(); + assert(!load->isVolatile() && "AST broken"); + if (!load->isSimple()) + return; } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (Use->getOperand(1) != ASIV) continue; - InstAlignment = store->getAlignment(); - assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); + assert(!store->isVolatile() && "AST broken"); + if (!store->isSimple()) + return; + + // Note that we only check GuaranteedToExecute inside the store case + // so that we do not introduce stores where they did not exist before + // (which would break the LLVM concurrency model). + + // If the alignment of this instruction allows us to specify a more + // restrictive (and performant) alignment and if we are sure this + // instruction will be executed, update the alignment. + // Larger is better, with the exception of 0 being the best alignment. + unsigned InstAlignment = store->getAlignment(); + if ((InstAlignment > Alignment || InstAlignment == 0) + && (Alignment != 0)) + if (isGuaranteedToExecute(*Use)) { + GuaranteedToExecute = true; + Alignment = InstAlignment; + } + + if (!GuaranteedToExecute) + GuaranteedToExecute = isGuaranteedToExecute(*Use); + } else return; // Not a load or store. - // If the alignment of this instruction allows us to specify a more - // restrictive (and performant) alignment and if we are sure this - // instruction will be executed, update the alignment. - // Larger is better, with the exception of 0 being the best alignment.
- if ((InstAlignment > Alignment || InstAlignment == 0) - && (Alignment != 0)) - if (isSafeToExecuteUnconditionally(*Use)) { - GuaranteedToExecute = true; - Alignment = InstAlignment; - } - - if (!GuaranteedToExecute) - GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); - LoopUses.push_back(Use); } } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a0e41d9..ad15cbb 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -267,7 +267,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, /// processLoopStore - See if this store can be promoted to a memset or memcpy. bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { - if (SI->isVolatile()) return false; + if (!SI->isSimple()) return false; Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); @@ -314,7 +314,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0))); if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && - StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile()) + StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple()) if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) return true; } @@ -463,7 +463,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, SplatValue = 0; } else { // Otherwise, this isn't an idiom we can transform. For example, we can't - // do anything with a 3-byte store, for example. + // do anything with a 3-byte store. return false; } @@ -498,7 +498,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); + Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), @@ -604,7 +604,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - const Type *IntPtr = TD->getIntPtrType(SI->getContext()); + Type *IntPtr = TD->getIntPtrType(SI->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 509d026..3e122c2 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -70,12 +70,27 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; +namespace llvm { +cl::opt<bool> EnableNested( + "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops")); + +cl::opt<bool> EnableRetry( + "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); + +// Temporary flag to cleanup congruent phis after LSR phi expansion. +// It's currently disabled until we can determine whether it's truly useful or +// not. 
The flag should be removed after the v3.0 release. +cl::opt<bool> EnablePhiElim( + "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); +} + namespace { /// RegSortData - This class holds data which is used to order reuse candidates. @@ -219,7 +234,7 @@ struct Formula { void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); unsigned getNumRegs() const; - const Type *getType() const; + Type *getType() const; void DeleteBaseReg(const SCEV *&S); @@ -319,7 +334,7 @@ unsigned Formula::getNumRegs() const { /// getType - Return the type of this formula, if it has one, or null /// otherwise. This type is meaningless except for the bit size. -const Type *Formula::getType() const { +Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : AM.BaseGV ? AM.BaseGV->getType() : @@ -397,7 +412,7 @@ void Formula::dump() const { /// isAddRecSExtable - Return true if the given addrec can be sign-extended /// without changing its value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); } @@ -405,7 +420,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { /// isAddSExtable - Return true if the given add can be sign-extended /// without changing its value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -413,7 +428,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { /// isMulSExtable - Return true if the given mul can be sign-extended /// without changing its value. static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { - const Type *WideTy = + Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(M->getType()) * M->getNumOperands()); return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy)); @@ -594,8 +609,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { } /// getAccessType - Return the type of the memory being accessed. -static const Type *getAccessType(const Instruction *Inst) { - const Type *AccessTy = Inst->getType(); +static Type *getAccessType(const Instruction *Inst) { + Type *AccessTy = Inst->getType(); if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) AccessTy = SI->getOperand(0)->getType(); else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -614,7 +629,7 @@ static const Type *getAccessType(const Instruction *Inst) { // All pointers have the same requirements, so canonicalize them to an // arbitrary pointer type to minimize variation. - if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy)) + if (PointerType *PTy = dyn_cast<PointerType>(AccessTy)) AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), PTy->getAddressSpace()); @@ -670,6 +685,21 @@ public: void Loose(); +#ifndef NDEBUG + // Once any of the metrics loses, they must all remain losers. 
+ bool isValid() { + return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds + | ImmCost | SetupCost) != ~0u) + || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds + & ImmCost & SetupCost) == ~0u); + } +#endif + + bool isLoser() { + assert(isValid() && "invalid cost"); + return NumRegs == ~0u; + } + void RateFormula(const Formula &F, SmallPtrSet<const SCEV *, 16> &Regs, const DenseSet<const SCEV *> &VisitedRegs, @@ -702,34 +732,48 @@ void Cost::RateRegister(const SCEV *Reg, if (AR->getLoop() == L) AddRecCost += 1; /// TODO: This should be a function of the stride. - // If this is an addrec for a loop that's already been visited by LSR, - // don't second-guess its addrec phi nodes. LSR isn't currently smart - // enough to reason about more than one loop at a time. Consider these - // registers free and leave them alone. + // If this is an addrec for another loop, don't second-guess its addrec phi + // nodes. LSR isn't currently smart enough to reason about more than one + // loop at a time. LSR has either already run on inner loops, will not run + // on other loops, or cannot be expected to change sibling loops. If the + // AddRec exists, consider its register free and leave it alone. Otherwise, + // do not consider this formula at all. + // FIXME: why do we need to generate such formulae? + else if (!EnableNested || L->contains(AR->getLoop()) || (!AR->getLoop()->contains(L) && DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHINode *PN = dyn_cast<PHINode>(I); ++I) { if (SE.isSCEVable(PN->getType()) && (SE.getEffectiveSCEVType(PN->getType()) == SE.getEffectiveSCEVType(AR->getType())) && SE.getSCEV(PN) == AR) return; - + } + if (!EnableNested) { + Loose(); + return; + } // If this isn't one of the addrecs that the loop already has, it // would require a costly new phi and add. TODO: This isn't // precisely modeled right now. ++NumBaseAdds; - if (!Regs.count(AR->getStart())) + if (!Regs.count(AR->getStart())) { RateRegister(AR->getStart(), Regs, L, SE, DT); + if (isLoser()) + return; + } } // Add the step value register, if it needs one. // TODO: The non-affine case isn't precisely modeled here. - if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) - if (!Regs.count(AR->getStart())) + if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) { + if (!Regs.count(AR->getOperand(1))) { RateRegister(AR->getOperand(1), Regs, L, SE, DT); + if (isLoser()) + return; + } + } } ++NumRegs; @@ -769,6 +813,8 @@ void Cost::RateFormula(const Formula &F, return; } RatePrimaryRegister(ScaledReg, Regs, L, SE, DT); + if (isLoser()) + return; } for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { @@ -778,6 +824,8 @@ void Cost::RateFormula(const Formula &F, return; } RatePrimaryRegister(BaseReg, Regs, L, SE, DT); + if (isLoser()) + return; } // Determine how many (unfolded) adds we'll need inside the loop. @@ -795,6 +843,7 @@ void Cost::RateFormula(const Formula &F, else if (Offset != 0) ImmCost += APInt(64, Offset, true).getMinSignedBits(); } + assert(isValid() && "invalid cost"); } /// Loose - Set this cost to a losing value. @@ -980,7 +1029,7 @@ public: }; KindType Kind; - const Type *AccessTy; + Type *AccessTy; SmallVector<int64_t, 8> Offsets; int64_t MinOffset; @@ -995,7 +1044,7 @@ public: /// this LSRUse.
FindUseWithSimilarFormula can't consider uses with different /// max fixup widths to be equivalent, because the narrower one may be relying /// on the implicit truncation to truncate away bogus bits. - const Type *WidestFixupType; + Type *WidestFixupType; /// Formulae - A list of ways to build a value that can satisfy this user. /// After the list is populated, one of these is selected heuristically and @@ -1005,7 +1054,7 @@ public: /// Regs - The set of register candidates used by all formulae in this LSRUse. SmallPtrSet<const SCEV *, 4> Regs; - LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T), + LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true), @@ -1127,7 +1176,7 @@ void LSRUse::dump() const { /// be completely folded into the user instruction at isel time. This includes /// address-mode folding and special icmp tricks. static bool isLegalUse(const TargetLowering::AddrMode &AM, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { switch (Kind) { case LSRUse::Address: @@ -1156,7 +1205,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. if (AM.BaseOffs != 0) { - if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs); + if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs); return false; } @@ -1176,7 +1225,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, static bool isLegalUse(TargetLowering::AddrMode AM, int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { // Check for overflow. if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != @@ -1198,7 +1247,7 @@ static bool isLegalUse(TargetLowering::AddrMode AM, static bool isAlwaysFoldable(int64_t BaseOffs, GlobalValue *BaseGV, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI) { // Fast-path: zero is always foldable. if (BaseOffs == 0 && !BaseGV) return true; @@ -1224,7 +1273,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs, static bool isAlwaysFoldable(const SCEV *S, int64_t MinOffset, int64_t MaxOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy, + LSRUse::KindType Kind, Type *AccessTy, const TargetLowering *TLI, ScalarEvolution &SE) { // Fast-path: zero is always foldable. @@ -1299,7 +1348,7 @@ class LSRInstance { SmallSetVector<int64_t, 8> Factors; /// Types - Interesting use types, to facilitate truncation reuse. - SmallSetVector<const Type *, 4> Types; + SmallSetVector<Type *, 4> Types; /// Fixups - The list of operands which are to be replaced. 
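One detail worth calling out from the isLegalUse hunk above: the immediate is now negated as -(uint64_t)AM.BaseOffs. Negating the most negative int64_t as a signed value is undefined behavior, while the unsigned negation wraps and produces the intended bit pattern. A standalone sketch (variable names are hypothetical):

    #include <cstdint>
    #include <limits>

    int main() {
      int64_t BaseOffs = std::numeric_limits<int64_t>::min();
      // int64_t Bad = -BaseOffs;          // signed overflow: undefined
      uint64_t Neg = -(uint64_t)BaseOffs;  // defined: wraps modulo 2^64
      (void)Neg;
      return 0;
    }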
SmallVector<LSRFixup, 16> Fixups; @@ -1330,11 +1379,11 @@ class LSRInstance { UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy); + LSRUse::KindType Kind, Type *AccessTy); std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind, - const Type *AccessTy); + Type *AccessTy); void DeleteUse(LSRUse &LU, size_t LUIdx); @@ -1426,7 +1475,8 @@ void LSRInstance::OptimizeShadowIV() { IVUsers::const_iterator CandidateUI = UI; ++UI; Instruction *ShadowUse = CandidateUI->getUser(); - const Type *DestTy = NULL; + Type *DestTy = NULL; + bool IsSigned = false; /* If shadow use is a int->float cast then insert a second IV to eliminate this cast. @@ -1440,10 +1490,14 @@ void LSRInstance::OptimizeShadowIV() { for (unsigned i = 0; i < n; ++i, ++d) foo(d); */ - if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) + if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) { + IsSigned = false; DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) + } + else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) { + IsSigned = true; DestTy = SCast->getDestTy(); + } if (!DestTy) continue; if (TLI) { @@ -1457,7 +1511,7 @@ void LSRInstance::OptimizeShadowIV() { if (!PH) continue; if (PH->getNumIncomingValues() != 2) continue; - const Type *SrcTy = PH->getType(); + Type *SrcTy = PH->getType(); int Mantissa = DestTy->getFPMantissaWidth(); if (Mantissa == -1) continue; if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) @@ -1474,7 +1528,9 @@ void LSRInstance::OptimizeShadowIV() { ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, IsSigned ? + (double)Init->getSExtValue() : + (double)Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -1776,7 +1832,7 @@ LSRInstance::OptimizeLoopTermCond() { if (!TLI) goto decline_post_inc; // Check for possible scaled-address reuse. - const Type *AccessTy = getAccessType(UI->getUser()); + Type *AccessTy = getAccessType(UI->getUser()); TargetLowering::AddrMode AM; AM.Scale = C->getSExtValue(); if (TLI->isLegalAddressingMode(AM, AccessTy)) @@ -1840,10 +1896,10 @@ LSRInstance::OptimizeLoopTermCond() { /// return true. bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, const Type *AccessTy) { + LSRUse::KindType Kind, Type *AccessTy) { int64_t NewMinOffset = LU.MinOffset; int64_t NewMaxOffset = LU.MaxOffset; - const Type *NewAccessTy = AccessTy; + Type *NewAccessTy = AccessTy; // Check for a mismatched kind. It's tempting to collapse mismatched kinds to // something conservative, however this can pessimize in the case that one of @@ -1882,7 +1938,7 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, /// Either reuse an existing use or create a new one, as needed. 
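The OptimizeShadowIV hunk above now records whether the cast was sitofp or uitofp and seeds the floating-point shadow IV accordingly. A small sketch of why the distinction matters (hypothetical values):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t Init = -1;                        // integer IV initializer
      printf("%.1f\n", (double)(uint32_t)Init); // 4294967295.0: zext seed
      printf("%.1f\n", (double)Init);           // -1.0: sext seed
      return 0;                                 // sitofp needs the sext seed
    }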
std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr, - LSRUse::KindType Kind, const Type *AccessTy) { + LSRUse::KindType Kind, Type *AccessTy) { const SCEV *Copy = Expr; int64_t Offset = ExtractImmediate(Expr, SE); @@ -2044,7 +2100,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; - const Type *AccessTy = 0; + Type *AccessTy = 0; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; AccessTy = getAccessType(LF.UserInst); @@ -2464,7 +2520,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, if (LU.Kind != LSRUse::ICmpZero) return; // Determine the integer type for the base formula. - const Type *IntTy = Base.getType(); + Type *IntTy = Base.getType(); if (!IntTy) return; if (SE.getTypeSizeInBits(IntTy) > 64) return; @@ -2538,7 +2594,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, /// scaled-offset address modes, for example. void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // Determine the integer type for the base formula. - const Type *IntTy = Base.getType(); + Type *IntTy = Base.getType(); if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. @@ -2598,13 +2654,13 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { if (Base.AM.BaseGV) return; // Determine the integer type for the base formula. - const Type *DstTy = Base.getType(); + Type *DstTy = Base.getType(); if (!DstTy) return; DstTy = SE.getEffectiveSCEVType(DstTy); - for (SmallSetVector<const Type *, 4>::const_iterator + for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { - const Type *SrcTy = *I; + Type *SrcTy = *I; if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { Formula F = Base; @@ -2741,7 +2797,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { int64_t Imm = WI.Imm; const SCEV *OrigReg = WI.OrigReg; - const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); + Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); unsigned BitWidth = SE.getTypeSizeInBits(IntTy); @@ -3275,6 +3331,9 @@ retry: skip:; } + if (!EnableRetry && !AnySatisfiedReqRegs) + return; + // If none of the formulae had all of the required registers, relax the // constraint so that we don't exclude all formulae. if (!AnySatisfiedReqRegs) { @@ -3298,6 +3357,10 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { // SolveRecurse does all the work. SolveRecurse(Solution, SolutionCost, Workspace, CurCost, CurRegs, VisitedRegs); + if (Solution.empty()) { + DEBUG(dbgs() << "\nNo Satisfactory Solution\n"); + return; + } // Ok, we've now made all our decisions. DEBUG(dbgs() << "\n" @@ -3416,6 +3479,9 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, // Don't insert instructions before PHI nodes. while (isa<PHINode>(IP)) ++IP; + // Ignore landingpad instructions. + while (isa<LandingPadInst>(IP)) ++IP; + // Ignore debug intrinsics. while (isa<DbgInfoIntrinsic>(IP)) ++IP; @@ -3440,9 +3506,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Rewriter.setPostInc(LF.PostIncLoops); // This is the type that the user actually needs. - const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); // This will be the type that we'll initially expand to. 
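AdjustInsertPositionForExpand above now steps over landingpad instructions as well: the IR rules require a landingpad to be the first non-PHI instruction in its block, so expanded code must land below it. A self-contained abstraction of the skip order (the enum is a hypothetical stand-in for real instruction kinds):

    #include <cstddef>
    #include <vector>

    enum Kind { PHI, LandingPad, DbgIntrinsic, Other };

    // Mirrors the skip order above: PHIs first, then the landingpad, then
    // debug intrinsics; only after those may new instructions be inserted.
    size_t firstInsertionPoint(const std::vector<Kind> &Block) {
      size_t IP = 0;
      while (IP < Block.size() && Block[IP] == PHI) ++IP;
      while (IP < Block.size() && Block[IP] == LandingPad) ++IP;
      while (IP < Block.size() && Block[IP] == DbgIntrinsic) ++IP;
      return IP;
    }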
- const Type *Ty = F.getType(); + Type *Ty = F.getType(); if (!Ty) // No type known; just expand directly to the ultimate type. Ty = OpTy; @@ -3450,7 +3516,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Expand directly to the ultimate type if it's the right size. Ty = OpTy; // This is the type to do integer arithmetic in. - const Type *IntTy = SE.getEffectiveSCEVType(Ty); + Type *IntTy = SE.getEffectiveSCEVType(Ty); // Build up a list of operands to add together to form the full base. SmallVector<const SCEV *, 8> Ops; @@ -3527,7 +3593,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // The other interesting way of "folding" with an ICmpZero is to use a // negated immediate. if (!ICmpScaledV) - ICmpScaledV = ConstantInt::get(IntTy, -Offset); + ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset); else { Ops.push_back(SE.getUnknown(ICmpScaledV)); ICmpScaledV = ConstantInt::get(IntTy, Offset); @@ -3611,10 +3677,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN, // users. if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BB->getTerminator())) { - Loop *PNLoop = LI.getLoopFor(PN->getParent()); - if (!PNLoop || PN->getParent() != PNLoop->getHeader()) { + BasicBlock *Parent = PN->getParent(); + Loop *PNLoop = LI.getLoopFor(Parent); + if (!PNLoop || Parent != PNLoop->getHeader()) { // Split the critical edge. - BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P); + BasicBlock *NewBB = 0; + if (!Parent->isLandingPad()) { + NewBB = SplitCriticalEdge(BB, Parent, P, + /*MergeIdenticalEdges=*/true, + /*DontDeleteUselessPhis=*/true); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs); + NewBB = NewBBs[0]; + } // If PN is outside of the loop and BB is in the loop, we want to // move the block to be immediately before the PHI block, not @@ -3637,7 +3713,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. - const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); if (FullV->getType() != OpTy) FullV = CastInst::Create(CastInst::getCastOpcode(FullV, false, @@ -3667,7 +3743,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF, Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. - const Type *OpTy = LF.OperandValToReplace->getType(); + Type *OpTy = LF.OperandValToReplace->getType(); if (FullV->getType() != OpTy) { Instruction *Cast = CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), @@ -3700,6 +3776,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, SCEVExpander Rewriter(SE, "lsr"); Rewriter.disableCanonicalMode(); + Rewriter.enableLSRMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Expand the new value definitions and update the users. @@ -3740,6 +3817,23 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) OptimizeShadowIV(); OptimizeLoopTermCond(); + // If loop preparation eliminates all interesting IV users, bail. + if (IU.empty()) return; + + // Skip nested loops until we can model them better with formulae. + if (!EnableNested && !L->empty()) { + + if (EnablePhiElim) { + // Remove any extra phis created by processing inner loops. 
+ SmallVector<WeakVH, 16> DeadInsts; + SCEVExpander Rewriter(SE, "lsr"); + Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); + Changed |= DeleteTriviallyDeadInstructions(DeadInsts); + } + DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); + return; + } + // Start collecting data and preparing for the solver. CollectInterestingTypesAndFactors(); CollectFixupsAndInitialFormulae(); @@ -3763,6 +3857,9 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) Types.clear(); RegUses.clear(); + if (Solution.empty()) + return; + #ifndef NDEBUG // Formulae should be legal. for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), @@ -3778,6 +3875,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // Now that we've decided what we want, make it so. ImplementSolution(Solution, P); + + if (EnablePhiElim) { + // Remove any extra phis created by processing inner loops. + SmallVector<WeakVH, 16> DeadInsts; + SCEVExpander Rewriter(SE, "lsr"); + Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); + Changed |= DeleteTriviallyDeadInstructions(DeadInsts); + } } void LSRInstance::print_factors_and_types(raw_ostream &OS) const { @@ -3793,7 +3898,7 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const { OS << '*' << *I; } - for (SmallSetVector<const Type *, 4>::const_iterator + for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { if (!First) OS << ", "; First = false; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index fef6bc3..91395b2 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Target/TargetData.h" #include <climits> using namespace llvm; @@ -39,6 +40,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached.")); +// Temporary flag to be removed in 3.0 +static cl::opt<bool> +NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden, + cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling")); + namespace { class LoopUnroll : public LoopPass { public: @@ -49,7 +55,7 @@ namespace { CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -57,11 +63,11 @@ namespace { /// that the loop unroll should be performed regardless of how much /// code expansion would result. static const unsigned NoThreshold = UINT_MAX; - + // Threshold to use when optsize is specified (and there is no // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; - + unsigned CurrentCount; unsigned CurrentThreshold; bool CurrentAllowPartial; @@ -79,6 +85,7 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); + AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next @@ -101,45 +108,62 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { } /// ApproximateLoopSize - Approximate the size of the loop. 
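A note on the -disable-unroll-scev flag introduced above: when ScalarEvolution is used, the pass asks for the latch trip count, which only promises that the loop cannot leave through the latch before that many iterations; other exits may still fire earlier, which is exactly the contract the updated UnrollLoop call below relies on. A source-level sketch (hypothetical input):

    // The latch condition i < 8 bounds latch-controlled iterations at 8,
    // even though the return may leave the loop earlier via another exit.
    int firstZero(const int *data) {
      for (int i = 0; i < 8; ++i) {
        if (data[i] == 0)
          return i;   // early exit not counted by the latch trip count
      }
      return -1;
    }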
-static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { +static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, + const TargetData *TD) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I); + Metrics.analyzeBasicBlock(*I, TD); NumCalls = Metrics.NumInlineCandidates; - + unsigned LoopSize = Metrics.NumInsts; - + // Don't allow an estimate of size zero. This would allows unrolling of loops // with huge iteration counts, which is a compile time problem even if it's // not a problem for code quality. if (LoopSize == 0) LoopSize = 1; - + return LoopSize; } bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis<LoopInfo>(); + ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() << "] Loop %" << Header->getName() << "\n"); (void)Header; - + // Determine the current unrolling threshold. While this is normally set // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. unsigned Threshold = CurrentThreshold; - if (!UserThreshold && + if (!UserThreshold && Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) Threshold = OptSizeUnrollThreshold; - // Find trip count - unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = CurrentCount; - + // Find trip count and trip multiple if count is not available + unsigned TripCount = 0; + unsigned TripMultiple = 1; + if (!NoSCEVUnroll) { + // Find "latch trip count". UnrollLoop assumes that control cannot exit + // via the loop latch on any iteration prior to TripCount. The loop may exit + // early via an earlier branch. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + TripCount = SE->getSmallConstantTripCount(L, LatchBlock); + TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); + } + } + else { + TripCount = L->getSmallConstantTripCount(); + if (TripCount == 0) + TripMultiple = L->getSmallConstantTripMultiple(); + } // Automatically select an unroll count. + unsigned Count = CurrentCount; if (Count == 0) { // Conservative heuristic: if we know the trip count, see if we can // completely unroll (subject to the threshold, checked below); otherwise @@ -152,8 +176,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. if (Threshold != NoThreshold) { + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); unsigned NumInlineCandidates; - unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates); + unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (NumInlineCandidates != 0) { DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); @@ -182,12 +207,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - Function *F = L->getHeader()->getParent(); - if (!UnrollLoop(L, Count, LI, &LPM)) + if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM)) return false; - // FIXME: Reconstruct dom info, because it is not preserved properly. 
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) - DT->runOnFunction(*F); return true; } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 840c4b6..458949c 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -492,7 +492,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, Value *BranchVal = LIC; if (!isa<ConstantInt>(Val) || Val->getType() != Type::getInt1Ty(LIC->getContext())) - BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp"); + BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val); else if (Val != ConstantInt::getTrue(Val->getContext())) // We want to enter the new loop when the condition is true. std::swap(TrueDest, FalseDest); @@ -561,10 +561,17 @@ void LoopUnswitch::SplitExitEdges(Loop *L, BasicBlock *ExitBlock = ExitBlocks[i]; SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), pred_end(ExitBlock)); + // Although SplitBlockPredecessors doesn't preserve loop-simplify in // general, if we call it on all predecessors of all exits then it does. - SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), - ".us-lcssa", this); + if (!ExitBlock->isLandingPad()) { + SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), + ".us-lcssa", this); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa", + this, NewBBs); + } } } @@ -632,7 +639,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // as well. ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase()); } - + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]); // The new exit block should be in the same loop as the old one. @@ -653,6 +660,19 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, if (It != VMap.end()) V = It->second; PN->addIncoming(V, NewExit); } + + if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { + PN = PHINode::Create(LPad->getType(), 0, "", + ExitSucc->getFirstInsertionPt()); + + for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); + I != E; ++I) { + BasicBlock *BB = *I; + LandingPadInst *LPI = BB->getLandingPadInst(); + LPI->replaceAllUsesWith(PN); + PN->addIncoming(LPI, BB); + } + } } // Rewrite the code to refer to itself. 
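The LoopUnswitch hunks above teach both edge splitting and exit-block cloning about exception handling: a landing pad cannot simply gain a new predecessor block, and when cloned exits share a successor, the landingpad values are funneled through a PHI. A hedged usage sketch of the split pattern (3.0-era API as used in the hunk above; the suffix strings are placeholders):

    // Ordinary exit blocks can be split in place; a landing pad needs the
    // dedicated helper, which also clones the landingpad instruction into
    // the new predecessor blocks.
    if (!ExitBlock->isLandingPad()) {
      SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
                             ".split", this);
    } else {
      SmallVector<BasicBlock*, 2> NewBBs;
      SplitLandingPadPredecessors(ExitBlock, Preds, ".split", ".split",
                                  this, NewBBs);
    }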
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 9087b46..689bbe9 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -20,98 +20,88 @@ #include "llvm/Support/IRBuilder.h" using namespace llvm; -static bool LowerAtomicIntrinsic(IntrinsicInst *II) { - IRBuilder<> Builder(II->getParent(), II); - unsigned IID = II->getIntrinsicID(); - switch (IID) { - case Intrinsic::memory_barrier: - break; +static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { + IRBuilder<> Builder(CXI->getParent(), CXI); + Value *Ptr = CXI->getPointerOperand(); + Value *Cmp = CXI->getCompareOperand(); + Value *Val = CXI->getNewValOperand(); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); + Value *Res = Builder.CreateSelect(Equal, Val, Orig); + Builder.CreateStore(Res, Ptr); + + CXI->replaceAllUsesWith(Orig); + CXI->eraseFromParent(); + return true; +} - case Intrinsic::atomic_load_add: - case Intrinsic::atomic_load_sub: - case Intrinsic::atomic_load_and: - case Intrinsic::atomic_load_nand: - case Intrinsic::atomic_load_or: - case Intrinsic::atomic_load_xor: - case Intrinsic::atomic_load_max: - case Intrinsic::atomic_load_min: - case Intrinsic::atomic_load_umax: - case Intrinsic::atomic_load_umin: { - Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1); +static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) { + IRBuilder<> Builder(RMWI->getParent(), RMWI); + Value *Ptr = RMWI->getPointerOperand(); + Value *Val = RMWI->getValOperand(); - LoadInst *Orig = Builder.CreateLoad(Ptr); - Value *Res = NULL; - switch (IID) { - default: assert(0 && "Unrecognized atomic modify operation"); - case Intrinsic::atomic_load_add: - Res = Builder.CreateAdd(Orig, Delta); - break; - case Intrinsic::atomic_load_sub: - Res = Builder.CreateSub(Orig, Delta); - break; - case Intrinsic::atomic_load_and: - Res = Builder.CreateAnd(Orig, Delta); - break; - case Intrinsic::atomic_load_nand: - Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta)); - break; - case Intrinsic::atomic_load_or: - Res = Builder.CreateOr(Orig, Delta); - break; - case Intrinsic::atomic_load_xor: - Res = Builder.CreateXor(Orig, Delta); - break; - case Intrinsic::atomic_load_max: - Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), - Delta, Orig); - break; - case Intrinsic::atomic_load_min: - Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), - Orig, Delta); - break; - case Intrinsic::atomic_load_umax: - Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), - Delta, Orig); - break; - case Intrinsic::atomic_load_umin: - Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), - Orig, Delta); - break; - } - Builder.CreateStore(Res, Ptr); + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Res = NULL; - II->replaceAllUsesWith(Orig); + switch (RMWI->getOperation()) { + default: llvm_unreachable("Unexpected RMW operation"); + case AtomicRMWInst::Xchg: + Res = Val; break; - } - - case Intrinsic::atomic_swap: { - Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1); - LoadInst *Orig = Builder.CreateLoad(Ptr); - Builder.CreateStore(Val, Ptr); - II->replaceAllUsesWith(Orig); + case AtomicRMWInst::Add: + Res = Builder.CreateAdd(Orig, Val); break; - } - - case Intrinsic::atomic_cmp_swap: { - Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1); - Value *Val = II->getArgOperand(2); - - LoadInst *Orig = Builder.CreateLoad(Ptr); - Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); 
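// A note on the replacement code above: LowerAtomicCmpXchgInst builds the
// single-threaded equivalent of cmpxchg, which behaves like:
//   T Orig = *Ptr;                      // load the current value
//   *Ptr = (Orig == Cmp) ? Val : Orig;  // store the new value only on match
//   result = Orig;                      // cmpxchg yields the old value
// Dropping atomicity is only sound because this pass targets configurations
// where the code is known not to run concurrently.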
- Value *Res = Builder.CreateSelect(Equal, Val, Orig); - Builder.CreateStore(Res, Ptr); - II->replaceAllUsesWith(Orig); + case AtomicRMWInst::Sub: + Res = Builder.CreateSub(Orig, Val); + break; + case AtomicRMWInst::And: + Res = Builder.CreateAnd(Orig, Val); + break; + case AtomicRMWInst::Nand: + Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val)); + break; + case AtomicRMWInst::Or: + Res = Builder.CreateOr(Orig, Val); + break; + case AtomicRMWInst::Xor: + Res = Builder.CreateXor(Orig, Val); + break; + case AtomicRMWInst::Max: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val), + Val, Orig); + break; + case AtomicRMWInst::Min: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val), + Orig, Val); + break; + case AtomicRMWInst::UMax: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val), + Val, Orig); + break; + case AtomicRMWInst::UMin: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val), + Orig, Val); break; } + Builder.CreateStore(Res, Ptr); + RMWI->replaceAllUsesWith(Orig); + RMWI->eraseFromParent(); + return true; +} - default: - return false; - } +static bool LowerFenceInst(FenceInst *FI) { + FI->eraseFromParent(); + return true; +} - assert(II->use_empty() && - "Lowering should have eliminated any uses of the intrinsic call!"); - II->eraseFromParent(); +static bool LowerLoadInst(LoadInst *LI) { + LI->setAtomic(NotAtomic); + return true; +} +static bool LowerStoreInst(StoreInst *SI) { + SI->setAtomic(NotAtomic); return true; } @@ -123,9 +113,22 @@ namespace { } bool runOnBasicBlock(BasicBlock &BB) { bool Changed = false; - for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++)) - Changed |= LowerAtomicIntrinsic(II); + for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { + Instruction *Inst = DI++; + if (FenceInst *FI = dyn_cast<FenceInst>(Inst)) + Changed |= LowerFenceInst(FI); + else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst)) + Changed |= LowerAtomicCmpXchgInst(CXI); + else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst)) + Changed |= LowerAtomicRMWInst(RMWI); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + if (LI->isAtomic()) + LowerLoadInst(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->isAtomic()) + LowerStoreInst(SI); + } + } return Changed; } }; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 7ed3db6..eeb8931 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -54,7 +54,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, if (OpC->isZero()) continue; // No offset. // Handle struct indices, which add their field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); continue; } @@ -384,7 +384,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { // If this is a store, see if we can merge it in. - if (NextStore->isVolatile()) break; + if (!NextStore->isSimple()) break; // Check to see if this stored value is of the same byte-splattable value. 
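tryMergingIntoMemset above only merges neighboring stores whose value is byte-splattable, since memset can only replicate a single byte. A standalone sketch of that property (the helper name is hypothetical; the real check is isBytewiseValue):

    #include <cstdint>

    // True if every byte of v equals its low byte, the integer case of
    // what isBytewiseValue accepts.
    bool isByteSplat(uint32_t v) {
      return v == 0x01010101u * (uint8_t)v;
    }
    // isByteSplat(0xABABABABu) is true; isByteSplat(0x12345678u) is false.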
if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) @@ -448,7 +448,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { - const Type *EltType = + Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } @@ -479,7 +479,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { - if (SI->isVolatile()) return false; + if (!SI->isSimple()) return false; if (TD == 0) return false; @@ -487,7 +487,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // happen to be using a load-store pair to implement it, rather than // a memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { - if (!LI->isVolatile() && LI->hasOneUse() && + if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { MemDepResult ldep = MD->getDependency(LI); CallInst *C = 0; @@ -616,7 +616,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (!A->hasStructRetAttr()) return false; - const Type *StructTy = cast<PointerType>(A->getType())->getElementType(); + Type *StructTy = cast<PointerType>(A->getType())->getElementType(); uint64_t destSize = TD->getTypeAllocSize(StructTy); if (destSize < srcSize) @@ -860,7 +860,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); - const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType(); + Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); uint64_t ByValSize = TD->getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index ee132d3..da74e9c 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -180,7 +180,7 @@ static bool IsPotentialUse(const Value *Op) { Arg->hasStructRetAttr()) return false; // Only consider values with pointer types, and not function pointers. - const PointerType *Ty = dyn_cast<PointerType>(Op->getType()); + PointerType *Ty = dyn_cast<PointerType>(Op->getType()); if (!Ty || isa<FunctionType>(Ty->getElementType())) return false; // Conservatively assume anything else is a potential use. @@ -213,8 +213,8 @@ static InstructionClass GetFunctionClass(const Function *F) { const Argument *A0 = AI++; if (AI == AE) // Argument is a pointer. - if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { - const Type *ETy = PTy->getElementType(); + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { + Type *ETy = PTy->getElementType(); // Argument is i8*. if (ETy->isIntegerTy(8)) return StringSwitch<InstructionClass>(F->getName()) @@ -234,7 +234,7 @@ static InstructionClass GetFunctionClass(const Function *F) { .Default(IC_CallOrUser); // Argument is i8** - if (const PointerType *Pte = dyn_cast<PointerType>(ETy)) + if (PointerType *Pte = dyn_cast<PointerType>(ETy)) if (Pte->getElementType()->isIntegerTy(8)) return StringSwitch<InstructionClass>(F->getName()) .Case("objc_loadWeakRetained", IC_LoadWeakRetained) @@ -246,11 +246,11 @@ static InstructionClass GetFunctionClass(const Function *F) { // Two arguments, first is i8**. 
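// For orientation in GetFunctionClass above, these are the prototype shapes
// being matched before any name check (typical declarations, shown as IR):
//   declare i8* @objc_retain(i8*)             ; one i8* argument
//   declare i8* @objc_loadWeakRetained(i8**)  ; one i8** argument
//   declare i8* @objc_storeWeak(i8**, i8*)    ; i8** then i8*
//   declare void @objc_moveWeak(i8**, i8**)   ; i8** then i8**
// Only calls whose prototype fits one of these shapes reach the
// StringSwitch on the callee name.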
const Argument *A1 = AI++; if (AI == AE) - if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) - if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) + if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) if (Pte->getElementType()->isIntegerTy(8)) - if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { - const Type *ETy1 = PTy1->getElementType(); + if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { + Type *ETy1 = PTy1->getElementType(); // Second argument is i8* if (ETy1->isIntegerTy(8)) return StringSwitch<InstructionClass>(F->getName()) @@ -258,7 +258,7 @@ .Case("objc_storeWeak", IC_StoreWeak) .Case("objc_initWeak", IC_InitWeak) .Default(IC_CallOrUser); // Second argument is i8**. - if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) + if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) if (Pte1->getElementType()->isIntegerTy(8)) return StringSwitch<InstructionClass>(F->getName()) .Case("objc_moveWeak", IC_MoveWeak) @@ -344,6 +344,10 @@ static InstructionClass GetInstructionClass(const Value *V) { break; default: // For anything else, check all the operands. + // Note that this includes both operands of a Store: while the first + // operand isn't actually being dereferenced, it is being stored to + // memory where we can no longer track who might read it and dereference + // it, so we have to consider it potentially used. for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) if (IsPotentialUse(*OI)) @@ -421,9 +425,10 @@ static bool IsAlwaysTail(InstructionClass Class) { /// IsNoThrow - Test if the given class represents instructions which are always /// safe to mark with the nounwind attribute.. static bool IsNoThrow(InstructionClass Class) { + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. return Class == IC_Retain || Class == IC_RetainRV || - Class == IC_RetainBlock || Class == IC_Release || Class == IC_Autorelease || Class == IC_AutoreleaseRV || @@ -515,6 +520,10 @@ static bool IsObjCIdentifiedObject(const Value *V) { const Value *Pointer = StripPointerCastsAndObjCCalls(LI->getPointerOperand()); if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (GV->isConstant()) + return true; StringRef Name = GV->getName(); // These special variables are known to hold values which are not // reference-counted pointers. @@ -738,7 +747,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { switch (GetBasicInstructionClass(CS.getInstruction())) { case IC_Retain: case IC_RetainRV: - case IC_RetainBlock: case IC_Autorelease: case IC_AutoreleaseRV: case IC_NoopCast: @@ -746,6 +754,8 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: // These functions don't access any memory visible to the compiler. + // Note that this doesn't include objc_retainBlock, because it updates + // pointers when it copies block data. return NoModRef; default: break; @@ -877,7 +887,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) { // usually can't sink them past other calls, which would be the main // case where it would be useful.
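With the expander covered, the hunks below turn to ObjCARCOpt proper, whose core job is pairing retains with releases along the CFG. A source-level sketch of the canonical removable pair (hypothetical function, C-level view of the IR; the runtime declarations are real entry points):

    void *objc_retain(void *);
    void objc_release(void *);
    void use(void *);

    void f(void *x) {
      void *o = objc_retain(x);  // increment provably undone below
      use(o);                    // may use o, but no ref-count decrement
      objc_release(o);           // the pair is deleted when KnownSafe holds
    }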
-/// TODO: The pointer returned from objc_loadWeakRetained is retained. +// TODO: The pointer returned from objc_loadWeakRetained is retained. + +// TODO: Delete release+retain pairs (rare). #include "llvm/GlobalAlias.h" #include "llvm/Constants.h" @@ -1098,16 +1110,16 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { if (A == S_None || B == S_None) return S_None; - // Note that we can't merge S_CanRelease and S_Use. if (A > B) std::swap(A, B); if (TopDown) { // Choose the side which is further along in the sequence. - if (A == S_Retain && (B == S_CanRelease || B == S_Use)) + if ((A == S_Retain || A == S_CanRelease) && + (B == S_CanRelease || B == S_Use)) return B; } else { // Choose the side which is further along in the sequence. if ((A == S_Use || A == S_CanRelease) && - (B == S_Release || B == S_Stop || B == S_MovableRelease)) + (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) return A; // If both sides are releases, choose the more conservative one. if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) @@ -1124,13 +1136,19 @@ namespace { /// retain-decrement-use-release sequence or release-use-decrement-retain /// reverese sequence. struct RRInfo { - /// KnownIncremented - After an objc_retain, the reference count of the - /// referenced object is known to be positive. Similarly, before an - /// objc_release, the reference count of the referenced object is known to - /// be positive. If there are retain-release pairs in code regions where the - /// retain count is known to be positive, they can be eliminated, regardless - /// of any side effects between them. - bool KnownIncremented; + /// KnownSafe - After an objc_retain, the reference count of the referenced + /// object is known to be positive. Similarly, before an objc_release, the + /// reference count of the referenced object is known to be positive. If + /// there are retain-release pairs in code regions where the retain count + /// is known to be positive, they can be eliminated, regardless of any side + /// effects between them. + /// + /// Also, a retain+release pair nested within another retain+release + /// pair all on the known same pointer value can be eliminated, regardless + /// of any intervening side effects. + /// + /// KnownSafe is true when either of these conditions is satisfied. + bool KnownSafe; /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as /// opposed to objc_retain calls). @@ -1153,7 +1171,7 @@ namespace { SmallPtrSet<Instruction *, 2> ReverseInsertPts; RRInfo() : - KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false), + KnownSafe(false), IsRetainBlock(false), IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); @@ -1161,7 +1179,7 @@ namespace { } void RRInfo::clear() { - KnownIncremented = false; + KnownSafe = false; IsRetainBlock = false; IsTailCallRelease = false; ReleaseMetadata = 0; @@ -1176,6 +1194,9 @@ namespace { /// RefCount - The known minimum number of reference count increments. unsigned RefCount; + /// NestCount - The known minimum level of retain+release nesting. + unsigned NestCount; + /// Seq - The current position in the sequence. Sequence Seq; @@ -1184,7 +1205,11 @@ namespace { /// TODO: Encapsulate this better. 
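// A compact reading of the MergeSeqs hunk above (sketch): merging with
// S_None always yields S_None. Top-down, S_Retain or S_CanRelease merged
// with S_CanRelease or S_Use keeps the later of the two states. Bottom-up,
// S_Use or S_CanRelease merged with a release-flavored state keeps the
// use-side state, and S_Stop merged with S_Release or S_MovableRelease
// keeps the more conservative S_Stop.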
RRInfo RRI; - PtrState() : RefCount(0), Seq(S_None) {} + PtrState() : RefCount(0), NestCount(0), Seq(S_None) {} + + void SetAtLeastOneRefCount() { + if (RefCount == 0) RefCount = 1; + } void IncrementRefCount() { if (RefCount != UINT_MAX) ++RefCount; @@ -1194,14 +1219,22 @@ namespace { if (RefCount != 0) --RefCount; } - void ClearRefCount() { - RefCount = 0; - } - bool IsKnownIncremented() const { return RefCount > 0; } + void IncrementNestCount() { + if (NestCount != UINT_MAX) ++NestCount; + } + + void DecrementNestCount() { + if (NestCount != 0) --NestCount; + } + + bool IsKnownNested() const { + return NestCount > 0; + } + void SetSeq(Sequence NewSeq) { Seq = NewSeq; } @@ -1233,6 +1266,7 @@ void PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); RefCount = std::min(RefCount, Other.RefCount); + NestCount = std::min(NestCount, Other.NestCount); // We can't merge a plain objc_retain with an objc_retainBlock. if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) @@ -1245,7 +1279,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) RRI.ReleaseMetadata = 0; - RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented; + RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(), @@ -1316,7 +1350,7 @@ namespace { } void clearBottomUpPointers() { - PerPtrTopDown.clear(); + PerPtrBottomUp.clear(); } void clearTopDownPointers() { @@ -1334,6 +1368,12 @@ namespace { unsigned GetAllPathCount() const { return TopDownPathCount * BottomUpPathCount; } + + /// IsVisitedTopDown - Test whether the block for this BBState has been + /// visited by the top-down portion of the algorithm. + bool isVisitedTopDown() const { + return TopDownPathCount != 0; + } }; } @@ -1364,7 +1404,7 @@ void BBState::MergePred(const BBState &Other) { /*TopDown=*/true); } - // For each entry in our set, if the other set doens't have an entry with the + // For each entry in our set, if the other set doesn't have an entry with the // same key, force it to merge with an empty entry. for (ptr_iterator MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI) @@ -1389,7 +1429,7 @@ void BBState::MergeSucc(const BBState &Other) { /*TopDown=*/false); } - // For each entry in our set, if the other set doens't have an entry + // For each entry in our set, if the other set doesn't have an entry // with the same key, force it to merge with an empty entry. for (ptr_iterator MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME; ++MI) @@ -1406,15 +1446,11 @@ namespace { /// Run - A flag indicating whether this optimization pass should run. bool Run; - /// RetainFunc, RelaseFunc - Declarations for objc_retain, - /// objc_retainBlock, and objc_release. - Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc; - /// RetainRVCallee, etc. - Declarations for ObjC runtime /// functions, for use in creating calls to them. These are initialized /// lazily to avoid cluttering up the Module with unused declarations. Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee, - *RetainCallee, *AutoreleaseCallee; + *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee; /// UsedInThisFunciton - Flags which determine whether each of the /// interesting runtine functions is in fact used in the current function. 
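Two details from the hunks above are easy to miss: clearBottomUpPointers used to clear the top-down map (a copy-paste bug fixed here), and the new NestCount lets a retain+release pair nested inside another pair on the same pointer be treated as KnownSafe. A source-level sketch of the nesting case (hypothetical function; the runtime declarations are real entry points):

    void *objc_retain(void *);
    void objc_release(void *);
    void use(void *);

    void g(void *x) {
      objc_retain(x);    // outer retain
      objc_retain(x);    // inner retain: nesting level is now 2
      use(x);            // whatever this does, the count stays >= 1
      objc_release(x);   // inner release: pair removable via KnownSafe
      objc_release(x);   // outer release
    }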
@@ -1428,6 +1464,7 @@ namespace { Constant *getAutoreleaseRVCallee(Module *M); Constant *getReleaseCallee(Module *M); Constant *getRetainCallee(Module *M); + Constant *getRetainBlockCallee(Module *M); Constant *getAutoreleaseCallee(Module *M); void OptimizeRetainCall(Function &F, Instruction *Retain); @@ -1452,11 +1489,13 @@ namespace { void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts); + SmallVectorImpl<Instruction *> &DeadInsts, + Module *M); bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases); + DenseMap<Value *, RRInfo> &Releases, + Module *M); void OptimizeWeakCalls(Function &F); @@ -1501,7 +1540,7 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); std::vector<Type *> Params; Params.push_back(I8X); - const FunctionType *FTy = + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -1518,7 +1557,7 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); std::vector<Type *> Params; Params.push_back(I8X); - const FunctionType *FTy = + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -1561,6 +1600,23 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { return RetainCallee; } +Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { + if (!RetainBlockCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. + RetainBlockCallee = + M->getOrInsertFunction( + "objc_retainBlock", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return RetainBlockCallee; +} + Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { if (!AutoreleaseCallee) { LLVMContext &C = M->getContext(); @@ -1904,12 +1960,19 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) { // Check for a return of the pointer value. 
const Value *Ptr = GetObjCArg(AutoreleaseRV); - for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end(); - UI != UE; ++UI) { - const User *I = *UI; - if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV) - return; - } + SmallVector<const Value *, 2> Users; + Users.push_back(Ptr); + do { + Ptr = Users.pop_back_val(); + for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end(); + UI != UE; ++UI) { + const User *I = *UI; + if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV) + return; + if (isa<BitCastInst>(I)) + Users.push_back(I); + } + } while (!Users.empty()); Changed = true; ++NumPeeps; @@ -1953,7 +2016,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_DestroyWeak: { CallInst *CI = cast<CallInst>(Inst); if (isNullOrUndef(CI->getArgOperand(0))) { - const Type *Ty = CI->getArgOperand(0)->getType(); + Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), Constant::getNullValue(Ty), CI); @@ -1968,7 +2031,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CallInst *CI = cast<CallInst>(Inst); if (isNullOrUndef(CI->getArgOperand(0)) || isNullOrUndef(CI->getArgOperand(1))) { - const Type *Ty = CI->getArgOperand(0)->getType(); + Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), Constant::getNullValue(Ty), CI); @@ -2090,7 +2153,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { ++NumPartialNoops; // Clone the call into each predecessor that has a non-null value. CallInst *CInst = cast<CallInst>(Inst); - const Type *ParamTy = CInst->getArgOperand(0)->getType(); + Type *ParamTy = CInst->getArgOperand(0)->getType(); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); @@ -2132,41 +2195,49 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); bool SomeSuccHasSame = false; bool AllSuccsHaveSame = true; - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) - switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) { + PtrState &S = MyStates.getPtrTopDownState(Arg); + for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { + PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg); + switch (SuccS.GetSeq()) { case S_None: - case S_CanRelease: - MyStates.getPtrTopDownState(Arg).ClearSequenceProgress(); - SomeSuccHasSame = false; - break; + case S_CanRelease: { + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + S.ClearSequenceProgress(); + continue; + } case S_Use: SomeSuccHasSame = true; break; case S_Stop: case S_Release: case S_MovableRelease: - AllSuccsHaveSame = false; + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + AllSuccsHaveSame = false; break; case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } + } // If the state at the other end of any of the successor edges // matches the current state, require all edges to match. This // guards against loops in the middle of a sequence. 
if (SomeSuccHasSame && !AllSuccsHaveSame) - MyStates.getPtrTopDownState(Arg).ClearSequenceProgress(); + S.ClearSequenceProgress(); } case S_CanRelease: { const Value *Arg = I->first; const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); bool SomeSuccHasSame = false; bool AllSuccsHaveSame = true; - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) - switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) { - case S_None: - MyStates.getPtrTopDownState(Arg).ClearSequenceProgress(); - SomeSuccHasSame = false; - break; + PtrState &S = MyStates.getPtrTopDownState(Arg); + for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { + PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg); + switch (SuccS.GetSeq()) { + case S_None: { + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + S.ClearSequenceProgress(); + continue; + } case S_CanRelease: SomeSuccHasSame = true; break; @@ -2174,16 +2245,18 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, case S_Release: case S_MovableRelease: case S_Use: - AllSuccsHaveSame = false; + if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe) + AllSuccsHaveSame = false; break; case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } + } // If the state at the other end of any of the successor edges // matches the current state, require all edges to match. This // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) - MyStates.getPtrTopDownState(Arg).ClearSequenceProgress(); + S.ClearSequenceProgress(); } } } @@ -2207,6 +2280,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (Succ == BB) continue; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); + // If we haven't seen this node yet, then we've found a CFG cycle. + // Be optimistic here; it's CheckForCFGHazards' job to detect trouble. if (I == BBStates.end()) continue; MyStates.InitFromSucc(I->second); @@ -2245,11 +2320,12 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); S.RRI.clear(); - S.RRI.KnownIncremented = S.IsKnownIncremented(); + S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); S.IncrementRefCount(); + S.IncrementNestCount(); break; } case IC_RetainBlock: @@ -2259,6 +2335,13 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.DecrementRefCount(); + S.SetAtLeastOneRefCount(); + S.DecrementNestCount(); + + // An objc_retainBlock call with just a use still needs to be kept, + // because it may be copying a block from the stack to the heap. + if (Class == IC_RetainBlock && S.GetSeq() == S_Use) + S.SetSeq(S_CanRelease); switch (S.GetSeq()) { case S_Stop: @@ -2281,7 +2364,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } - break; + continue; } case IC_AutoreleasepoolPop: // Conservatively, clear MyStates for all known pointers. @@ -2305,26 +2388,22 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, PtrState &S = MI->second; Sequence Seq = S.GetSeq(); - // Check for possible retains and releases. + // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - // Check for a retain (we're going bottom-up here). S.DecrementRefCount(); - - // Check for a release.
- if (!IsRetain(Class) && Class != IC_RetainBlock) - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } + switch (Seq) { + case S_Use: + S.SetSeq(S_CanRelease); + continue; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } } // Check for possible direct uses. @@ -2332,14 +2411,14 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, case S_Release: case S_MovableRelease: if (CanUse(Inst, Ptr, PA, Class)) { - S.RRI.ReverseInsertPts.clear(); + assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); S.SetSeq(S_Use); } else if (Seq == S_Release && (Class == IC_User || Class == IC_CallOrUser)) { // Non-movable releases depend on any possible objc pointer use. S.SetSeq(S_Stop); - S.RRI.ReverseInsertPts.clear(); + assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); } break; @@ -2378,14 +2457,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, if (Pred == BB) continue; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); - if (I == BBStates.end()) + assert(I != BBStates.end()); + // If we haven't seen this node yet, then we've found a CFG cycle. + // Be optimistic here; it's CheckForCFGHazards' job to detect trouble. + if (!I->second.isVisitedTopDown()) continue; MyStates.InitFromPred(I->second); while (PI != PE) { Pred = *PI++; if (Pred != BB) { I = BBStates.find(Pred); - if (I != BBStates.end()) + assert(I != BBStates.end()); + if (I->second.isVisitedTopDown()) MyStates.MergePred(I->second); } } @@ -2422,18 +2505,23 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, S.SetSeq(S_Retain); S.RRI.clear(); S.RRI.IsRetainBlock = Class == IC_RetainBlock; - S.RRI.KnownIncremented = S.IsKnownIncremented(); + // Don't check S.IsKnownIncremented() here because it's not + // sufficient. + S.RRI.KnownSafe = S.IsKnownNested(); S.RRI.Calls.insert(Inst); } + S.SetAtLeastOneRefCount(); S.IncrementRefCount(); - break; + S.IncrementNestCount(); + continue; } case IC_Release: { Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); S.DecrementRefCount(); + S.DecrementNestCount(); switch (S.GetSeq()) { case S_Retain: @@ -2478,16 +2566,12 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, Sequence Seq = S.GetSeq(); // Check for possible releases. - if (!IsRetain(Class) && Class != IC_RetainBlock && - CanAlterRefCount(Inst, Ptr, PA, Class)) { - // Check for a release. + if (CanAlterRefCount(Inst, Ptr, PA, Class)) { S.DecrementRefCount(); - - // Check for a release. switch (Seq) { case S_Retain: S.SetSeq(S_CanRelease); - S.RRI.ReverseInsertPts.clear(); + assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); // One call can't cause a transition from S_Retain to S_CanRelease @@ -2511,8 +2595,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, if (CanUse(Inst, Ptr, PA, Class)) S.SetSeq(S_Use); break; - case S_Use: case S_Retain: + // An objc_retainBlock call may be responsible for copying the block + // data from the stack to the heap. Model this by moving it straight + // from S_Retain to S_Use.
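A toy model of the bottom-up Sequence transitions being applied above, with everything except the state changes stripped away (simplified; the real PtrState also tracks reference counts, nesting, and insertion points):

#include <cassert>

enum Sequence { S_None, S_Release, S_Use, S_CanRelease };

struct ToyPtrState {
  Sequence Seq = S_None;
  // Walking a block bottom-up:
  void visitRelease()          { Seq = S_Release; }                   // objc_release starts a sequence
  void visitUse()              { if (Seq == S_Release) Seq = S_Use; } // a use of the pointer
  void visitCanAlterRefCount() { if (Seq == S_Use) Seq = S_CanRelease; }
};

int main() {
  ToyPtrState S;
  S.visitRelease();          // saw: objc_release(p)
  S.visitUse();              // saw: use of p
  S.visitCanAlterRefCount(); // saw: call that may decrement p's count
  assert(S.Seq == S_CanRelease); // a retain above this point completes a pair
}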
+ if (S.RRI.IsRetainBlock && + CanUse(Inst, Ptr, PA, Class)) { + assert(S.RRI.ReverseInsertPts.empty()); + S.RRI.ReverseInsertPts.insert(Inst); + S.SetSeq(S_Use); + } + break; + case S_Use: case S_None: break; case S_Stop: @@ -2533,28 +2627,43 @@ ObjCARCOpt::Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases) { - // Use postorder for bottom-up, and reverse-postorder for top-down, because we + // Use reverse-postorder on the reverse CFG for bottom-up, because we // magically know that loops will be well behaved, i.e. they won't repeatedly - // call retain on a single pointer without doing a release. + // call retain on a single pointer without doing a release. We can't use + // ReversePostOrderTraversal here because we want to walk up from each + // function exit point. + SmallPtrSet<BasicBlock *, 16> Visited; + SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack; + SmallVector<BasicBlock *, 16> Order; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock *BB = I; + if (BB->getTerminator()->getNumSuccessors() == 0) + Stack.push_back(std::make_pair(BB, pred_begin(BB))); + } + while (!Stack.empty()) { + pred_iterator End = pred_end(Stack.back().first); + while (Stack.back().second != End) { + BasicBlock *BB = *Stack.back().second++; + if (Visited.insert(BB)) + Stack.push_back(std::make_pair(BB, pred_begin(BB))); + } + Order.push_back(Stack.pop_back_val().first); + } bool BottomUpNestingDetected = false; - SmallVector<BasicBlock *, 8> PostOrder; - for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) { + for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = + Order.rbegin(), E = Order.rend(); I != E; ++I) { BasicBlock *BB = *I; - PostOrder.push_back(BB); - BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); } - // Iterate through the post-order in reverse order, achieving a - // reverse-postorder traversal. We don't use the ReversePostOrderTraversal - // class here because it works by computing its own full postorder iteration, - // recording the sequence, and playing it back in reverse. Since we're already - // doing a full iteration above, we can just record the sequence manually and - // avoid the cost of having ReversePostOrderTraversal compute it. + // Use regular reverse-postorder for top-down. bool TopDownNestingDetected = false; - for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator - RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI) - TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases); + typedef ReversePostOrderTraversal<Function *> RPOTType; + RPOTType RPOT(&F); + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + BasicBlock *BB = *I; + TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases); + } return TopDownNestingDetected && BottomUpNestingDetected; } @@ -2565,12 +2674,10 @@ void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &ReleasesToMove, MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts) { - const Type *ArgTy = Arg->getType(); - const Type *ParamTy = - (RetainRVFunc ? RetainRVFunc : - RetainFunc ? RetainFunc : - RetainBlockFunc)->arg_begin()->getType(); + SmallVectorImpl<Instruction *> &DeadInsts, + Module *M) { + Type *ArgTy = Arg->getType(); + Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); // Insert the new retain and release calls. 
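The traversal added to Visit() above computes a DFS postorder of the reverse CFG, seeded from every exit block, and then replays it in reverse. A self-contained sketch with plain adjacency lists (the block numbering is illustrative, not LLVM API):

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // succ[b] lists the successors of block b; block 3 is the only exit.
  std::vector<std::vector<int> > succ = {{1, 2}, {3}, {3}, {}};

  // Build predecessor lists: the successor lists of the reverse CFG.
  std::vector<std::vector<int> > pred(succ.size());
  for (unsigned b = 0; b != succ.size(); ++b)
    for (int s : succ[b])
      pred[s].push_back(b);

  std::vector<bool> visited(succ.size(), false);
  std::vector<std::pair<int, unsigned> > stack; // (block, next pred index)
  std::vector<int> order;                       // postorder on the reverse CFG

  for (unsigned b = 0; b != succ.size(); ++b)   // seed from the exit blocks
    if (succ[b].empty()) {
      visited[b] = true;
      stack.push_back(std::make_pair((int)b, 0u));
    }

  while (!stack.empty()) {
    if (stack.back().second < pred[stack.back().first].size()) {
      int p = pred[stack.back().first][stack.back().second++];
      if (!visited[p]) {
        visited[p] = true;
        stack.push_back(std::make_pair(p, 0u));
      }
    } else {
      order.push_back(stack.back().first);
      stack.pop_back();
    }
  }

  // Replaying 'order' in reverse visits each block after its successors
  // (cycles aside), which is the order the bottom-up pass wants.
  for (unsigned i = order.size(); i != 0; --i)
    std::printf("%d ", order[i - 1]);           // prints: 3 2 1 0
  std::printf("\n");
}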
for (SmallPtrSet<Instruction *, 2>::const_iterator @@ -2581,7 +2688,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, new BitCastInst(Arg, ParamTy, "", InsertPt); CallInst *Call = CallInst::Create(RetainsToMove.IsRetainBlock ? - RetainBlockFunc : RetainFunc, + getRetainBlockCallee(M) : getRetainCallee(M), MyArg, "", InsertPt); Call->setDoesNotThrow(); if (!RetainsToMove.IsRetainBlock) @@ -2598,8 +2705,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, // The invoke's return value isn't available in the unwind block, // but our releases will never depend on it, because they must be // paired with retains from before the invoke. - InsertPts[0] = II->getNormalDest()->getFirstNonPHI(); - InsertPts[1] = II->getUnwindDest()->getFirstNonPHI(); + InsertPts[0] = II->getNormalDest()->getFirstInsertionPt(); + InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt(); } else { // Insert code immediately after the last use. InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse)); @@ -2609,7 +2716,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *InsertPt = *I; Value *MyArg = ArgTy == ParamTy ? Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt); + CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, + "", InsertPt); // Attach a clang.imprecise_release metadata tag, if appropriate. if (MDNode *M = ReleasesToMove.ReleaseMetadata) Call->setMetadata(ImpreciseReleaseMDKind, M); @@ -2640,7 +2748,8 @@ bool ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { + DenseMap<Value *, RRInfo> &Releases, + Module *M) { bool AnyPairsCompletelyEliminated = false; RRInfo RetainsToMove; RRInfo ReleasesToMove; @@ -2649,21 +2758,36 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> SmallVector<Instruction *, 8> DeadInsts; for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), - E = Retains.end(); I != E; ) { - Value *V = (I++)->first; + E = Retains.end(); I != E; ++I) { + Value *V = I->first; if (!V) continue; // blotted Instruction *Retain = cast<Instruction>(V); Value *Arg = GetObjCArg(Retain); - // If the object being released is in static or stack storage, we know it's + // If the object being released is in static storage, we know it's // not being managed by ObjC reference counting, so we can delete pairs // regardless of what possible decrements or uses lie between them. - bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); + bool KnownSafe = isa<Constant>(Arg); + + // Same for stack storage, unless this is an objc_retainBlock call, + // which is responsible for copying the block data from the stack to + // the heap. + if (!I->second.IsRetainBlock && isa<AllocaInst>(Arg)) + KnownSafe = true; + + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>( + StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) + if (GV->isConstant()) + KnownSafe = true; // If a pair happens in a region where it is known that the reference count // is already incremented, we can similarly ignore possible decrements. 
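A hypothetical distillation of the KnownSafe tests above; the field and function names here are invented for illustration:

#include <cstdio>

struct ArgInfo {
  bool IsConstant;            // isa<Constant>(Arg)
  bool IsAlloca;              // isa<AllocaInst>(Arg)
  bool LoadedFromConstGlobal; // load of a GlobalVariable with isConstant()
};

// An object in static storage, on the stack (unless retained via
// objc_retainBlock, which may copy it to the heap), or whose pointer was
// loaded from a constant global can't be deallocated out from under us,
// so retain/release pairs on it can be removed more aggressively.
static bool isKnownSafe(const ArgInfo &A, bool IsRetainBlock) {
  if (A.IsConstant) return true;
  if (A.IsAlloca && !IsRetainBlock) return true;
  if (A.LoadedFromConstGlobal) return true;
  return false;
}

int main() {
  ArgInfo StackObject = {false, true, false};
  std::printf("%d %d\n",
              (int)isKnownSafe(StackObject, /*IsRetainBlock=*/false),  // 1
              (int)isKnownSafe(StackObject, /*IsRetainBlock=*/true));  // 0
}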
- bool KnownIncrementedTD = true, KnownIncrementedBU = true; + bool KnownSafeTD = true, KnownSafeBU = true; // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. @@ -2683,7 +2807,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain); assert(It != Retains.end()); const RRInfo &NewRetainRRI = It->second; - KnownIncrementedTD &= NewRetainRRI.KnownIncremented; + KnownSafeTD &= NewRetainRRI.KnownSafe; for (SmallPtrSet<Instruction *, 2>::const_iterator LI = NewRetainRRI.Calls.begin(), LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { @@ -2739,7 +2863,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> Releases.find(NewRelease); assert(It != Releases.end()); const RRInfo &NewReleaseRRI = It->second; - KnownIncrementedBU &= NewReleaseRRI.KnownIncremented; + KnownSafeBU &= NewReleaseRRI.KnownSafe; for (SmallPtrSet<Instruction *, 2>::const_iterator LI = NewReleaseRRI.Calls.begin(), LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { @@ -2787,12 +2911,19 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> if (NewRetains.empty()) break; } - // If the pointer is known incremented, we can safely delete the pair - // regardless of what's between them. - if (KnownIncrementedTD || KnownIncrementedBU) { + // If the pointer is known incremented or nested, we can safely delete the + // pair regardless of what's between them. + if (KnownSafeTD || KnownSafeBU) { RetainsToMove.ReverseInsertPts.clear(); ReleasesToMove.ReverseInsertPts.clear(); NewCount = 0; + } else { + // Determine whether the new insertion points we computed preserve the + // balance of retain and release calls through the program. + // TODO: If the fully aggressive solution isn't valid, try to find a + // less aggressive solution which is. + if (NewDelta != 0) + goto next_retain; } // Determine whether the original call points are balanced in the retain and @@ -2803,18 +2934,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> if (OldDelta != 0) goto next_retain; - // Determine whether the new insertion points we computed preserve the - // balance of retain and release calls through the program. - // TODO: If the fully aggressive solution isn't valid, try to find a - // less aggressive solution which is. - if (NewDelta != 0) - goto next_retain; - // Ok, everything checks out and we're all set. Let's move some code! Changed = true; AnyPairsCompletelyEliminated = NewCount == 0; NumRRs += OldCount - NewCount; - MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts); + MoveCalls(Arg, RetainsToMove, ReleasesToMove, + Retains, Releases, DeadInsts, M); next_retain: NewReleases.clear(); @@ -2993,7 +3118,8 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) { bool NestingDetected = Visit(F, BBStates, Retains, Releases); // Transform. - return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected; + return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) && + NestingDetected; } /// OptimizeReturns - Look for this pattern: @@ -3072,7 +3198,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { // Check that there is nothing that can affect the reference // count between the retain and the call. - FindDependencies(CanChangeRetainCount, Arg, BB, Retain, + // Note that Retain need not be in BB. 
+ FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, DependingInstructions, Visited, PA); if (DependingInstructions.size() != 1) goto next_block; @@ -3117,12 +3244,6 @@ bool ObjCARCOpt::doInitialization(Module &M) { ImpreciseReleaseMDKind = M.getContext().getMDKindID("clang.imprecise_release"); - // Identify the declarations for objc_retain and friends. - RetainFunc = M.getFunction("objc_retain"); - RetainBlockFunc = M.getFunction("objc_retainBlock"); - RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue"); - ReleaseFunc = M.getFunction("objc_release"); - // Intuitively, objc_retain and others are nocapture, however in practice // they are not, because they return their argument value. And objc_release // calls finalizers. @@ -3132,6 +3253,7 @@ bool ObjCARCOpt::doInitialization(Module &M) { AutoreleaseRVCallee = 0; ReleaseCallee = 0; RetainCallee = 0; + RetainBlockCallee = 0; AutoreleaseCallee = 0; return false; @@ -3294,7 +3416,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); std::vector<Type *> Params; Params.push_back(I8X); - const FunctionType *FTy = + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -3310,7 +3432,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); std::vector<Type *> Params; Params.push_back(I8X); - const FunctionType *FTy = + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -3377,7 +3499,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, void ObjCARCContract::ContractRelease(Instruction *Release, inst_iterator &Iter) { LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); - if (!Load || Load->isVolatile()) return; + if (!Load || !Load->isSimple()) return; // For now, require everything to be in one basic block. BasicBlock *BB = Release->getParent(); @@ -3393,7 +3515,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release, !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod))) ++I; StoreInst *Store = dyn_cast<StoreInst>(I); - if (!Store || Store->isVolatile()) return; + if (!Store || !Store->isSimple()) return; if (Store->getPointerOperand() != Loc.Ptr) return; Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); @@ -3411,8 +3533,8 @@ void ObjCARCContract::ContractRelease(Instruction *Release, ++NumStoreStrongs; LLVMContext &C = Release->getContext(); - const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - const Type *I8XX = PointerType::getUnqual(I8X); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); Value *Args[] = { Load->getPointerOperand(), New }; if (Args[0]->getType() != I8XX) @@ -3548,7 +3670,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (Inst != UserInst && DT->dominates(Inst, UserInst)) { Changed = true; Instruction *Replacement = Inst; - const Type *UseTy = U.get()->getType(); + Type *UseTy = U.get()->getType(); if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) { // For PHI nodes, insert the bitcast in the predecessor block. 
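For reference alongside the ContractRelease peephole above, this is the usual description of what the fused call does, with toy retain/release stand-ins rather than the Objective-C runtime (a sketch, not the runtime's code):

#include <cstdio>

static void retain(void *p)  { if (p) std::printf("retain %p\n", p); }
static void release(void *p) { if (p) std::printf("release %p\n", p); }

// objc_storeStrong(location, newValue): retain the incoming value, store
// it, then release the displaced value. One call stands in for the
// load/retain/store/release sequence matched above.
static void storeStrong(void **location, void *newValue) {
  void *old = *location;  // the LoadInst
  retain(newValue);
  *location = newValue;   // the StoreInst
  release(old);
}

int main() {
  int a, b;
  void *slot = &a;
  storeStrong(&slot, &b); // prints a retain, then a release
  return 0;
}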
unsigned ValNo = diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index e6341ae..8f98a5b 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -309,7 +309,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, std::swap(LHS, RHS); bool Success = !I->swapOperands(); assert(Success && "swapOperands failed"); - Success = false; + (void)Success; MadeChange = true; } else if (RHSBO) { // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 083412e..196a847 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -156,7 +156,7 @@ namespace { /// class SCCPSolver : public InstVisitor<SCCPSolver> { const TargetData *TD; - SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable. + SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable. DenseMap<Value*, LatticeVal> ValueState; // The state each value is in. /// StructValueState - This maintains ValueState for values that have @@ -241,7 +241,7 @@ public: /// this method must be called. void AddTrackedFunction(Function *F) { // Add an entry, F -> undef. - if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) { + if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) { MRVFunctionsTracked.insert(F); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i), @@ -302,7 +302,7 @@ public: /// markAnythingOverdefined - Mark the specified value overdefined. This /// works with both scalars and structs. void markAnythingOverdefined(Value *V) { - if (const StructType *STy = dyn_cast<StructType>(V->getType())) + if (StructType *STy = dyn_cast<StructType>(V->getType())) for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) markOverdefined(getStructValueState(V, i), V); else @@ -417,7 +417,7 @@ private: else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) LV.markConstant(CS->getOperand(i)); // Constants are constant. else if (isa<ConstantAggregateZero>(C)) { - const Type *FieldTy = cast<StructType>(V->getType())->getElementType(i); + Type *FieldTy = cast<StructType>(V->getType())->getElementType(i); LV.markConstant(Constant::getNullValue(FieldTy)); } else LV.markOverdefined(); // Unknown sort of constant. @@ -471,9 +471,9 @@ private: /// UsersOfOverdefinedPHIs map for PN, remove them now. void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) { if (UsersOfOverdefinedPHIs.empty()) return; - std::multimap<PHINode*, Instruction*>::iterator It, E; - tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN); - while (It != E) { + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN); + for (ItTy It = Range.first, E = Range.second; It != E;) { if (It->second == I) UsersOfOverdefinedPHIs.erase(It++); else @@ -486,9 +486,9 @@ private: /// (Duplicate entries do not break anything directly, but can lead to /// exponential growth of the table in rare cases.) 
void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) { - std::multimap<PHINode*, Instruction*>::iterator J, E; - tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN); - for (; J != E; ++J) + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN); + for (ItTy J = Range.first, E = Range.second; J != E; ++J) if (J->second == I) return; UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I)); @@ -515,6 +515,7 @@ private: void visitShuffleVectorInst(ShuffleVectorInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); + void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); } // Instructions that cannot be folded away. void visitStoreInst (StoreInst &I); @@ -528,8 +529,12 @@ private: visitTerminatorInst(II); } void visitCallSite (CallSite CS); + void visitResumeInst (TerminatorInst &I) { /*returns void*/ } void visitUnwindInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } + void visitFenceInst (FenceInst &I) { /*returns void*/ } + void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); } + void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); } void visitAllocaInst (Instruction &I) { markOverdefined(&I); } void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); } @@ -577,6 +582,10 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, } if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) { + if (TI.getNumSuccessors() < 2) { + Succs[0] = true; + return; + } LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); @@ -637,6 +646,9 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { return true; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + if (SI->getNumSuccessors() < 2) + return true; + LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); @@ -692,13 +704,14 @@ void SCCPSolver::visitPHINode(PHINode &PN) { // There may be instructions using this PHI node that are not overdefined // themselves. If so, make sure that they know that the PHI node operand // changed. - std::multimap<PHINode*, Instruction*>::iterator I, E; - tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN); - if (I == E) + typedef std::multimap<PHINode*, Instruction*>::iterator ItTy; + std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(&PN); + + if (Range.first == Range.second) return; SmallVector<Instruction*, 16> Users; - for (; I != E; ++I) + for (ItTy I = Range.first, E = Range.second; I != E; ++I) Users.push_back(I->second); while (!Users.empty()) visit(Users.pop_back_val()); @@ -772,7 +785,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) { // Handle functions that return multiple values. 
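The refactored loops above are the standard std::multimap equal_range idiom; here it is in isolation, including the post-increment erase that keeps the iterator valid (self-contained, not SCCP's code):

#include <cassert>
#include <map>
#include <utility>

int main() {
  std::multimap<int, char> M;
  M.insert(std::make_pair(1, 'a'));
  M.insert(std::make_pair(1, 'b'));
  M.insert(std::make_pair(2, 'c'));

  typedef std::multimap<int, char>::iterator ItTy;
  std::pair<ItTy, ItTy> Range = M.equal_range(1);

  // Erase selected entries in the range; erase() invalidates only the
  // erased iterator, so advance with a post-increment before erasing.
  for (ItTy It = Range.first, E = Range.second; It != E;) {
    if (It->second == 'a')
      M.erase(It++);
    else
      ++It;
  }

  assert(M.size() == 2 && M.count(1) == 1);
  return 0;
}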
if (!TrackedMultipleRetVals.empty()) { - if (const StructType *STy = dyn_cast<StructType>(ResultOp->getType())) + if (StructType *STy = dyn_cast<StructType>(ResultOp->getType())) if (MRVFunctionsTracked.count(F)) for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F, @@ -825,7 +838,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { } void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { - const StructType *STy = dyn_cast<StructType>(IVI.getType()); + StructType *STy = dyn_cast<StructType>(IVI.getType()); if (STy == 0) return markOverdefined(&IVI); @@ -925,7 +938,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { // Could annihilate value. if (I.getOpcode() == Instruction::And) markConstant(IV, &I, Constant::getNullValue(I.getType())); - else if (const VectorType *PT = dyn_cast<VectorType>(I.getType())) + else if (VectorType *PT = dyn_cast<VectorType>(I.getType())) markConstant(IV, &I, Constant::getAllOnesValue(PT)); else markConstant(IV, &I, @@ -1179,8 +1192,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { } Constant *Ptr = Operands[0]; - markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1, - Operands.size()-1)); + ArrayRef<Constant *> Indices(Operands.begin() + 1, Operands.end()); + markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices)); } void SCCPSolver::visitStoreInst(StoreInst &SI) { @@ -1278,7 +1291,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands.data(), Operands.size())) + if (Constant *C = ConstantFoldCall(F, Operands)) return markConstant(I, C); } @@ -1303,7 +1316,7 @@ CallOverdefined: continue; } - if (const StructType *STy = dyn_cast<StructType>(AI->getType())) { + if (StructType *STy = dyn_cast<StructType>(AI->getType())) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { LatticeVal CallArg = getStructValueState(*CAI, i); mergeInValue(getStructValueState(AI, i), AI, CallArg); @@ -1315,7 +1328,7 @@ CallOverdefined: } // If this is a single/zero retval case, see if we're tracking the function. - if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) { + if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) { if (!MRVFunctionsTracked.count(F)) goto CallOverdefined; // Not tracking this callee. @@ -1419,67 +1432,116 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // Look for instructions which produce undef values. if (I->getType()->isVoidTy()) continue; - if (const StructType *STy = dyn_cast<StructType>(I->getType())) { - // Only a few things that can be structs matter for undef. Just send - // all their results to overdefined. We could be more precise than this - // but it isn't worth bothering. - if (isa<CallInst>(I) || isa<SelectInst>(I)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - LatticeVal &LV = getStructValueState(I, i); - if (LV.isUndefined()) - markOverdefined(LV, I); - } + if (StructType *STy = dyn_cast<StructType>(I->getType())) { + // Only a few things that can be structs matter for undef. + + // Tracked calls must never be marked overdefined in ResolvedUndefsIn. + if (CallSite CS = CallSite(I)) + if (Function *F = CS.getCalledFunction()) + if (MRVFunctionsTracked.count(F)) + continue; + + // extractvalue and insertvalue don't need to be marked; they are + // tracked as precisely as their operands. 
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I)) + continue; + + // Send the results of everything else to overdefined. We could be + // more precise than this but it isn't worth bothering. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + LatticeVal &LV = getStructValueState(I, i); + if (LV.isUndefined()) + markOverdefined(LV, I); } continue; } - + LatticeVal &LV = getValueState(I); if (!LV.isUndefined()) continue; - // No instructions using structs need disambiguation. - if (I->getOperand(0)->getType()->isStructTy()) + // extractvalue is safe; check here because the argument is a struct. + if (isa<ExtractValueInst>(I)) continue; - // Get the lattice values of the first two operands for use below. + // Compute the operand LatticeVals, for convenience below. + // Anything taking a struct is conservatively assumed to require + // overdefined markings. + if (I->getOperand(0)->getType()->isStructTy()) { + markOverdefined(I); + return true; + } LatticeVal Op0LV = getValueState(I->getOperand(0)); LatticeVal Op1LV; if (I->getNumOperands() == 2) { - // No instructions using structs need disambiguation. - if (I->getOperand(1)->getType()->isStructTy()) - continue; - - // If this is a two-operand instruction, and if both operands are - // undefs, the result stays undef. + if (I->getOperand(1)->getType()->isStructTy()) { + markOverdefined(I); + return true; + } + Op1LV = getValueState(I->getOperand(1)); - if (Op0LV.isUndefined() && Op1LV.isUndefined()) - continue; } - // If this is an instruction whose result is defined even if the input is // not fully defined, propagate the information. - const Type *ITy = I->getType(); + Type *ITy = I->getType(); switch (I->getOpcode()) { - default: break; // Leave the instruction as an undef. + case Instruction::Add: + case Instruction::Sub: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + break; // Any undef -> undef + case Instruction::FSub: + case Instruction::FAdd: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + // Floating-point binary operation: be conservative. + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + markForcedConstant(I, Constant::getNullValue(ITy)); + else + markOverdefined(I); + return true; case Instruction::ZExt: - // After a zero extend, we know the top part is zero. SExt doesn't have - // to be handled here, because we don't know whether the top part is 1's - // or 0's. - case Instruction::SIToFP: // some FP values are not possible, just use 0. - case Instruction::UIToFP: // some FP values are not possible, just use 0. + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + // undef -> 0; some outputs are impossible markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: case Instruction::And: + // Both operands undef -> undef + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + break; // undef * X -> 0. X could be zero. // undef & X -> 0. X could be zero. markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Or: + // Both operands undef -> undef + if (Op0LV.isUndefined() && Op1LV.isUndefined()) + break; // undef | X -> -1. X could be -1.
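A worked check, on 8-bit values, that the constants forced above are consistent with some legal choice for the undef operand, which is all that is required (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0x5A;                     // an arbitrary known operand
  assert((uint8_t)(0x00 * X) == 0x00);  // undef * X -> 0: choose undef = 0
  assert((uint8_t)(0x00 & X) == 0x00);  // undef & X -> 0: choose undef = 0
  assert((uint8_t)(0xFF | X) == 0xFF);  // undef | X -> -1: choose undef = -1
  (void)X;
  return 0;
}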
markForcedConstant(I, Constant::getAllOnesValue(ITy)); return true; + case Instruction::Xor: + // undef ^ undef -> 0; strictly speaking, this is not + // necessary, but we try to be nice to people who expect this + // behavior in simple cases. + if (Op0LV.isUndefined() && Op1LV.isUndefined()) { + markForcedConstant(I, Constant::getNullValue(ITy)); + return true; + } + // undef ^ X -> undef + break; + case Instruction::SDiv: case Instruction::UDiv: case Instruction::SRem: @@ -1494,26 +1556,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { return true; case Instruction::AShr: - // undef >>s X -> undef. No change. - if (Op0LV.isUndefined()) break; - - // X >>s undef -> X. X could be 0, X could have the high-bit known set. - if (Op0LV.isConstant()) - markForcedConstant(I, Op0LV.getConstant()); - else - markOverdefined(I); + // X >>a undef -> undef. + if (Op1LV.isUndefined()) break; + + // undef >>a X -> all ones + markForcedConstant(I, Constant::getAllOnesValue(ITy)); return true; case Instruction::LShr: case Instruction::Shl: - // undef >> X -> undef. No change. - // undef << X -> undef. No change. - if (Op0LV.isUndefined()) break; - - // X >> undef -> 0. X could be 0. - // X << undef -> 0. X could be 0. + // X << undef -> undef. + // X >> undef -> undef. + if (Op1LV.isUndefined()) break; + + // undef << X -> 0 + // undef >> X -> 0 markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Select: + Op1LV = getValueState(I->getOperand(1)); // undef ? X : Y -> X or Y. There could be commonality between X/Y. if (Op0LV.isUndefined()) { if (!Op1LV.isConstant()) // Pick the constant one if there is any. @@ -1533,9 +1593,35 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { else markOverdefined(I); return true; + case Instruction::Load: + // A load here means one of two things: a load of undef from a global, + // or a load from an unknown pointer. Either way, having it return undef + // is okay. + break; + case Instruction::ICmp: + // X == undef -> undef. Other comparisons get more complicated. + if (cast<ICmpInst>(I)->isEquality()) + break; + markOverdefined(I); + return true; case Instruction::Call: - // If a call has an undef result, it is because it is constant foldable - // but one of the inputs was undef. Just force the result to + case Instruction::Invoke: { + // There are two reasons a call can have an undef result: + // 1. It could be tracked. + // 2. It could be constant-foldable. + // Because of the way we solve return values, tracked calls must + // never be marked overdefined in ResolvedUndefsIn. + if (Function *F = CallSite(I).getCalledFunction()) + if (TrackedRetVals.count(F)) + break; + + // If the call is constant-foldable, we mark it overdefined because + // we do not know what return values are valid. + markOverdefined(I); + return true; + } + default: + // If we don't know what should happen here, conservatively mark it // overdefined. markOverdefined(I); return true; @@ -1621,15 +1707,25 @@ FunctionPass *llvm::createSCCPPass() { static void DeleteInstructionInBlock(BasicBlock *BB) { DEBUG(dbgs() << " BasicBlock Dead:" << *BB); ++NumDeadBlocks; - - // Delete the instructions backwards, as it has a reduced likelihood of - // having to update as many def-use and use-def chains.
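The same style of check for the shift rules above; this relies on arithmetic right shift of negative integers, which C++ guarantees only from C++20 onward but which holds on all common targets (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X != 8; ++X) {
    // undef << X -> 0 and undef >>l X -> 0: choosing undef = 0 makes the
    // result 0 for every shift amount X.
    assert((uint8_t)(0x00 << X) == 0x00);
    assert((uint8_t)(0x00 >> X) == 0x00);
    // undef >>a X -> all ones: choosing undef = -1 keeps every bit set,
    // because the sign bit is replicated.
    assert((uint8_t)((int8_t)-1 >> X) == 0xFF);
  }
  return 0;
}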
- while (!isa<TerminatorInst>(BB->begin())) { - Instruction *I = --BasicBlock::iterator(BB->getTerminator()); - - if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - BB->getInstList().erase(I); + + // Check to see if there are non-terminating instructions to delete. + if (isa<TerminatorInst>(BB->begin())) + return; + + // Delete the instructions backwards, as it has a reduced likelihood of having + // to update as many def-use and use-def chains. + Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. + while (EndInst != BB->begin()) { + // Delete the next to last instruction. + BasicBlock::iterator I = EndInst; + Instruction *Inst = --I; + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + if (isa<LandingPadInst>(Inst)) { + EndInst = Inst; + continue; + } + BB->getInstList().erase(Inst); ++NumInstRemoved; } } diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 302c287..f6918de 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -63,7 +63,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeCFGSimplifyPassPass(Registry); initializeSimplifyLibCallsPass(Registry); initializeSinkingPass(Registry); - initializeTailDupPass(Registry); initializeTailCallElimPass(Registry); } @@ -187,3 +186,7 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createBasicAliasAnalysisPass()); } + +void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLowerExpectIntrinsicPass()); +} diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 7d6349c..c6d9123 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -129,11 +129,11 @@ namespace { AllocaInfo &Info); void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info); void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, AllocaInfo &Info, + Type *MemOpType, bool isStore, AllocaInfo &Info, Instruction *TheAccess, bool AllowWholeAccess); - bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); - uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, - const Type *&IdxTy); + bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size); + uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, + Type *&IdxTy); void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); @@ -145,6 +145,9 @@ namespace { SmallVector<AllocaInst*, 32> &NewElts); void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); + void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, + uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts); void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); @@ -253,7 +256,7 @@ class ConvertToScalarInfo { /// VectorTy - This tracks the type that we should promote the vector to if /// it is possible to turn it into a vector. This starts out null, and if it /// isn't possible to turn into a vector type, it gets set to VoidTy. - const VectorType *VectorTy; + VectorType *VectorTy; /// HadNonMemTransferAccess - True if there is at least one access to the /// alloca that is not a MemTransferInst. 
We don't want to turn structs into @@ -269,11 +272,11 @@ public: private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset); - bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); + void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset); + bool MergeInVectorType(VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); - Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, + Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType, uint64_t Offset, IRBuilder<> &Builder); Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, uint64_t Offset, IRBuilder<> &Builder); @@ -295,8 +298,6 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { if (ScalarKind == Unknown) ScalarKind = Integer; - // FIXME: It should be possible to promote the vector type up to the alloca's - // size. if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8) ScalarKind = Integer; @@ -306,7 +307,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // random stuff that doesn't use vectors (e.g. <9 x double>) because then // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. - const Type *NewTy; + Type *NewTy; if (ScalarKind == Vector) { assert(VectorTy && "Missing type for vector scalar."); DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " @@ -331,20 +332,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// (VectorTy) so far at the offset specified by Offset (which is specified in /// bytes). /// -/// There are three cases we handle here: +/// There are two cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. /// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. -/// 2) A union of vector types with power-of-2 size differences, e.g. a float, -/// <2 x float> and <4 x float>. Here we turn element accesses into insert -/// and extract element operations, and <2 x float> accesses into a cast to -/// <2 x double>, an extract, and a cast back to <2 x float>. -/// 3) A fully general blob of memory, which we turn into some (potentially +/// 2) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, +void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In, uint64_t Offset) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. @@ -355,7 +352,7 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. - if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { + if (VectorType *VInTy = dyn_cast<VectorType>(In)) { if (MergeInVectorType(VInTy, Offset)) return; } else if (In->isFloatTy() || In->isDoubleTy() || @@ -371,20 +368,13 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, // if the implied vector agrees with what we already have and if Offset is // compatible with it. 
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) { + (!VectorTy || EltSize == VectorTy->getElementType() + ->getPrimitiveSizeInBits()/8)) { if (!VectorTy) { ScalarKind = ImplicitVector; VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; } - - unsigned CurrentEltSize = VectorTy->getElementType() - ->getPrimitiveSizeInBits()/8; - if (EltSize == CurrentEltSize) - return; - - if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) - return; + return; } } @@ -395,74 +385,21 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, /// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore, /// returning true if the type was successfully merged and false otherwise. -bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, +bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy, uint64_t Offset) { - // TODO: Support nonzero offsets? - if (Offset != 0) - return false; - - // Only allow vectors that are a power-of-2 away from the size of the alloca. - if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) - return false; - - // If this the first vector we see, remember the type so that we know the - // element size. - if (!VectorTy) { + if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { + // If we're storing/loading a vector of the right size, allow it as a + // vector. If this the first vector we see, remember the type so that + // we know the element size. If this is a subsequent access, ignore it + // even if it is a differing type but the same size. Worst case we can + // bitcast the resultant vectors. + if (!VectorTy) + VectorTy = VInTy; ScalarKind = Vector; - VectorTy = VInTy; return true; } - unsigned BitWidth = VectorTy->getBitWidth(); - unsigned InBitWidth = VInTy->getBitWidth(); - - // Vectors of the same size can be converted using a simple bitcast. - if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) { - ScalarKind = Vector; - return true; - } - - const Type *ElementTy = VectorTy->getElementType(); - const Type *InElementTy = VInTy->getElementType(); - - // Do not allow mixed integer and floating-point accesses from vectors of - // different sizes. - if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) - return false; - - if (ElementTy->isFloatingPointTy()) { - // Only allow floating-point vectors of different sizes if they have the - // same element type. - // TODO: This could be loosened a bit, but would anything benefit? - if (ElementTy != InElementTy) - return false; - - // There are no arbitrary-precision floating-point types, which limits the - // number of legal vector types with larger element types that we can form - // to bitcast and extract a subvector. - // TODO: We could support some more cases with mixed fp128 and double here. - if (!(BitWidth == 64 || BitWidth == 128) || - !(InBitWidth == 64 || InBitWidth == 128)) - return false; - } else { - assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " - "or floating-point."); - unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); - unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); - - // Do not allow integer types smaller than a byte or types whose widths are - // not a multiple of a byte. - if (BitWidth < 8 || InBitWidth < 8 || - BitWidth % 8 != 0 || InBitWidth % 8 != 0) - return false; - } - - // Pick the largest of the two vector types. 
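A minimal model of the simplified merge rule above, using invented names: only a vector access that covers the whole alloca at offset zero keeps (or sets) the vector form, and everything else falls back to the integer form:

#include <cassert>

enum ToyScalarKind { Unknown, Vector, Integer };

struct ToyAllocaPlan {
  unsigned AllocaSize;             // in bytes
  ToyScalarKind Kind;
  unsigned VecBits;

  // Mirrors the shape of MergeInVectorType after this patch.
  bool mergeVectorAccess(unsigned VInBits, unsigned OffsetBytes) {
    if (VInBits / 8 == AllocaSize && OffsetBytes == 0) {
      if (Kind != Vector) {        // the first vector seen fixes the type;
        Kind = Vector;             // same-size latecomers can be bitcast
        VecBits = VInBits;
      }
      return true;
    }
    return false;                  // caller demotes the alloca to Integer
  }
};

int main() {
  ToyAllocaPlan P = {16, Unknown, 0};    // a 16-byte alloca
  assert(P.mergeVectorAccess(128, 0));   // <4 x i32> covering it: accepted
  assert(!P.mergeVectorAccess(64, 0));   // half-size <2 x i32>: rejected
  assert(!P.mergeVectorAccess(128, 4));  // full-size at an offset: rejected
  return 0;
}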
- ScalarKind = Vector; - if (InBitWidth > BitWidth) - VectorTy = VInTy; - - return true; + return false; } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all @@ -480,7 +417,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // Don't break volatile loads. - if (LI->isVolatile()) + if (!LI->isSimple()) return false; // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) @@ -492,7 +429,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Storing the pointer, not into the value? - if (SI->getOperand(0) == V || SI->isVolatile()) return false; + if (SI->getOperand(0) == V || !SI->isSimple()) return false; // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; @@ -502,7 +439,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { } if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { - IsNotTrivial = true; // Can't be mem2reg'd. + if (!onlyUsedByLifetimeMarkers(BCI)) + IsNotTrivial = true; // Can't be mem2reg'd. if (!CanConvertToScalar(BCI, Offset)) return false; continue; @@ -516,7 +454,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); // See if all uses can be converted. if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; @@ -560,6 +498,14 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { continue; } + // If this is a lifetime intrinsic, we can handle it. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + continue; + } + } + // Otherwise, we cannot handle this! return false; } @@ -589,7 +535,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); continue; @@ -599,7 +545,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // The load is a bit extract from NewAI shifted right by Offset bits. - Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); + Value *LoadedVal = Builder.CreateLoad(NewAI); Value *NewLoadVal = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); LI->replaceAllUsesWith(NewLoadVal); @@ -668,8 +614,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // pointer (bitcasted), then a store to our new alloca. 
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); Value *SrcPtr = MTI->getSource(); - const PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); - const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); + PointerType* AIPTy = cast<PointerType>(NewAI->getType()); if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) { AIPTy = PointerType::get(AIPTy->getElementType(), SPTy->getAddressSpace()); @@ -685,8 +631,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); - const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); - const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); + PointerType* AIPTy = cast<PointerType>(NewAI->getType()); if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) { AIPTy = PointerType::get(AIPTy->getElementType(), DPTy->getAddressSpace()); @@ -703,65 +649,18 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, continue; } - llvm_unreachable("Unsupported operation!"); - } -} + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + // There's no need to preserve these, as the resulting alloca will be + // converted to a register anyways. + II->eraseFromParent(); + continue; + } + } -/// getScaledElementType - Gets a scaled element type for a partial vector -/// access of an alloca. The input types must be integer or floating-point -/// scalar or vector types, and the resulting type is an integer, float or -/// double. -static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, - unsigned NewBitWidth) { - bool IsFP1 = Ty1->isFloatingPointTy() || - (Ty1->isVectorTy() && - cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); - bool IsFP2 = Ty2->isFloatingPointTy() || - (Ty2->isVectorTy() && - cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); - - LLVMContext &Context = Ty1->getContext(); - - // Prefer floating-point types over integer types, as integer types may have - // been created by earlier scalar replacement. - if (IsFP1 || IsFP2) { - if (NewBitWidth == 32) - return Type::getFloatTy(Context); - if (NewBitWidth == 64) - return Type::getDoubleTy(Context); + llvm_unreachable("Unsupported operation!"); } - - return Type::getIntNTy(Context, NewBitWidth); -} - -/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector -/// to another vector of the same element type which has the same allocation -/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). 
-static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, - IRBuilder<> &Builder) { - const Type *FromType = FromVal->getType(); - const VectorType *FromVTy = cast<VectorType>(FromType); - const VectorType *ToVTy = cast<VectorType>(ToType); - assert((ToVTy->getElementType() == FromVTy->getElementType()) && - "Vectors must have the same element type"); - Value *UnV = UndefValue::get(FromType); - unsigned numEltsFrom = FromVTy->getNumElements(); - unsigned numEltsTo = ToVTy->getNumElements(); - - SmallVector<Constant*, 3> Args; - const Type* Int32Ty = Builder.getInt32Ty(); - unsigned minNumElts = std::min(numEltsFrom, numEltsTo); - unsigned i; - for (i=0; i != minNumElts; ++i) - Args.push_back(ConstantInt::get(Int32Ty, i)); - - if (i < numEltsTo) { - Constant* UnC = UndefValue::get(Int32Ty); - for (; i != numEltsTo; ++i) - Args.push_back(UnC); - } - Constant *Mask = ConstantVector::get(Args); - return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); } /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer @@ -775,50 +674,20 @@ static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, /// Offset is an offset from the original alloca, in bits that need to be /// shifted to the right. Value *ConvertToScalarInfo:: -ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, +ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - const Type *FromType = FromVal->getType(); + Type *FromType = FromVal->getType(); if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + if (VectorType *VTy = dyn_cast<VectorType>(FromType)) { unsigned FromTypeSize = TD.getTypeAllocSize(FromType); unsigned ToTypeSize = TD.getTypeAllocSize(ToType); - if (FromTypeSize == ToTypeSize) { - // If the two types have the same primitive size, use a bit cast. - // Otherwise, it is two vectors with the same element type that has - // the same allocation size but different number of elements so use - // a shuffle vector. - if (FromType->getPrimitiveSizeInBits() == - ToType->getPrimitiveSizeInBits()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); - else - return CreateShuffleVectorCast(FromVal, ToType, Builder); - } - - if (isPowerOf2_64(FromTypeSize / ToTypeSize)) { - assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " - "of a smaller vector type at a nonzero offset."); - - const Type *CastElementTy = getScaledElementType(FromType, ToType, - ToTypeSize * 8); - unsigned NumCastVectorElements = FromTypeSize / ToTypeSize; - - LLVMContext &Context = FromVal->getContext(); - const Type *CastTy = VectorType::get(CastElementTy, - NumCastVectorElements); - Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); - - unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); - unsigned Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( - Type::getInt32Ty(Context), Elt), "tmp"); - return Builder.CreateBitCast(Extract, ToType, "tmp"); - } + if (FromTypeSize == ToTypeSize) + return Builder.CreateBitCast(FromVal, ToType); // Otherwise it must be an element access. 
unsigned Elt = 0; @@ -828,40 +697,39 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( - Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); + Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt)); if (V->getType() != ToType) - V = Builder.CreateBitCast(V, ToType, "tmp"); + V = Builder.CreateBitCast(V, ToType); return V; } // If ToType is a first class aggregate, extract out each of the pieces and // use insertvalue's to form the FCA. - if (const StructType *ST = dyn_cast<StructType>(ToType)) { + if (StructType *ST = dyn_cast<StructType>(ToType)) { const StructLayout &Layout = *TD.getStructLayout(ST); Value *Res = UndefValue::get(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; } - if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { + if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), Offset+i*EltSize, Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; } // Otherwise, this must be a union that was converted to an integer value. - const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); + IntegerType *NTy = cast<IntegerType>(FromVal->getType()); // If this is a big-endian system and the load is narrower than the // full alloca type, we need to do a shift to get the right bits. @@ -881,33 +749,31 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // only some bits are used. if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateLShr(FromVal, - ConstantInt::get(FromVal->getType(), - ShAmt), "tmp"); + ConstantInt::get(FromVal->getType(), ShAmt)); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateShl(FromVal, - ConstantInt::get(FromVal->getType(), - -ShAmt), "tmp"); + ConstantInt::get(FromVal->getType(), -ShAmt)); // Finally, unconditionally truncate the integer to the right width. unsigned LIBitWidth = TD.getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) FromVal = Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); + LIBitWidth)); else if (LIBitWidth > NTy->getBitWidth()) FromVal = Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); + LIBitWidth)); // If the result is an integer, this is a trunc or bitcast. if (ToType->isIntegerTy()) { // Should be done. } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { // Just do a bitcast, we know the sizes match up. - FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); + FromVal = Builder.CreateBitCast(FromVal, ToType); } else { // Otherwise must be a pointer. 
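A concrete instance of the integer extraction path above, on a little-endian layout: shift right by the bit offset, then truncate (plain C++ model, not IR):

#include <cassert>
#include <cstdint>

// The "alloca" is one big integer; a field at BitOffset is recovered with
// lshr + trunc, the same pairing as CreateLShr/CreateTrunc above.
static uint16_t extractU16(uint64_t Alloca, unsigned BitOffset) {
  return (uint16_t)(Alloca >> BitOffset);
}

int main() {
  // Four 16-bit fields packed into one 64-bit "alloca".
  uint64_t Alloca = 0;
  Alloca |= (uint64_t)0x1111 << 0;
  Alloca |= (uint64_t)0x2222 << 16;
  Alloca |= (uint64_t)0x3333 << 32;
  Alloca |= (uint64_t)0x4444 << 48;

  assert(extractU16(Alloca, 16) == 0x2222);
  assert(extractU16(Alloca, 48) == 0x4444);
  return 0;
}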
- FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); + FromVal = Builder.CreateIntToPtr(FromVal, ToType); } assert(FromVal->getType() == ToType && "Didn't convert right?"); return FromVal; @@ -927,65 +793,30 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, uint64_t Offset, IRBuilder<> &Builder) { // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. - const Type *AllocaType = Old->getType(); + Type *AllocaType = Old->getType(); LLVMContext &Context = Old->getContext(); - if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { + if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy); uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType()); // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) { - // If the two types have the same primitive size, use a bit cast. - // Otherwise, it is two vectors with the same element type that has - // the same allocation size but different number of elements so use - // a shuffle vector. - if (VTy->getPrimitiveSizeInBits() == - SV->getType()->getPrimitiveSizeInBits()) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); - else - return CreateShuffleVectorCast(SV, VTy, Builder); - } - - if (isPowerOf2_64(VecSize / ValSize)) { - assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " - "value of a smaller vector type at a nonzero offset."); - - const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), - ValSize); - unsigned NumCastVectorElements = VecSize / ValSize; - - LLVMContext &Context = SV->getContext(); - const Type *OldCastTy = VectorType::get(CastElementTy, - NumCastVectorElements); - Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); - - Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); - - unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); - unsigned Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - Value *Insert = - Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( - Type::getInt32Ty(Context), Elt), "tmp"); - return Builder.CreateBitCast(Insert, AllocaType, "tmp"); - } + if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType); // Must be an element insertion. assert(SV->getType() == VTy->getElementType()); uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); unsigned Elt = Offset/EltSize; - return Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), - "tmp"); + return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt)); } // If SV is a first-class aggregate value, insert each value recursively. 
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { + if (StructType *ST = dyn_cast<StructType>(SV->getType())) { const StructLayout &Layout = *TD.getStructLayout(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), Builder); @@ -993,10 +824,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, return Old; } - if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { + if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); } return Old; @@ -1009,20 +840,19 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType); if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) - SV = Builder.CreateBitCast(SV, - IntegerType::get(SV->getContext(),SrcWidth), "tmp"); + SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth)); else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp"); + SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext())); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { if (SV->getType()->getPrimitiveSizeInBits() < AllocaType->getPrimitiveSizeInBits()) - SV = Builder.CreateZExt(SV, AllocaType, "tmp"); + SV = Builder.CreateZExt(SV, AllocaType); else { // Truncation may be needed if storing more than the alloca can hold // (undefined behavior). - SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); + SV = Builder.CreateTrunc(SV, AllocaType); SrcWidth = DestWidth; SrcStoreWidth = DestStoreWidth; } @@ -1045,12 +875,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // only some bits in the structure are set. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), - ShAmt), "tmp"); + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt)); Mask <<= ShAmt; } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), - -ShAmt), "tmp"); + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt)); Mask = Mask.lshr(-ShAmt); } @@ -1196,7 +1024,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); UI != UE; ++UI) { LoadInst *LI = dyn_cast<LoadInst>(*UI); - if (LI == 0 || LI->isVolatile()) return false; + if (LI == 0 || !LI->isSimple()) return false; // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. 
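Note: throughout these hunks the old isVolatile() tests become !isSimple(). A minimal sketch of the predicate now being relied on; the helper name is ours, and the LLVM API of this period is assumed:

  #include "llvm/Instructions.h"
  using namespace llvm;

  // isSimple() holds only for loads that are neither volatile nor atomic,
  // so every rewrite above now refuses atomic loads as well, instead of
  // screening out only the volatile ones.
  static bool isRewritableLoad(const LoadInst *LI) {
    return LI->isSimple(); // same as !LI->isVolatile() && !LI->isAtomic()
  }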
@@ -1237,7 +1065,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); UI != UE; ++UI) { LoadInst *LI = dyn_cast<LoadInst>(*UI); - if (LI == 0 || LI->isVolatile()) return false; + if (LI == 0 || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is a // common case that happens when instcombine merges two loads through a PHI. @@ -1258,17 +1086,21 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { // trapping load in the predecessor if it is a critical edge. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); + Value *InVal = PN->getIncomingValue(i); + + // If the terminator of the predecessor has side-effects (an invoke), + // there is no safe place to put a load in the predecessor. + if (Pred->getTerminator()->mayHaveSideEffects()) + return false; + + // If the value is produced by the terminator of the predecessor + // (an invoke), there is no valid place to put a load in the predecessor. + if (Pred->getTerminator() == InVal) + return false; // If the predecessor has a single successor, then the edge isn't critical. if (Pred->getTerminator()->getNumSuccessors() == 1) continue; - - Value *InVal = PN->getIncomingValue(i); - - // If the InVal is an invoke in the pred, we can't put a load on the edge. - if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) - if (II->getParent() == Pred) - return false; // If this pointer is always safe to load, or if we can prove that there is // already a load in the block, then we can move the load to the pred block. @@ -1295,13 +1127,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { UI != UE; ++UI) { User *U = *UI; if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return false; continue; } if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(0) == AI || SI->isVolatile()) + if (SI->getOperand(0) == AI || !SI->isSimple()) return false; // Don't allow a store OF the AI, only INTO the AI. continue; } @@ -1343,6 +1175,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { continue; } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (onlyUsedByLifetimeMarkers(BCI)) { + InstsToRewrite.insert(BCI); + continue; + } + } + return false; } @@ -1354,6 +1193,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { // If we have instructions that need to be rewritten for this to be promotable // take care of it now. for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) { + // This could only be a bitcast used by nothing but lifetime intrinsics. + for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end(); + I != E;) { + Use &U = I.getUse(); + ++I; + cast<Instruction>(U.getUser())->eraseFromParent(); + } + BCI->eraseFromParent(); + continue; + } + if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) { // Selects in InstsToRewrite only have load uses. Rewrite each as two // loads with a new select. 
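Note: the two new checks in isSafePHIToSpeculate share one observation. A condensed sketch, with a hypothetical helper name and this period's LLVM API assumed:

  #include "llvm/Instructions.h"
  using namespace llvm;

  // An invoke terminator leaves no insertion point inside Pred for a
  // speculated load, and an invoke's own result does not exist until the
  // terminator has run, so either condition forbids hoisting into Pred.
  static bool predBlocksLoadSpeculation(BasicBlock *Pred, Value *InVal) {
    TerminatorInst *TI = Pred->getTerminator();
    return TI->mayHaveSideEffects() || TI == InVal;
  }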
@@ -1393,7 +1244,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { continue; } - const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); + Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), PN->getName()+".ld", PN); @@ -1483,13 +1334,13 @@ bool SROA::performPromotion(Function &F) { /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for /// SROA. It must be a struct or array type with a small number of elements. static bool ShouldAttemptScalarRepl(AllocaInst *AI) { - const Type *T = AI->getAllocatedType(); + Type *T = AI->getAllocatedType(); // Do not promote any struct into more than 32 separate vars. - if (const StructType *ST = dyn_cast<StructType>(T)) + if (StructType *ST = dyn_cast<StructType>(T)) return ST->getNumElements() <= 32; // Arrays are much less likely to be safe for SROA; only consider // them if they are very small. - if (const ArrayType *AT = dyn_cast<ArrayType>(T)) + if (ArrayType *AT = dyn_cast<ArrayType>(T)) return AT->getNumElements() <= 8; return false; } @@ -1594,7 +1445,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList) { DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); SmallVector<AllocaInst*, 32> ElementAllocas; - if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, @@ -1604,9 +1455,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI, WorkList.push_back(NA); // Add to worklist for recursive processing } } else { - const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); + ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); ElementAllocas.reserve(AT->getNumElements()); - const Type *ElTy = AT->getElementType(); + Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), AI->getName() + "." 
+ Twine(i), AI); @@ -1670,22 +1521,26 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, UI.getOperandNo() == 0, Info, MI, true /*AllowWholeAccess*/); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return MarkUnsafe(Info, User); - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), LIType, false, Info, LI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Store is ok if storing INTO the pointer, not storing the pointer - if (SI->isVolatile() || SI->getOperand(0) == I) + if (!SI->isSimple() || SI->getOperand(0) == I) return MarkUnsafe(Info, User); - const Type *SIType = SI->getOperand(0)->getType(); + Type *SIType = SI->getOperand(0)->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), SIType, true, Info, SI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return MarkUnsafe(Info, User); } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { isSafePHISelectUseForScalarRepl(User, Offset, Info); } else { @@ -1725,19 +1580,19 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, return MarkUnsafe(Info, User); isSafePHISelectUseForScalarRepl(GEPI, Offset, Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return MarkUnsafe(Info, User); - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), LIType, false, Info, LI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Store is ok if storing INTO the pointer, not storing the pointer - if (SI->isVolatile() || SI->getOperand(0) == I) + if (!SI->isSimple() || SI->getOperand(0) == I) return MarkUnsafe(Info, User); - const Type *SIType = SI->getOperand(0)->getType(); + Type *SIType = SI->getOperand(0)->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), SIType, true, Info, SI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = true; @@ -1776,8 +1631,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, // Compute the offset due to this GEP and check if the alloca has a // component element at that offset. SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) MarkUnsafe(Info, GEPI); } @@ -1786,14 +1640,14 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, /// elements of the same type (which is always true for arrays). If so, /// return true with NumElts and EltTy set to the number of elements and the /// element type, respectively. -static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, - const Type *&EltTy) { - if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { +static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, + Type *&EltTy) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) { NumElts = AT->getNumElements(); EltTy = (NumElts == 0 ? 
0 : AT->getElementType()); return true; } - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { NumElts = ST->getNumContainedTypes(); EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); for (unsigned n = 1; n < NumElts; ++n) { @@ -1807,12 +1661,12 @@ static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, /// isCompatibleAggregate - Check if T1 and T2 are either the same type or are /// "homogeneous" aggregates with the same element type and number of elements. -static bool isCompatibleAggregate(const Type *T1, const Type *T2) { +static bool isCompatibleAggregate(Type *T1, Type *T2) { if (T1 == T2) return true; unsigned NumElts1, NumElts2; - const Type *EltTy1, *EltTy2; + Type *EltTy1, *EltTy2; if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && isHomogeneousAggregate(T2, NumElts2, EltTy2) && NumElts1 == NumElts2 && @@ -1830,7 +1684,7 @@ static bool isCompatibleAggregate(const Type *T1, const Type *T2) { /// If AllowWholeAccess is true, then this allows uses of the entire alloca as a /// unit. If false, it only allows accesses known to be in a single element. void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, + Type *MemOpType, bool isStore, AllocaInfo &Info, Instruction *TheAccess, bool AllowWholeAccess) { // Check if this is a load/store of the entire alloca. @@ -1857,7 +1711,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, } } // Check if the offset/size correspond to a component within the alloca type. - const Type *T = Info.AI->getAllocatedType(); + Type *T = Info.AI->getAllocatedType(); if (TypeHasComponent(T, Offset, MemSize)) { Info.hasSubelementAccess = true; return; @@ -1868,16 +1722,16 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, /// TypeHasComponent - Return true if T has a component type with the /// specified offset and size. If Size is zero, do not check the size. -bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { - const Type *EltTy; +bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { + Type *EltTy; uint64_t EltSize; - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { const StructLayout *Layout = TD->getStructLayout(ST); unsigned EltIdx = Layout->getElementContainingOffset(Offset); EltTy = ST->getContainedType(EltIdx); EltSize = TD->getTypeAllocSize(EltTy); Offset -= Layout->getElementOffset(EltIdx); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { + } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { EltTy = AT->getElementType(); EltSize = TD->getTypeAllocSize(EltTy); if (Offset >= AT->getNumElements() * EltSize) @@ -1924,9 +1778,17 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // address operand will be updated, so nothing else needs to be done. 
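Note: both the lifetime-marker whitelisting in tryToMakeAllocaBePromotable and isSafeForScalarRepl above and the rewrite dispatch below reduce to the same intrinsic test. A sketch (helper name ours, API of this period assumed):

  #include "llvm/IntrinsicInst.h"
  using namespace llvm;

  // Only the two lifetime markers are tolerated as intrinsic users of an
  // alloca being scalarized; anything else marks the alloca unsafe.
  static bool isLifetimeMarker(const IntrinsicInst *II) {
    return II->getIntrinsicID() == Intrinsic::lifetime_start ||
           II->getIntrinsicID() == Intrinsic::lifetime_end;
  }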
continue; } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + RewriteLifetimeIntrinsic(II, AI, Offset, NewElts); + } + continue; + } if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { // Replace: @@ -1956,7 +1818,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, if (StoreInst *SI = dyn_cast<StoreInst>(User)) { Value *Val = SI->getOperand(0); - const Type *SIType = Val->getType(); + Type *SIType = Val->getType(); if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { // Replace: // store { i32, i32 } %val, { i32, i32 }* %alloc @@ -2026,10 +1888,10 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, /// Sets T to the type of the element and Offset to the offset within that /// element. IdxTy is set to the type of the index result to be used in a /// GEP instruction. -uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, - const Type *&IdxTy) { +uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, + Type *&IdxTy) { uint64_t Idx = 0; - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { const StructLayout *Layout = TD->getStructLayout(ST); Idx = Layout->getElementContainingOffset(Offset); T = ST->getContainedType(Idx); @@ -2037,7 +1899,7 @@ uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, IdxTy = Type::getInt32Ty(T->getContext()); return Idx; } - const ArrayType *AT = cast<ArrayType>(T); + ArrayType *AT = cast<ArrayType>(T); T = AT->getElementType(); uint64_t EltSize = TD->getTypeAllocSize(T); Idx = Offset / EltSize; @@ -2053,13 +1915,12 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts) { uint64_t OldOffset = Offset; SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); RewriteForScalarRepl(GEPI, AI, Offset, NewElts); - const Type *T = AI->getAllocatedType(); - const Type *IdxTy; + Type *T = AI->getAllocatedType(); + Type *IdxTy; uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy); if (GEPI->getOperand(0) == AI) OldIdx = ~0ULL; // Force the GEP to be rewritten. @@ -2073,7 +1934,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, if (Idx == OldIdx) return; - const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + Type *i32Ty = Type::getInt32Ty(AI->getContext()); SmallVector<Value*, 8> NewArgs; NewArgs.push_back(Constant::getNullValue(i32Ty)); while (EltOffset != 0) { @@ -2082,8 +1943,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, } Instruction *Val = NewElts[Idx]; if (NewArgs.size() > 1) { - Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), - NewArgs.end(), "", GEPI); + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); Val->takeName(GEPI); } if (Val->getType() != GEPI->getType()) @@ -2092,6 +1952,62 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, DeadInsts.push_back(GEPI); } +/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. 
Rewrite it +/// to mark the lifetime of the scalarized memory. +void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, + uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts) { + ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0)); + // Put matching lifetime markers on everything from Offset up to + // Offset+OldSize. + Type *AIType = AI->getAllocatedType(); + uint64_t NewOffset = Offset; + Type *IdxTy; + uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy); + + IRBuilder<> Builder(II); + uint64_t Size = OldSize->getLimitedValue(); + + if (NewOffset) { + // Splice the first element and index 'NewOffset' bytes in. SROA will + // split the alloca again later. + Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy()); + V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); + + IdxTy = NewElts[Idx]->getAllocatedType(); + uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset; + if (EltSize > Size) { + EltSize = Size; + Size = 0; + } else { + Size -= EltSize; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize)); + else + Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize)); + ++Idx; + } + + for (; Idx != NewElts.size() && Size; ++Idx) { + IdxTy = NewElts[Idx]->getAllocatedType(); + uint64_t EltSize = TD->getTypeAllocSize(IdxTy); + if (EltSize > Size) { + EltSize = Size; + Size = 0; + } else { + Size -= EltSize; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(NewElts[Idx], + Builder.getInt64(EltSize)); + else + Builder.CreateLifetimeEnd(NewElts[Idx], + Builder.getInt64(EltSize)); + } + DeadInsts.push_back(II); +} + /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, @@ -2139,7 +2055,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If the pointer is not the right type, insert a bitcast to the right // type. 
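Note: a worked example of the splitting loop in RewriteLifetimeIntrinsic above (element sizes assumed; the bitcasts to i8* that CreateLifetimeStart/End insert are elided):

  //   %a = alloca { i32, i32 }                      ; split into %a.0, %a.1
  //   call void @llvm.lifetime.start(i64 8, i8* %a) ; covers both words
  // is distributed one marker per element alloca, each piece clamped to
  // the size remaining from the original marker:
  //   call void @llvm.lifetime.start(i64 4, i8* %a.0)
  //   call void @llvm.lifetime.start(i64 4, i8* %a.1)
  // A marker starting at byte 4 instead takes the NewOffset path and
  // yields a single 4-byte marker on %a.1.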
- const Type *NewTy = + Type *NewTy = PointerType::get(AI->getType()->getElementType(), AddrSpace); if (OtherPtr->getType() != NewTy) @@ -2159,16 +2075,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, OtherPtr->getName()+"."+Twine(i), MI); uint64_t EltOffset; - const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); - const Type *OtherTy = OtherPtrTy->getElementType(); - if (const StructType *ST = dyn_cast<StructType>(OtherTy)) { + PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); + Type *OtherTy = OtherPtrTy->getElementType(); + if (StructType *ST = dyn_cast<StructType>(OtherTy)) { EltOffset = TD->getStructLayout(ST)->getElementOffset(i); } else { - const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); + Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); EltOffset = TD->getTypeAllocSize(EltTy)*i; } @@ -2181,7 +2097,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } Value *EltPtr = NewElts[i]; - const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); + Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); // If we got down to a scalar, insert a load or store as appropriate. if (EltTy->isSingleValueType()) { @@ -2207,7 +2123,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy->getScalarType(); + Type *ValTy = EltTy->getScalarType(); // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); @@ -2228,8 +2144,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, assert(StoreVal->getType() == ValTy && "Type mismatch!"); // If the requested value was a vector constant, create it. - if (EltTy != ValTy) { - unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); + if (EltTy->isVectorTy()) { + unsigned NumElts = cast<VectorType>(EltTy)->getNumElements(); SmallVector<Constant*, 16> Elts(NumElts, StoreVal); StoreVal = ConstantVector::get(Elts); } @@ -2271,7 +2187,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getAllocatedType(); + Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); IRBuilder<> Builder(SI); @@ -2286,12 +2202,12 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. - if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { const StructLayout *Layout = TD->getStructLayout(EltSTy); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Get the number of bits to shift SrcVal to get the value. 
- const Type *FieldTy = EltSTy->getElementType(i); + Type *FieldTy = EltSTy->getElementType(i); uint64_t Shift = Layout->getElementOffsetInBits(i); if (TD->isBigEndian()) @@ -2327,8 +2243,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, } } else { - const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); - const Type *ArrayEltTy = ATy->getElementType(); + ArrayType *ATy = cast<ArrayType>(AllocaEltTy); + Type *ArrayEltTy = ATy->getElementType(); uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); @@ -2384,7 +2300,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getAllocatedType(); + Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI @@ -2394,10 +2310,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // have different ways to compute the element offset. const StructLayout *Layout = 0; uint64_t ArrayEltBitOffset = 0; - if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { Layout = TD->getStructLayout(EltSTy); } else { - const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); + Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); } @@ -2408,14 +2324,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // Load the value from the alloca. If the NewElt is an aggregate, cast // the pointer to an integer of the same size before doing the load. Value *SrcField = NewElts[i]; - const Type *FieldTy = + Type *FieldTy = cast<PointerType>(SrcField->getType())->getElementType(); uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && !FieldTy->isVectorTy()) @@ -2468,14 +2384,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, /// HasPadding - Return true if the specified type has any structure or /// alignment padding in between the elements that would be split apart /// by SROA; return false otherwise. -static bool HasPadding(const Type *Ty, const TargetData &TD) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { +static bool HasPadding(Type *Ty, const TargetData &TD) { + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Ty = ATy->getElementType(); return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } // SROA currently handles only Arrays and Structs. - const StructType *STy = cast<StructType>(Ty); + StructType *STy = cast<StructType>(Ty); const StructLayout *SL = TD.getStructLayout(STy); unsigned PrevFieldBitOffset = 0; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { @@ -2530,7 +2446,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // and fusion code. if (!Info.hasSubelementAccess && Info.hasALoadOrStore) { // If the struct/array just has one element, use basic SRoA. 
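Note: the shift bookkeeping in RewriteStoreUserOfWholeAlloca and RewriteLoadUserOfWholeAlloca above, instantiated on a small case (little-endian assumed; the shift amounts are mirrored on big-endian targets):

  //   %a = alloca { i32, i32 }             ; elements %a.0 and %a.1
  //   store i64 %v, i64* %p                ; %p is %a viewed as i64*
  // becomes
  //   %lo = trunc i64 %v to i32            ; field 0, shift amount 0
  //   %sh = lshr i64 %v, 32
  //   %hi = trunc i64 %sh to i32           ; field 1, shift amount 32
  //   store i32 %lo, i32* %a.0
  //   store i32 %hi, i32* %a.1
  // and the whole-alloca load runs the same arithmetic in reverse
  // (zext, shl, or) to reassemble the i64 from the element allocas.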
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { if (ST->getNumElements() > 1) return false; } else { if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1) @@ -2576,7 +2492,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (LoadInst *LI = dyn_cast<LoadInst>(U)) { // Ignore non-volatile loads, they are always ok. - if (LI->isVolatile()) return false; + if (!LI->isSimple()) return false; continue; } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 7c415e5..fbb9465 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -134,7 +134,7 @@ namespace { struct StrCatOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcat" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || @@ -184,7 +184,7 @@ struct StrCatOpt : public LibCallOptimization { struct StrNCatOpt : public StrCatOpt { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strncat" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || @@ -232,7 +232,7 @@ struct StrNCatOpt : public StrCatOpt { struct StrChrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strchr" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || @@ -282,7 +282,7 @@ struct StrChrOpt : public LibCallOptimization { struct StrRChrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strrchr" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || @@ -323,7 +323,7 @@ struct StrRChrOpt : public LibCallOptimization { struct StrCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcmp" function prototype. 
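Note: the StrCmpOpt hunk just below reorders the foldings and fixes a sign bug: strcmp("", x) had been folded to *x, but the result must be negative whenever the first string orders before the second. A host-side check of the intended semantics (illustrative only, standard C library):

  #include <cassert>
  #include <cstring>

  int main() {
    // "" orders before "a", so folding strcmp("", x) must produce a
    // negative value, i.e. -*x rather than the old *x.
    assert(strcmp("", "a") < 0);
    assert(strcmp("a", "") > 0);
    return 0;
  }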
- const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || @@ -338,16 +338,17 @@ struct StrCmpOpt : public LibCallOptimization { bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); - if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x - return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); - - if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x - return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - // strcmp(x, y) -> cnst (if both x and y are constant strings) if (HasStr1 && HasStr2) return ConstantInt::get(CI->getType(), - strcmp(Str1.c_str(),Str2.c_str())); + StringRef(Str1).compare(Str2)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); @@ -371,7 +372,7 @@ struct StrCmpOpt : public LibCallOptimization { struct StrNCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strncmp" function prototype. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || @@ -400,16 +401,20 @@ struct StrNCmpOpt : public LibCallOptimization { bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); - if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x - return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + StringRef SubStr1 = StringRef(Str1).substr(0, Length); + StringRef SubStr2 = StringRef(Str2).substr(0, Length); + return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - // strncmp(x, y) -> cnst (if both x and y are constant strings) - if (HasStr1 && HasStr2) - return ConstantInt::get(CI->getType(), - strncmp(Str1.c_str(), Str2.c_str(), Length)); return 0; } }; @@ -426,7 +431,7 @@ struct StrCpyOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcpy" function prototype. unsigned NumParams = OptChkCall ? 
3 : 2; - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != NumParams || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || @@ -462,7 +467,7 @@ struct StrCpyOpt : public LibCallOptimization { struct StrNCpyOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || @@ -511,7 +516,7 @@ struct StrNCpyOpt : public LibCallOptimization { struct StrLenOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getReturnType()->isIntegerTy()) @@ -537,7 +542,7 @@ struct StrLenOpt : public LibCallOptimization { struct StrPBrkOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || @@ -575,7 +580,7 @@ struct StrPBrkOpt : public LibCallOptimization { struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy()) @@ -597,7 +602,7 @@ struct StrToOpt : public LibCallOptimization { struct StrSpnOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || @@ -626,7 +631,7 @@ struct StrSpnOpt : public LibCallOptimization { struct StrCSpnOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || @@ -658,7 +663,7 @@ struct StrCSpnOpt : public LibCallOptimization { struct StrStrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || @@ -722,7 +727,7 @@ struct StrStrOpt : public LibCallOptimization { struct MemCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || 
!FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy(32)) @@ -773,7 +778,7 @@ struct MemCpyOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || @@ -795,7 +800,7 @@ struct MemMoveOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || @@ -817,7 +822,7 @@ struct MemSetOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || @@ -840,7 +845,7 @@ struct MemSetOpt : public LibCallOptimization { struct PowOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || @@ -874,8 +879,8 @@ struct PowOpt : public LibCallOptimization { Callee->getAttributes()); Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes()); - Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp"); - Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp"); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); + Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); return Sel; } @@ -895,7 +900,7 @@ struct PowOpt : public LibCallOptimization { struct Exp2Opt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. 
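Note: the select retained in the PowOpt hunk above exists because pow(x, 0.5) is not plain sqrt(x) under IEEE rules. A quick host-side check of the corner case being protected (illustrative only, C99 semantics assumed):

  #include <cassert>
  #include <cmath>
  #include <limits>

  int main() {
    double NegInf = -std::numeric_limits<double>::infinity();
    // pow(-inf, 0.5) is +inf, while sqrt(-inf) is NaN; hence the
    // expansion (x == -inf) ? +inf : fabs(sqrt(x)).
    assert(std::pow(NegInf, 0.5) ==
           std::numeric_limits<double>::infinity());
    double S = std::sqrt(NegInf);
    assert(S != S); // NaN is the only value unequal to itself
    return 0;
  }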
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || @@ -908,10 +913,10 @@ struct Exp2Opt : public LibCallOptimization { Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp"); + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp"); + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); } if (LdExpArg) { @@ -946,7 +951,7 @@ struct Exp2Opt : public LibCallOptimization { struct UnaryDoubleFPOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || !FT->getParamType(0)->isDoubleTy()) return 0; @@ -973,7 +978,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { struct FFSOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 integer argument, which matches the // result type. if (FT->getNumParams() != 1 || @@ -996,10 +1001,10 @@ struct FFSOpt : public LibCallOptimization { Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); Value *V = B.CreateCall(F, Op, "cttz"); - V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp"); - V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp"); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); + V = B.CreateIntCast(V, B.getInt32Ty(), false); - Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); return B.CreateSelect(Cond, V, B.getInt32(0)); } }; @@ -1009,7 +1014,7 @@ struct IsDigitOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) return 0; @@ -1028,7 +1033,7 @@ struct IsAsciiOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) return 0; @@ -1046,7 +1051,7 @@ struct AbsOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. 
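Note: the FFSOpt hunk above lowers ffs(x) to (x == 0) ? 0 : cttz(x) + 1. The identity is easy to sanity-check on the host (illustrative only, GCC/Clang builtins assumed):

  #include <cassert>

  int main() {
    // ffs is 1-based: ffs(8) == 4, while cttz(8) == 3.
    for (unsigned x = 1; x < 256; ++x)
      assert(__builtin_ffs(x) == __builtin_ctz(x) + 1);
    assert(__builtin_ffs(0) == 0); // the select supplies the zero case
    return 0;
  }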
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || FT->getParamType(0) != FT->getReturnType()) return 0; @@ -1067,7 +1072,7 @@ struct ToAsciiOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isIntegerTy(32)) return 0; @@ -1147,7 +1152,7 @@ struct PrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require one fixed pointer argument and an integer/void result. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) return 0; @@ -1241,7 +1246,7 @@ struct SPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed pointer arguments and an integer result. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) return 0; @@ -1272,7 +1277,7 @@ struct FWriteOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require a pointer, an integer, an integer, a pointer, returning integer. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || !FT->getParamType(2)->isIntegerTy() || @@ -1310,7 +1315,7 @@ struct FPutsOpt : public LibCallOptimization { if (!TD) return 0; // Require two pointers. Also, we can't optimize if return value is used. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) return 0; @@ -1379,7 +1384,7 @@ struct FPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed parameters as pointers and integer result. - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) return 0; @@ -1410,7 +1415,7 @@ struct PutsOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require one fixed pointer argument and an integer/void result. 
- const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) @@ -1685,7 +1690,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { - const FunctionType *FTy = F.getFunctionType(); + FunctionType *FTy = F.getFunctionType(); StringRef Name = F.getName(); switch (Name[0]) { diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 705f442..c83f56c 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -153,9 +153,13 @@ bool Sinking::ProcessBlock(BasicBlock &BB) { static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, SmallPtrSet<Instruction *, 8> &Stores) { - if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { - if (L->isVolatile()) return false; + if (Inst->mayWriteToMemory()) { + Stores.insert(Inst); + return false; + } + + if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { AliasAnalysis::Location Loc = AA->getLocation(L); for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(), E = Stores.end(); I != E; ++I) @@ -163,11 +167,6 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, return false; } - if (Inst->mayWriteToMemory()) { - Stores.insert(Inst); - return false; - } - if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst)) return false; diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp deleted file mode 100644 index 9dd83c0..0000000 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ /dev/null @@ -1,373 +0,0 @@ -//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass performs a limited form of tail duplication, intended to simplify -// CFGs by removing some unconditional branches. This pass is necessary to -// straighten out loops created by the C front-end, but also is capable of -// making other code nicer. After this pass is run, the CFG simplify pass -// should be run to clean up the mess. -// -// This pass could be enhanced in the future to use profile information to be -// more aggressive. 
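Note: on the Sink.cpp hunk above, the reorder leans on an assumption about the Instruction API of this period: a volatile (or otherwise non-simple) load reports mayWriteToMemory(), so the hoisted test both records it in Stores and rejects moving it, subsuming the dropped isVolatile() early-out. A sketch of that assumption:

  #include "llvm/Instructions.h"
  using namespace llvm;

  // A load that may "write" (volatile, and under the new atomics work
  // also atomic) is caught by the hoisted mayWriteToMemory() branch,
  // which is why the explicit isVolatile() check on loads could go.
  static bool loadHandledAsStore(const LoadInst *LI) {
    return LI->mayWriteToMemory();
  }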
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "tailduplicate" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constant.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" -#include <map> -using namespace llvm; - -STATISTIC(NumEliminated, "Number of unconditional branches eliminated"); - -static cl::opt<unsigned> -TailDupThreshold("taildup-threshold", - cl::desc("Max block size to tail duplicate"), - cl::init(1), cl::Hidden); - -namespace { - class TailDup : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - TailDup() : FunctionPass(ID) { - initializeTailDupPass(*PassRegistry::getPassRegistry()); - } - - private: - inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned); - inline void eliminateUnconditionalBranch(BranchInst *BI); - SmallPtrSet<BasicBlock*, 4> CycleDetector; - }; -} - -char TailDup::ID = 0; -INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false) - -// Public interface to the Tail Duplication pass -FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } - -/// runOnFunction - Top level algorithm - Loop over each unconditional branch in -/// the function, eliminating it if it looks attractive enough. CycleDetector -/// prevents infinite loops by checking that we aren't redirecting a branch to -/// a place it already pointed to earlier; see PR 2323. -bool TailDup::runOnFunction(Function &F) { - bool Changed = false; - CycleDetector.clear(); - for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - if (shouldEliminateUnconditionalBranch(I->getTerminator(), - TailDupThreshold)) { - eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator())); - Changed = true; - } else { - ++I; - CycleDetector.clear(); - } - } - return Changed; -} - -/// shouldEliminateUnconditionalBranch - Return true if this branch looks -/// attractive to eliminate. We eliminate the branch if the destination basic -/// block has <= 5 instructions in it, not counting PHI nodes. In practice, -/// since one of these is a terminator instruction, this means that we will add -/// up to 4 instructions to the new block. -/// -/// We don't count PHI nodes in the count since they will be removed when the -/// contents of the block are copied over. -/// -bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI, - unsigned Threshold) { - BranchInst *BI = dyn_cast<BranchInst>(TI); - if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch! - - BasicBlock *Dest = BI->getSuccessor(0); - if (Dest == BI->getParent()) return false; // Do not loop infinitely! - - // Do not inline a block if we will just get another branch to the same block! - TerminatorInst *DTI = Dest->getTerminator(); - if (BranchInst *DBI = dyn_cast<BranchInst>(DTI)) - if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest) - return false; // Do not loop infinitely! - - // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack, - // because doing so would require breaking critical edges. 
This should be - // fixed eventually. - if (!DTI->use_empty()) - return false; - - // Do not bother with blocks with only a single predecessor: simplify - // CFG will fold these two blocks together! - pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest); - ++PI; - if (PI == PE) return false; // Exactly one predecessor! - - BasicBlock::iterator I = Dest->getFirstNonPHI(); - - for (unsigned Size = 0; I != Dest->end(); ++I) { - if (Size == Threshold) return false; // The block is too large. - - // Don't tail duplicate call instructions. They are very large compared to - // other instructions. - if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false; - - // Also alloca and malloc. - if (isa<AllocaInst>(I)) return false; - - // Some vector instructions can expand into a number of instructions. - if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) || - isa<InsertElementInst>(I)) return false; - - // Only count instructions that are not debugger intrinsics. - if (!isa<DbgInfoIntrinsic>(I)) ++Size; - } - - // Do not tail duplicate a block that has thousands of successors into a block - // with a single successor if the block has many other predecessors. This can - // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in - // cases that have a large number of indirect gotos. - unsigned NumSuccs = DTI->getNumSuccessors(); - if (NumSuccs > 8) { - unsigned TooMany = 128; - if (NumSuccs >= TooMany) return false; - TooMany = TooMany/NumSuccs; - for (; PI != PE; ++PI) - if (TooMany-- == 0) return false; - } - - // If this unconditional branch is a fall-through, be careful about - // tail duplicating it. In particular, we don't want to taildup it if the - // original block will still be there after taildup is completed: doing so - // would eliminate the fall-through, requiring unconditional branches. - Function::iterator DestI = Dest; - if (&*--DestI == BI->getParent()) { - // The uncond branch is a fall-through. Tail duplication of the block - // will eliminate the fall-through-ness and end up cloning the terminator - // at the end of the Dest block. Since the original Dest block will - // continue to exist, this means that one or the other will not be able to - // fall through. One typical example that this helps with is code like: - // if (a) - // foo(); - // if (b) - // foo(); - // Cloning the 'if b' block into the end of the first foo block is messy. - - // The messy case is when the fall-through block falls through to other - // blocks. This is what we would be preventing if we cloned the block. - DestI = Dest; - if (++DestI != Dest->getParent()->end()) { - BasicBlock *DestSucc = DestI; - // If any of Dest's successors are fall-throughs, don't do this xform. - for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest); - SI != SE; ++SI) - if (*SI == DestSucc) - return false; - } - } - - // Finally, check that we haven't redirected to this target block earlier; - // there are cases where we loop forever if we don't check this (PR 2323). - if (!CycleDetector.insert(Dest)) - return false; - - return true; -} - -/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to -/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we -/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and -/// DstBlock, return it. -static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, - BasicBlock *DstBlock) { - // SrcBlock must have a single predecessor. 
- pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock); - if (PI == PE || ++PI != PE) return 0; - - BasicBlock *SrcPred = *pred_begin(SrcBlock); - - // Look at the predecessors of DstBlock. One of them will be SrcBlock. If - // there is only one other pred, get it, otherwise we can't handle it. - PI = pred_begin(DstBlock); PE = pred_end(DstBlock); - BasicBlock *DstOtherPred = 0; - BasicBlock *P = *PI; - if (P == SrcBlock) { - if (++PI == PE) return 0; - DstOtherPred = *PI; - if (++PI != PE) return 0; - } else { - DstOtherPred = P; - if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; - } - - // We can handle two situations here: "if then" and "if then else" blocks. An - // 'if then' situation is just where DstOtherPred == SrcPred. - if (DstOtherPred == SrcPred) - return SrcPred; - - // Check to see if we have an "if then else" situation, which means that - // DstOtherPred will have a single predecessor and it will be SrcPred. - PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred); - if (PI != PE && *PI == SrcPred) { - if (++PI != PE) return 0; // Not a single pred. - return SrcPred; // Otherwise, it's an "if then" situation. Return the if. - } - - // Otherwise, this is something we can't handle. - return 0; -} - - -/// eliminateUnconditionalBranch - Clone the instructions from the destination -/// block into the source block, eliminating the specified unconditional branch. -/// If the destination block defines values used by successors of the dest -/// block, we may need to insert PHI nodes. -/// -void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { - BasicBlock *SourceBlock = Branch->getParent(); - BasicBlock *DestBlock = Branch->getSuccessor(0); - assert(SourceBlock != DestBlock && "Our predicate is broken!"); - - DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName() - << "]: Eliminating branch: " << *Branch); - - // See if we can avoid duplicating code by moving it up to a dominator of both - // blocks. - if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { - DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n"); - - // If there are non-phi instructions in DestBlock that have no operands - // defined in DestBlock, and if the instruction has no side effects, we can - // move the instruction to DomBlock instead of duplicating it. - BasicBlock::iterator BBI = DestBlock->getFirstNonPHI(); - while (!isa<TerminatorInst>(BBI)) { - Instruction *I = BBI++; - - bool CanHoist = I->isSafeToSpeculativelyExecute() && - !I->mayReadFromMemory(); - if (CanHoist) { - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) - if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op))) - if (OpI->getParent() == DestBlock || - (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) { - CanHoist = false; - break; - } - if (CanHoist) { - // Remove from DestBlock, move right before the term in DomBlock. - DestBlock->getInstList().remove(I); - DomBlock->getInstList().insert(DomBlock->getTerminator(), I); - DEBUG(dbgs() << "Hoisted: " << *I); - } - } - } - } - - // Tail duplication can not update SSA properties correctly if the values - // defined in the duplicated tail are used outside of the tail itself. For - // this reason, we spill all values that are used outside of the tail to the - // stack. - for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I) - if (I->isUsedOutsideOfBlock(DestBlock)) { - // We found a use outside of the tail. 
Create a new stack slot to - // break this inter-block usage pattern. - DemoteRegToStack(*I); - } - - // We are going to have to map operands from the original block B to the new - // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI - // nodes also define part of this mapping. Loop over these PHI nodes, adding - // them to our mapping. - // - std::map<Value*, Value*> ValueMapping; - - BasicBlock::iterator BI = DestBlock->begin(); - bool HadPHINodes = isa<PHINode>(BI); - for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) - ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock); - - // Clone the non-phi instructions of the dest block into the source block, - // keeping track of the mapping... - // - for (; BI != DestBlock->end(); ++BI) { - Instruction *New = BI->clone(); - New->setName(BI->getName()); - SourceBlock->getInstList().push_back(New); - ValueMapping[BI] = New; - } - - // Now that we have built the mapping information and cloned all of the - // instructions (giving us a new terminator, among other things), walk the new - // instructions, rewriting references of old instructions to use new - // instructions. - // - BI = Branch; ++BI; // Get an iterator to the first new instruction - for (; BI != SourceBlock->end(); ++BI) - for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) { - std::map<Value*, Value*>::const_iterator I = - ValueMapping.find(BI->getOperand(i)); - if (I != ValueMapping.end()) - BI->setOperand(i, I->second); - } - - // Next we check to see if any of the successors of DestBlock had PHI nodes. - // If so, we need to add entries to the PHI nodes for SourceBlock now. - for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock); - SI != SE; ++SI) { - BasicBlock *Succ = *SI; - for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) { - PHINode *PN = cast<PHINode>(PNI); - // Ok, we have a PHI node. Figure out what the incoming value was for the - // DestBlock. - Value *IV = PN->getIncomingValueForBlock(DestBlock); - - // Remap the value if necessary... - std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV); - if (I != ValueMapping.end()) - IV = I->second; - PN->addIncoming(IV, SourceBlock); - } - } - - // Next, remove the old branch instruction, and any PHI node entries that we - // had. - BI = Branch; ++BI; // Get an iterator to the first new instruction - DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes... - SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch... - - // Final step: now that we have finished everything up, walk the cloned - // instructions one last time, constant propagating and DCE'ing them, because - // they may not be needed anymore. - // - if (HadPHINodes) { - while (BI != SourceBlock->end()) { - Instruction *Inst = BI++; - if (isInstructionTriviallyDead(Inst)) - Inst->eraseFromParent(); - else if (Value *V = SimplifyInstruction(Inst)) { - Inst->replaceAllUsesWith(V); - Inst->eraseFromParent(); - } - } - } - - ++NumEliminated; // We just killed a branch! 
-} diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index be7bed1..8e5a1eb 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -222,7 +222,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, const TargetData *TD = TLI.getTargetData(); gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD->getStructLayout(STy); unsigned Idx = cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); @@ -557,7 +557,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, Value *Address = User->getOperand(OpNo); if (!Address->getType()->isPointerTy()) return false; - const Type *AddressAccessTy = + Type *AddressAccessTy = cast<PointerType>(Address->getType())->getElementType(); // Do a match against the root of this address, ignoring profitability. This diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index b4f74f9..a7f9efd 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -287,7 +287,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { /// BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { BasicBlock::iterator SplitIt = SplitPt; - while (isa<PHINode>(SplitIt)) + while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt)) ++SplitIt; BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); @@ -299,138 +299,114 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { // Old dominates New. New node dominates all other nodes dominated by Old. - DomTreeNode *OldNode = DT->getNode(Old); - std::vector<DomTreeNode *> Children; - for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); - I != E; ++I) - Children.push_back(*I); + if (DomTreeNode *OldNode = DT->getNode(Old)) { + std::vector<DomTreeNode *> Children; + for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); + I != E; ++I) + Children.push_back(*I); DomTreeNode *NewNode = DT->addNewBlock(New,Old); for (std::vector<DomTreeNode *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) DT->changeImmediateDominator(*I, NewNode); + } } return New; } +/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCSSA +/// analysis information. +static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, + ArrayRef<BasicBlock *> Preds, + Pass *P, bool &HasLoopExit) { + if (!P) return; -/// SplitBlockPredecessors - This method transforms BB by introducing a new -/// basic block into the function, and moving some of the predecessors of BB to -/// be predecessors of the new block. The new predecessors are indicated by the -/// Preds array, which has NumPreds elements in it. The new block is given a -/// suffix of 'Suffix'. -/// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, -/// LoopInfo, and LCSSA but no other analyses. In particular, it does not -/// preserve LoopSimplify (because it's complicated to handle the case where one -/// of the edges being split is an exit of a loop with other exits).
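The hunk above begins replacing SplitBlockPredecessors' inline bookkeeping with the UpdateAnalysisInformation and UpdatePHINodes helpers while leaving the public signature untouched; the old definition is deleted here and re-added further down in terms of those helpers. A minimal caller-side sketch of that signature, assuming a pass context and an illustrative block BB (neither name is from this patch):

    // Route every current predecessor of BB through a fresh forwarding block.
    SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
    BasicBlock *Fwd = SplitBlockPredecessors(BB, &Preds[0], Preds.size(),
                                             ".split", this);
    // Fwd now ends in an unconditional branch to BB, and BB's PHI nodes have
    // been rewritten so the moved edges flow in through Fwd.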
-/// -BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - BasicBlock *const *Preds, - unsigned NumPreds, const char *Suffix, - Pass *P) { - // Create new basic block, insert right before the original block. - BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, - BB->getParent(), BB); - - // The new block unconditionally branches to the old block. - BranchInst *BI = BranchInst::Create(BB, NewBB); - - LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; - Loop *L = LI ? LI->getLoopFor(BB) : 0; - bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + Loop *L = LI ? LI->getLoopFor(OldBB) : 0; - // Move the edges from Preds to point to NewBB instead of BB. - // While here, if we need to preserve loop analyses, collect - // some information about how this split will affect loops. - bool HasLoopExit = false; + // If we need to preserve loop analyses, collect some information about how + // this split will affect loops. bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; - for (unsigned i = 0; i != NumPreds; ++i) { - // This is slightly more strict than necessary; the minimum requirement - // is that there be no more than one indirectbr branching to BB. And - // all BlockAddress uses would need to be updated. - assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && - "Cannot split an edge from an IndirectBrInst"); - - Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); - - if (LI) { - // If we need to preserve LCSSA, determine if any of - // the preds is a loop exit. + if (LI) { + bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); + for (ArrayRef<BasicBlock*>::iterator + i = Preds.begin(), e = Preds.end(); i != e; ++i) { + BasicBlock *Pred = *i; + + // If we need to preserve LCSSA, determine if any of the preds is a loop + // exit. if (PreserveLCSSA) - if (Loop *PL = LI->getLoopFor(Preds[i])) - if (!PL->contains(BB)) + if (Loop *PL = LI->getLoopFor(Pred)) + if (!PL->contains(OldBB)) HasLoopExit = true; - // If we need to preserve LoopInfo, note whether any of the - // preds crosses an interesting loop boundary. - if (L) { - if (L->contains(Preds[i])) - IsLoopEntry = false; - else - SplitMakesNewLoopHeader = true; - } + + // If we need to preserve LoopInfo, note whether any of the preds crosses + // an interesting loop boundary. + if (!L) continue; + if (L->contains(Pred)) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; } } // Update dominator tree if available. - DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); if (DT) DT->splitBlock(NewBB); - // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI - // node becomes an incoming value for BB's phi node. However, if the Preds - // list is empty, we need to insert dummy entries into the PHI nodes in BB to - // account for the newly created predecessor. - if (NumPreds == 0) { - // Insert dummy values as the incoming value. - for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) - cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); - return NewBB; + if (!L) return; + + if (IsLoopEntry) { + // Add the new block to the nearest enclosing loop (and not an adjacent + // loop). To find this, examine each of the predecessors and determine which + // loops enclose them, and select the most-nested loop which contains the + // loop containing the block being split. 
+ Loop *InnermostPredLoop = 0; + for (ArrayRef<BasicBlock*>::iterator + i = Preds.begin(), e = Preds.end(); i != e; ++i) { + BasicBlock *Pred = *i; + if (Loop *PredLoop = LI->getLoopFor(Pred)) { + // Seek a loop which actually contains the block being split (to avoid + // adjacent loops). + while (PredLoop && !PredLoop->contains(OldBB)) + PredLoop = PredLoop->getParentLoop(); + + // Select the most-nested of these loops which contains the block. + if (PredLoop && PredLoop->contains(OldBB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + } + + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } else { + L->addBasicBlockToLoop(NewBB, LI->getBase()); + if (SplitMakesNewLoopHeader) + L->moveToHeader(NewBB); } +} +/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming +/// from NewBB. This also updates AliasAnalysis, if available. +static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, + ArrayRef<BasicBlock*> Preds, BranchInst *BI, + Pass *P, bool HasLoopExit) { + // Create a new PHI node in NewBB for each PHI node in OrigBB. AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; - - if (L) { - if (IsLoopEntry) { - // Add the new block to the nearest enclosing loop (and not an - // adjacent loop). To find this, examine each of the predecessors and - // determine which loops enclose them, and select the most-nested loop - // which contains the loop containing the block being split. - Loop *InnermostPredLoop = 0; - for (unsigned i = 0; i != NumPreds; ++i) - if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { - // Seek a loop which actually contains the block being split (to - // avoid adjacent loops). - while (PredLoop && !PredLoop->contains(BB)) - PredLoop = PredLoop->getParentLoop(); - // Select the most-nested of these loops which contains the block. - if (PredLoop && - PredLoop->contains(BB) && - (!InnermostPredLoop || - InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) - InnermostPredLoop = PredLoop; - } - if (InnermostPredLoop) - InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - } else { - L->addBasicBlockToLoop(NewBB, LI->getBase()); - if (SplitMakesNewLoopHeader) - L->moveToHeader(NewBB); - } - } - - // Otherwise, create a new PHI node in NewBB for each PHI node in BB. - for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { + for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); - + // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1; i != NumPreds; ++i) + for (unsigned i = 1, e = Preds.size(); i != e; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; @@ -441,31 +417,191 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - for (unsigned i = 0; i != NumPreds; ++i) + for (unsigned i = 0, e = Preds.size(); i != e; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI.
// Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = - PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI); + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. - for (unsigned i = 0; i != NumPreds; ++i) { + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } + InVal = NewPHI; } - + // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } +} + +/// SplitBlockPredecessors - This method transforms BB by introducing a new +/// basic block into the function, and moving some of the predecessors of BB to +/// be predecessors of the new block. The new predecessors are indicated by the +/// Preds array, which has NumPreds elements in it. The new block is given a +/// suffix of 'Suffix'. +/// +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// LoopInfo, and LCSSA but no other analyses. In particular, it does not +/// preserve LoopSimplify (because it's complicated to handle the case where one +/// of the edges being split is an exit of a loop with other exits). +/// +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + BasicBlock *const *Preds, + unsigned NumPreds, const char *Suffix, + Pass *P) { + // Create new basic block, insert right before the original block. + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, + BB->getParent(), BB); + // The new block unconditionally branches to the old block. + BranchInst *BI = BranchInst::Create(BB, NewBB); + + // Move the edges from Preds to point to NewBB instead of BB. + for (unsigned i = 0; i != NumPreds; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI + // node becomes an incoming value for BB's phi node. However, if the Preds + // list is empty, we need to insert dummy entries into the PHI nodes in BB to + // account for the newly created predecessor. + if (NumPreds == 0) { + // Insert dummy values as the incoming value. + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) + cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); + return NewBB; + } + + // Update DominatorTree, LoopInfo, and LCSSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), + P, HasLoopExit); + + // Update the PHI nodes in BB with the values coming from NewBB. + UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI, + P, HasLoopExit); return NewBB; } +/// SplitLandingPadPredecessors - This method transforms the landing pad, +/// OrigBB, by introducing two new basic blocks into the function. One of those +/// new basic blocks gets the predecessors listed in Preds. The other basic +/// block gets the remaining predecessors of OrigBB. The landingpad instruction +/// in OrigBB is cloned into both of the new basic blocks.
The new blocks are given +/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. +/// +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// DominanceFrontier, LoopInfo, and LCSSA but no other analyses. In particular, +/// it does not preserve LoopSimplify (because it's complicated to handle the +/// case where one of the edges being split is an exit of a loop with other +/// exits). +/// +void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock*> Preds, + const char *Suffix1, const char *Suffix2, + Pass *P, + SmallVectorImpl<BasicBlock*> &NewBBs) { + assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); + + // Create a new basic block for OrigBB's predecessors listed in Preds. Insert + // it right before the original block. + BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix1, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB1); + + // The new block unconditionally branches to the old block. + BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1); + + // Move the edges from Preds to point to NewBB1 instead of OrigBB. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); + } + + // Update DominatorTree, LoopInfo, and LCSSA analysis information. + bool HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB1. + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit); + + // Move the remaining edges from OrigBB to point to NewBB2. + SmallVector<BasicBlock*, 8> NewBB2Preds; + for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB); + i != e; ) { + BasicBlock *Pred = *i++; + if (Pred == NewBB1) continue; + assert(!isa<IndirectBrInst>(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + NewBB2Preds.push_back(Pred); + e = pred_end(OrigBB); + } + + BasicBlock *NewBB2 = 0; + if (!NewBB2Preds.empty()) { + // Create another basic block for the rest of OrigBB's predecessors. + NewBB2 = BasicBlock::Create(OrigBB->getContext(), + OrigBB->getName() + Suffix2, + OrigBB->getParent(), OrigBB); + NewBBs.push_back(NewBB2); + + // The new block unconditionally branches to the old block. + BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2); + + // Move the remaining edges from OrigBB to point to NewBB2. + for (SmallVectorImpl<BasicBlock*>::iterator + i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i) + (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2); + + // Update DominatorTree, LoopInfo, and LCSSA analysis information. + HasLoopExit = false; + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit); + + // Update the PHI nodes in OrigBB with the values coming from NewBB2.
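For callers of the new utility, the contract is: NewBBs[0] receives the predecessors passed in Preds, and a second block is created only if OrigBB keeps other predecessors. A hedged usage sketch with illustrative names (the call site this patch actually adds is in LoopSimplify, further down):

    // InsidePreds: a SmallVector<BasicBlock*, 8> of predecessors collected by
    // the caller; it converts implicitly to ArrayRef<BasicBlock*>.
    SmallVector<BasicBlock*, 2> NewBBs;
    SplitLandingPadPredecessors(LPadBB, InsidePreds, ".split1", ".split2",
                                P, NewBBs);
    // Each new block receives its own clone of LPadBB's landingpad; when two
    // blocks are made, a PHI merges the cloned exception values.

The UpdatePHINodes call that follows completes the PHI rewiring for NewBB2.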
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit); + } + + LandingPadInst *LPad = OrigBB->getLandingPadInst(); + Instruction *Clone1 = LPad->clone(); + Clone1->setName(Twine("lpad") + Suffix1); + NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1); + + if (NewBB2) { + Instruction *Clone2 = LPad->clone(); + Clone2->setName(Twine("lpad") + Suffix2); + NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2); + + // Create a PHI node for the two cloned landingpad instructions. + PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad); + PN->addIncoming(Clone1, NewBB1); + PN->addIncoming(Clone2, NewBB2); + LPad->replaceAllUsesWith(PN); + LPad->eraseFromParent(); + } else { + // There is no second clone. Just replace the landing pad with the first + // clone. + LPad->replaceAllUsesWith(Clone1); + LPad->eraseFromParent(); + } +} + /// FindFunctionBackedges - Analyze the specified function to find all of the /// loop backedges in the function and return them. This is a relatively cheap /// (compared to computing dominators and loop info) analysis. diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 92ce500..c052910 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -102,7 +102,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, ++I; // Skip one edge due to the incoming arc from TI. if (!AllowIdenticalEdges) return I != E; - + // If AllowIdenticalEdges is true, then we allow this edge to be considered // non-critical iff all preds come from TI's block. while (I != E) { @@ -155,10 +155,10 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, /// This returns the new block if the edge was split, null otherwise. /// /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the -/// specified successor will be merged into the same critical edge block. -/// This is most commonly interesting with switch instructions, which may +/// specified successor will be merged into the same critical edge block. +/// This is most commonly interesting with switch instructions, which may /// have many edges to any one destination. This ensures that all edges to that -/// dest go to one block instead of each going to a different block, but isn't +/// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge". /// /// It is invalid to call this function on a critical edge that starts at an @@ -167,15 +167,20 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, - Pass *P, bool MergeIdenticalEdges) { + Pass *P, bool MergeIdenticalEdges, + bool DontDeleteUselessPhis) { if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; - + assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); - + BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); + // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // it in this generic function. + if (DestBB->isLandingPad()) return 0; + // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." 
+ DestBB->getName() + "_crit_edge"); @@ -190,7 +195,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Function &F = *TIBB->getParent(); Function::iterator FBBI = TIBB; F.getBasicBlockList().insert(++FBBI, NewBB); - + // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. { @@ -207,35 +212,35 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // happens because the BB list of PHI nodes is usually in the same // order. if (PN->getIncomingBlock(BBIdx) != TIBB) - BBIdx = PN->getBasicBlockIndex(TIBB); + BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } } - + // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). if (MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; - + // Remove an entry for TIBB from DestBB phi nodes. - DestBB->removePredecessor(TIBB); - + DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); + // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } - - + + // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; - + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); - + // If we have nothing to update, just return. if (DT == 0 && LI == 0 && PI == 0) return NewBB; @@ -263,7 +268,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } bool NewBBDominatesDestBB = true; - + // Should we update DominatorTree information? if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); @@ -274,7 +279,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); DomTreeNode *DestBBNode = 0; - + // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { DestBBNode = DT->getNode(DestBB); @@ -285,7 +290,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } OtherPreds.clear(); } - + // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it // doesn't dominate anything. if (NewBBDominatesDestBB) { @@ -337,6 +342,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // For each unique exit block... + // FIXME: This code is functionally equivalent to the corresponding + // loop in LoopSimplify. SmallVector<BasicBlock *, 4> ExitBlocks; TIL->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { @@ -348,10 +355,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; - if (TIL->contains(P)) + if (TIL->contains(P)) { + if (isa<IndirectBrInst>(P->getTerminator())) { + Preds.clear(); + break; + } Preds.push_back(P); - else + } else { + HasPredOutsideOfLoop = true; + } } // If there are any preds not in the loop, we'll need to split // the edges.
The Preds.empty() check is needed because a block diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 14bb17f..4b5f45b 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -58,8 +58,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, AttributeWithIndex AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - const Type *I8Ptr = B.getInt8PtrTy(); - const Type *I32Ty = B.getInt32Ty(); + Type *I8Ptr = B.getInt8PtrTy(); + Type *I32Ty = B.getInt32Ty(); Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), @@ -102,7 +102,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - const Type *I8Ptr = B.getInt8PtrTy(); + Type *I8Ptr = B.getInt8PtrTy(); Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -120,7 +120,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - const Type *I8Ptr = B.getInt8PtrTy(); + Type *I8Ptr = B.getInt8PtrTy(); Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); @@ -361,7 +361,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { this->CI = CI; Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); - const FunctionType *FT = Callee->getFunctionType(); + FunctionType *FT = Callee->getFunctionType(); LLVMContext &Context = CI->getParent()->getContext(); IRBuilder<> B(CI); diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 204c2c6..7adc5f1 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -21,9 +21,17 @@ add_llvm_library(LLVMTransformUtils PromoteMemoryToRegister.cpp SSAUpdater.cpp SimplifyCFG.cpp + SimplifyIndVar.cpp SimplifyInstructions.cpp UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp ) +add_llvm_library_dependencies(LLVMTransformUtils + LLVMAnalysis + LLVMCore + LLVMSupport + LLVMTarget + LLVMipa + ) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 6ea831f..cf21f1e 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -331,15 +331,10 @@ ConstantFoldMappedInstruction(const Instruction *I) { TD); if (const LoadInst *LI = dyn_cast<LoadInst>(I)) - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) - if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr) - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), - CE); - - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0], - Ops.size(), TD); + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(Ops[0], TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, diff 
--git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index a08fa35..a0e027b 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -50,10 +50,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { I != E; ++I) { GlobalVariable *GV = new GlobalVariable(*New, I->getType()->getElementType(), - false, - GlobalValue::ExternalLinkage, 0, - I->getName()); - GV->setAlignment(I->getAlignment()); + I->isConstant(), I->getLinkage(), + (Constant*) 0, I->getName(), + (GlobalVariable*) 0, + I->isThreadLocal(), + I->getType()->getAddressSpace()); + GV->copyAttributesFrom(I); VMap[I] = GV; } @@ -61,16 +63,19 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { Function *NF = Function::Create(cast<FunctionType>(I->getType()->getElementType()), - GlobalValue::ExternalLinkage, I->getName(), New); + I->getLinkage(), I->getName(), New); NF->copyAttributesFrom(I); VMap[I] = NF; } // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); - I != E; ++I) - VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, - I->getName(), NULL, New); + I != E; ++I) { + GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(), + I->getName(), NULL, New); + GA->copyAttributesFrom(I); + VMap[I] = GA; + } // Now that all of the things that global variable initializer can refer to // have been created, loop through and copy the global variable referrers @@ -81,9 +86,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); if (I->hasInitializer()) GV->setInitializer(MapValue(I->getInitializer(), VMap)); - GV->setLinkage(I->getLinkage()); - GV->setThreadLocal(I->isThreadLocal()); - GV->setConstant(I->isConstant()); } // Similarly, copy over function bodies now... @@ -101,15 +103,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. 
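The CloneModule hunk above replaces the old pattern of creating bare external-linkage stand-ins and patching linkage afterwards with a create-then-copyAttributesFrom idiom. A distilled sketch for the global-variable case (ElemTy stands in for I->getType()->getElementType(); the names are illustrative, not from the patch):

    GlobalVariable *GV =
      new GlobalVariable(*New, ElemTy, I->isConstant(), I->getLinkage(),
                         /*Initializer=*/0, I->getName(),
                         /*InsertBefore=*/0, I->isThreadLocal(),
                         I->getType()->getAddressSpace());
    GV->copyAttributesFrom(I);  // alignment, section, visibility, ...

Because the attributes are now copied at creation time, the later per-global setLinkage/setThreadLocal/setConstant fixups become dead weight, which is exactly what the deletions in this hunk remove; the CloneFunctionInto call below is unchanged apart from that.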
CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); } - - F->setLinkage(I->getLinkage()); } // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); - GA->setLinkage(I->getLinkage()); if (const Constant *C = I->getAliasee()) GA->setAliasee(MapValue(C, VMap)); } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 0813523..5f47ebb 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -50,14 +50,14 @@ namespace { DominatorTree* DT; bool AggregateArgs; unsigned NumExitBlocks; - const Type *RetTy; + Type *RetTy; public: CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false) : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {} - Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code); + Function *ExtractCodeRegion(ArrayRef<BasicBlock*> code); - bool isEligible(const std::vector<BasicBlock*> &code); + bool isEligible(ArrayRef<BasicBlock*> code); private: /// definedInRegion - Return true if the specified value is defined in the @@ -290,7 +290,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs, paramTy.clear(); paramTy.push_back(StructPtr); } - const FunctionType *funcType = + FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); // Create the new function @@ -317,8 +317,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs, Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = - GetElementPtrInst::Create(AI, Idx, Idx+2, - "gep_" + inputs[i]->getName(), TI); + GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = AI++; @@ -420,7 +419,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); GetElementPtrInst *GEP = - GetElementPtrInst::Create(Struct, Idx, Idx + 2, + GetElementPtrInst::Create(Struct, Idx, "gep_" + StructValues[i]->getName()); codeReplacer->getInstList().push_back(GEP); StoreInst *SI = new StoreInst(StructValues[i], GEP); @@ -446,7 +445,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst *GEP - = GetElementPtrInst::Create(Struct, Idx, Idx + 2, + = GetElementPtrInst::Create(Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; @@ -561,7 +560,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut+out); GetElementPtrInst *GEP = - GetElementPtrInst::Create(OAI, Idx, Idx + 2, + GetElementPtrInst::Create(OAI, Idx, "gep_" + outputs[out]->getName(), NTRet); new StoreInst(outputs[out], GEP, NTRet); @@ -580,7 +579,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } // Now that we've done the deed, simplify the switch instruction. 
- const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); switch (NumExitBlocks) { case 0: // There are no successors (the block containing the switch itself), which @@ -655,7 +654,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { /// computed result back into memory. /// Function *CodeExtractor:: -ExtractCodeRegion(const std::vector<BasicBlock*> &code) { +ExtractCodeRegion(ArrayRef<BasicBlock*> code) { if (!isEligible(code)) return 0; @@ -755,9 +754,13 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) { return newFunction; } -bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) { +bool CodeExtractor::isEligible(ArrayRef<BasicBlock*> code) { + // Deny a single basic block that's a landing pad block. + if (code.size() == 1 && code[0]->isLandingPad()) + return false; + // Deny code region if it contains allocas or vastarts. - for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end(); + for (ArrayRef<BasicBlock*>::iterator BB = code.begin(), e=code.end(); BB != e; ++BB) for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end(); I != Ie; ++I) @@ -771,25 +774,23 @@ bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) { } -/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new -/// function +/// ExtractCodeRegion - Slurp a sequence of basic blocks into a brand new +/// function. /// Function* llvm::ExtractCodeRegion(DominatorTree &DT, - const std::vector<BasicBlock*> &code, + ArrayRef<BasicBlock*> code, bool AggregateArgs) { return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code); } -/// ExtractBasicBlock - slurp a natural loop into a brand new function +/// ExtractLoop - Slurp a natural loop into a brand new function. /// Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) { return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks()); } -/// ExtractBasicBlock - slurp a basic block into a brand new function +/// ExtractBasicBlock - Slurp a basic block into a brand new function. /// -Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) { - std::vector<BasicBlock*> Blocks; - Blocks.push_back(BB); - return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks); +Function* llvm::ExtractBasicBlock(ArrayRef<BasicBlock*> BBs, bool AggregateArgs){ + return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(BBs); } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index d5b382e..5464dbc 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -45,6 +45,9 @@ bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) { return InlineFunction(CallSite(II), IFI); } +// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is +// turned on. + /// [LIBUNWIND] Look for an llvm.eh.exception call in the given block. static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) { for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) { @@ -250,20 +253,32 @@ namespace { PHINode *InnerSelectorPHI; SmallVector<Value*, 8> UnwindDestPHIValues; + // FIXME: New EH - These will replace the analogous ones above. + BasicBlock *OuterResumeDest; //< Destination of the invoke's unwind. + BasicBlock *InnerResumeDest; //< Destination for the callee's resume. + LandingPadInst *CallerLPad; //< LandingPadInst associated with the invoke. 
+ PHINode *InnerEHValuesPHI; //< PHI for EH values from landingpad insts. + public: - InvokeInliningInfo(InvokeInst *II) : - OuterUnwindDest(II->getUnwindDest()), OuterSelector(0), - InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) { - - // If there are PHI nodes in the unwind destination block, we - // need to keep track of which values came into them from the - // invoke before removing the edge from this block. - llvm::BasicBlock *invokeBB = II->getParent(); - for (BasicBlock::iterator I = OuterUnwindDest->begin(); - isa<PHINode>(I); ++I) { + InvokeInliningInfo(InvokeInst *II) + : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0), + InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0), + OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), + CallerLPad(0), InnerEHValuesPHI(0) { + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing + // the edge from this block. + llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock::iterator I = OuterUnwindDest->begin(); + for (; isa<PHINode>(I); ++I) { // Save the value to use for this edge. - PHINode *phi = cast<PHINode>(I); - UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB)); + PHINode *PHI = cast<PHINode>(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // FIXME: With the new EH, this if/dyn_cast should be a 'cast'. + if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) { + CallerLPad = LPI; } } @@ -281,11 +296,23 @@ namespace { BasicBlock *getInnerUnwindDest(); + // FIXME: New EH - Rename when new EH is turned on. + BasicBlock *getInnerUnwindDestNewEH(); + + LandingPadInst *getLandingPadInst() const { return CallerLPad; } + bool forwardEHResume(CallInst *call, BasicBlock *src); - /// Add incoming-PHI values to the unwind destination block for - /// the given basic block, using the values for the original - /// invoke's source block. + /// forwardResume - Forward the 'resume' instruction to the caller's landing + /// pad block. When the landing pad block has only one predecessor, this is + /// a simple branch. When there is more than one predecessor, we need to + /// split the landing pad block after the landingpad instruction and jump + /// to there. + void forwardResume(ResumeInst *RI); + + /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind + /// destination block for the given basic block, using the values for the + /// original invoke's source block. void addIncomingPHIValuesFor(BasicBlock *BB) const { addIncomingPHIValuesForInto(BB, OuterUnwindDest); } @@ -300,7 +327,7 @@ namespace { }; } -/// Get or create a target for the branch out of rewritten calls to +/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to /// llvm.eh.resume. BasicBlock *InvokeInliningInfo::getInnerUnwindDest() { if (InnerUnwindDest) return InnerUnwindDest; @@ -404,6 +431,60 @@ bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) { return true; } +/// Get or create a target for the branch from ResumeInsts. +BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() { + // FIXME: New EH - rename this function when new EH is turned on. + if (InnerResumeDest) return InnerResumeDest; + + // Split the landing pad. 
+ BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint; + InnerResumeDest = + OuterResumeDest->splitBasicBlock(SplitPoint, + OuterResumeDest->getName() + ".body"); + + // The number of incoming edges we expect to the inner landing pad. + const unsigned PHICapacity = 2; + + // Create corresponding new PHIs for all the PHIs in the outer landing pad. + BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *OuterPHI = cast<PHINode>(I); + PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, + OuterPHI->getName() + ".lpad-body", + InsertPoint); + OuterPHI->replaceAllUsesWith(InnerPHI); + InnerPHI->addIncoming(OuterPHI, OuterResumeDest); + } + + // Create a PHI for the exception values. + InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, + "eh.lpad-body", InsertPoint); + CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); + InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); + + // All done. + return InnerResumeDest; +} + +/// forwardResume - Forward the 'resume' instruction to the caller's landing pad +/// block. When the landing pad block has only one predecessor, this is a simple +/// branch. When there is more than one predecessor, we need to split the +/// landing pad block after the landingpad instruction and jump to there. +void InvokeInliningInfo::forwardResume(ResumeInst *RI) { + BasicBlock *Dest = getInnerUnwindDestNewEH(); + BasicBlock *Src = RI->getParent(); + + BranchInst::Create(Dest, Src); + + // Update the PHIs in the destination. They were inserted in an order which + // makes this work. + addIncomingPHIValuesForInto(Src, Dest); + + InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); + RI->eraseFromParent(); +} + /// [LIBUNWIND] Check whether this selector is "only cleanups": /// call i32 @llvm.eh.selector(blah, blah, i32 0) static bool isCleanupOnlySelector(EHSelectorInst *selector) { @@ -421,9 +502,19 @@ static bool isCleanupOnlySelector(EHSelectorInst *selector) { /// Returns true to indicate that the next block should be skipped. static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInliningInfo &Invoke) { + LandingPadInst *LPI = Invoke.getLandingPadInst(); + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; - + + if (LPI) // FIXME: New EH - This won't be NULL in the new EH. + if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { + unsigned NumClauses = LPI->getNumClauses(); + L->reserveClauses(NumClauses); + for (unsigned i = 0; i != NumClauses; ++i) + L->addClause(LPI->getClause(i)); + } + // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast<CallInst>(I); @@ -557,6 +648,10 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // there is now a new entry in them. Invoke.addIncomingPHIValuesFor(BB); } + + if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + Invoke.forwardResume(RI); + } } // Now that everything is happy, we have one final detail. 
The PHI nodes in @@ -636,7 +731,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); + Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and @@ -726,7 +821,7 @@ static bool isUsedByLifetimeMarker(Value *V) { // hasLifetimeMarkers - Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { - const Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); + Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); if (AI->getType() == Int8PtrTy) return isUsedByLifetimeMarker(AI); @@ -770,8 +865,15 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (!DL.isUnknown()) + if (!DL.isUnknown()) { BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { + LLVMContext &Ctx = BI->getContext(); + MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); + DVI->setOperand(2, createInlinedVariable(DVI->getVariable(), + InlinedAt, Ctx)); + } + } } } } @@ -822,6 +924,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { return false; } + // Find the personality function used by the landing pads of the caller. If it + // exists, then check to see that it matches the personality function used in + // the callee. + for (Function::const_iterator + I = Caller->begin(), E = Caller->end(); I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + // FIXME: This 'isa' here should go away once the new EH system is + // in place. + if (!isa<LandingPadInst>(BB->getFirstNonPHI())) + continue; + const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI()); + const Value *CallerPersFn = LP->getPersonalityFn(); + + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + for (Function::const_iterator + I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once + // the new EH system is in place. + if (const LandingPadInst *LP = + dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) + if (CallerPersFn != LP->getPersonalityFn()) + return false; + break; + } + + break; + } + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. // @@ -1090,7 +1226,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction.
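The personality-function guard added above reduces to one rule; a hedged restatement as a hypothetical helper (not part of the patch), using the same LandingPadInst accessor the hunk relies on:

    // Inlining across EH boundaries is refused when caller and callee landing
    // pads name different personality functions.
    static bool personalitiesMatch(const LandingPadInst *CallerLP,
                                   const LandingPadInst *CalleeLP) {
      // As the TODO above notes, some personalities are proper supersets of
      // others and could substitute; that case is conservatively rejected too.
      return CallerLP->getPersonalityFn() == CalleeLP->getPersonalityFn();
    }

The return-instruction handling continues below.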
- const Type *RTy = CalledFunc->getReturnType(); + Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = 0; if (Returns.size() > 1) { diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 0f6d9ae..7034feb 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -227,13 +226,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { bool llvm::isInstructionTriviallyDead(Instruction *I) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; + // We don't want the landingpad instruction removed by anything this general. + if (isa<LandingPadInst>(I)) + return false; + // We don't want debug info removed by anything this general, unless // debug info is empty. if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { - if (DDI->getAddress()) + if (DDI->getAddress()) return false; return true; - } + } if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { if (DVI->getValue()) return false; @@ -244,10 +247,16 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { // Special case intrinsics that "may have side effects" but can be deleted // when dead. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { // Safe to delete llvm.stacksave if dead. if (II->getIntrinsicID() == Intrinsic::stacksave) return true; + + // Lifetime intrinsics are dead when their right-hand operand is undef. + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + return isa<UndefValue>(II->getArgOperand(1)); + } return false; } @@ -712,10 +721,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// their preferred alignment from the beginning. /// static unsigned enforceKnownAlignment(Value *V, unsigned Align, - unsigned PrefAlign) { + unsigned PrefAlign, const TargetData *TD) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + // If the preferred alignment is greater than the natural stack alignment + // then don't round up. This avoids dynamic stack realignment. + if (TD && TD->exceedsNaturalStackAlignment(PrefAlign)) + return Align; // If there is a requested alignment and if this is an alloca, round up. if (AI->getAlignment() >= PrefAlign) return AI->getAlignment(); @@ -766,7 +779,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, Align = std::min(Align, +Value::MaximumAlignment); if (PrefAlign > Align) - Align = enforceKnownAlignment(V, Align, PrefAlign); + Align = enforceKnownAlignment(V, Align, PrefAlign, TD); // We don't need to make any adjustment. return Align; diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e79fb5a..cbd54a8 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -213,7 +213,7 @@ ReprocessLoop: // predecessors from outside of the loop, split the edge now.
SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); - + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), @@ -325,6 +325,14 @@ ReprocessLoop: DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " << ExitingBlock->getName() << "\n"); + // If any reachable control flow within this loop has changed, notify + // ScalarEvolution. Currently assume the parent loop doesn't change + // (splitting edges doesn't count). If blocks, CFG edges, or other values + // in the parent loop change, then we need to call forgetLoop() for the + // parent instead. + if (SE) + SE->forgetLoop(L); + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); @@ -402,13 +410,24 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], - LoopBlocks.size(), ".loopexit", - this); + BasicBlock *NewExitBB = 0; + + if (Exit->isLandingPad()) { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0], + LoopBlocks.size()), + ".loopexit", ".nonloopexit", + this, NewBBs); + NewExitBB = NewBBs[0]; + } else { + NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], + LoopBlocks.size(), ".loopexit", + this); + } DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " - << NewBB->getName() << "\n"); - return NewBB; + << NewExitBB->getName() << "\n"); + return NewExitBB; } /// AddBlockAndPredsToSet - Add the specified block, and all of its @@ -467,23 +486,23 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, if (&*BBI == SplitPreds[i]) return; } - + // If it isn't already after an outside block, move it after one. This is // always good as it makes the uncond branch from the outside block into a // fall-through. - + // Figure out *which* outside block to put this after. Prefer an outside // block that neighbors a BB actually in the loop. BasicBlock *FoundBB = 0; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && + if (++BBI != NewBB->getParent()->end() && L->contains(BBI)) { FoundBB = SplitPreds[i]; break; } } - + // If our heuristic for a *good* bb to place this after doesn't find // anything, just pick something. It's likely better than leaving it within // the loop. @@ -544,7 +563,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); - + // Create the new outer loop. Loop *NewOuter = new Loop(); @@ -735,6 +754,7 @@ void LoopSimplify::verifyAnalysis() const { } assert(HasIndBrPred && "LoopSimplify has no excuse for missing loop header info!"); + (void)HasIndBrPred; } // Indirectbr can interfere with exit block canonicalization.
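The ScalarEvolution notification added above follows the usual invalidation pattern: a transform that deletes blocks or CFG edges inside a loop must drop SE's cached trip counts before continuing. In isolation (SE is LoopSimplify's ScalarEvolution pointer, which may be null when the analysis isn't in the pipeline):

    if (SE)
      SE->forgetLoop(L);  // discard cached backedge-taken counts etc. for L

As the new comment says, if the parent loop's blocks or edges changed as well, the same call would be needed for the parent loop instead. The verifyAnalysis hunk continues below.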
@@ -742,12 +762,15 @@ bool HasIndBrExiting = false; SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { HasIndBrExiting = true; break; } + } + assert(HasIndBrExiting && "LoopSimplify has no excuse for missing exit block info!"); + (void)HasIndBrExiting; } } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 6772511..62e4fa2 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -11,9 +11,6 @@ // actual pass or policy, but provides a single function to perform loop // unrolling. // -// It works best when loops have been canonicalized by the -indvars pass, -// allowing it to determine the trip counts of loops easily. -// // The process of unrolling can produce extraneous basic blocks linked with // unconditional branches. This will be corrected in the future. // @@ -24,6 +21,7 @@ #include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/Debug.h" @@ -31,6 +29,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; // TODO: Should these be here or in LoopUnroll? @@ -61,7 +60,8 @@ static inline void RemapInstruction(Instruction *I, /// only has one predecessor, and that predecessor only has one successor. /// The LoopInfo Analysis that is passed will be kept consistent. /// Returns the new combined block. -static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { +static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, + LPPassManager *LPM) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. @@ -93,6 +93,12 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { std::string OldName = BB->getName(); // Erase basic block from the function... + + // ScalarEvolution holds references to loop exit blocks. + if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { + if (Loop *L = LI->getLoopFor(BB)) + SE->forgetLoop(L); + } LI->removeBlock(BB); BB->eraseFromParent(); @@ -109,12 +115,27 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// +/// TripCount is generally defined as the number of times the loop header +/// executes. UnrollLoop relaxes the definition to permit early exits: here +/// TripCount is the iteration on which control exits LatchBlock if no early +/// exits were taken. Note that UnrollLoop assumes that the loop counter test +/// terminates LatchBlock in order to remove unnecessary instances of the +/// test. In other words, control may exit the loop prior to TripCount +/// iterations via an early branch, but control may not exit the loop from the +/// LatchBlock's terminator prior to TripCount iterations.
+/// +/// Similarly, TripMultiple divides the number of times that the LatchBlock may +/// execute without exiting the loop. +/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. -bool llvm::UnrollLoop(Loop *L, unsigned Count, - LoopInfo *LI, LPPassManager *LPM) { +/// +/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are +/// available it must also preserve those analyses. +bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, + unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -129,14 +150,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); - + if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } - + if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << @@ -146,16 +167,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE) SE->forgetLoop(L); - // Find trip count - unsigned TripCount = L->getSmallConstantTripCount(); - // Find trip multiple if count is not available - unsigned TripMultiple = 1; - if (TripCount == 0) - TripMultiple = L->getSmallConstantTripMultiple(); - if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) @@ -208,12 +223,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - OrigPHINode.push_back(PN); - if (Instruction *I = - dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) - if (L->contains(I)) - LastValueMap[I] = I; + OrigPHINode.push_back(cast<PHINode>(I)); } std::vector<BasicBlock*> Headers; @@ -221,11 +231,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, Headers.push_back(Header); Latches.push_back(LatchBlock); + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; - - for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), - E = LoopBlocks.end(); BB != E; ++BB) { + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." 
+ Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); @@ -251,75 +270,55 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, L->addBasicBlockToLoop(New, LI->getBase()); - // Add phi entries for newly created values to all exit blocks except - // the successor of the latch block. The successor of the exit block will - // be updated specially after unrolling all the way. - if (*BB != LatchBlock) - for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; - ++SI) - if (!L->contains(*SI)) - for (BasicBlock::iterator BBI = (*SI)->begin(); - PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { - Value *Incoming = phi->getIncomingValueForBlock(*BB); - phi->addIncoming(Incoming, New); - } - + // Add phi entries for newly created values to all exit blocks. + for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); + SI != SE; ++SI) { + if (L->contains(*SI)) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { + Value *Incoming = phi->getIncomingValueForBlock(*BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + phi->addIncoming(Incoming, New); + } + } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); - if (*BB == LatchBlock) { + if (*BB == LatchBlock) Latches.push_back(New); - // Also, clear out the new latch's back edge so that it doesn't look - // like a new loop, so that it's amenable to being merged with adjacent - // blocks later on. - TerminatorInst *Term = New->getTerminator(); - assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); - assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); - Term->setSuccessor(!ContinueOnTrue, NULL); - } - NewBlocks.push_back(New); } - + // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) ::RemapInstruction(I, LastValueMap); } - - // The latch block exits the loop. If there are any PHI nodes in the - // successor blocks, update them to use the appropriate values computed as the - // last iteration of the loop. - if (Count != 1) { - BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]); - for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock); - SI != SE; ++SI) { - for (BasicBlock::iterator BBI = (*SI)->begin(); - PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { - Value *InVal = PN->removeIncomingValue(LatchBlock, false); - // If this value was defined in the loop, take the value defined by the - // last iteration of the loop. - if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { - if (L->contains(InValI)) - InVal = LastValueMap[InVal]; - } - PN->addIncoming(InVal, LastIterationBB); - } - } - } - // Now, if we're doing complete unrolling, loop over the PHI nodes in the - // original block, setting them to their incoming values. - if (CompletelyUnroll) { - BasicBlock *Preheader = L->getLoopPreheader(); - for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *PN = OrigPHINode[i]; + // Loop over the PHI nodes in the original block, setting incoming values. 
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { + PHINode *PN = OrigPHINode[i]; + if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } + else if (Count > 1) { + Value *InVal = PN->removeIncomingValue(LatchBlock, false); + // If this value was defined in the loop, take the value defined by the + // last iteration of the loop. + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { + if (L->contains(InValI)) + InVal = LastValueMap[InVal]; + } + assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); + PN->addIncoming(InVal, Latches.back()); + } } // Now that all the basic blocks for the unrolled iterations are in place, @@ -351,6 +350,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { + // Remove phi operands at this loop exit + if (Dest != LoopExit) { + BasicBlock *BB = Latches[i]; + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + if (*SI == Headers[i]) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { + Phi->removeIncomingValue(BB, false); + } + } + } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); @@ -362,11 +374,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } - + + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) + DT->runOnFunction(*L->getHeader()->getParent()); + + // Simplify any new induction variables in the partially unrolled loop. + if (SE && !CompletelyUnroll) { + SmallVector<WeakVH, 16> DeadInsts; + simplifyLoopIVs(L, SE, LPM, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. 
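With trip count discovery deleted from UnrollLoop (the removed getSmallConstantTripCount/getSmallConstantTripMultiple block above), every caller must now compute and pass TripCount and TripMultiple itself. A plausible call site, sketched under the assumption that the caller is a loop pass holding LI and LPM; the unroll factor of 4 is illustrative:

    // Trip count is 0 when unknown; the multiple is a fallback divisor of
    // the latch execution count, mirroring the logic removed above.
    unsigned TripCount = L->getSmallConstantTripCount();
    unsigned TripMultiple = 1;
    if (TripCount == 0)
      TripMultiple = L->getSmallConstantTripMultiple();
    if (!UnrollLoop(L, /*Count=*/4, TripCount, TripMultiple, LI, &LPM))
      return false;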
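The SmallVector<WeakVH, 16> in the new cleanup loop above is load-bearing: RecursivelyDeleteTriviallyDeadInstructions may delete instructions that are still queued in DeadInsts, and a WeakVH nulls itself when its value is destroyed, which is why each popped handle is filtered through dyn_cast_or_null before use. A condensed sketch of the idiom; SomeDeadInst is a hypothetical instruction pointer:

    SmallVector<WeakVH, 16> Dead;
    Dead.push_back(SomeDeadInst);    // the handle tracks the instruction
    SomeDeadInst->eraseFromParent(); // the handle silently becomes null
    while (!Dead.empty())
      // dyn_cast_or_null skips entries that died behind our back.
      if (Instruction *I = dyn_cast_or_null<Instruction>(&*Dead.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(I);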
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index c1213fa..61ab3f6 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -58,7 +58,7 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { return false; LLVMContext &Context = CI->getContext(); - const Type *Int32Ty = Type::getInt32Ty(Context); + Type *Int32Ty = Type::getInt32Ty(Context); unsigned caseNo = SI->findCaseValue(ExpectedValue); std::vector<Value *> Vec; @@ -105,7 +105,7 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { return false; LLVMContext &Context = CI->getContext(); - const Type *Int32Ty = Type::getInt32Ty(Context); + Type *Int32Ty = Type::getInt32Ty(Context); bool Likely = ExpectedValue->isOne(); // If expect value is equal to 1 it means that we are more likely to take diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index f77d19d..c96c8fc 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -120,18 +120,18 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, // doInitialization - Make sure that there is a prototype for abort in the // current module. bool LowerInvoke::doInitialization(Module &M) { - const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; JBSize = JBSize ? JBSize : 200; Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); - JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty"); + JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty"); Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) }; JBLinkTy->setBody(Elts); - const Type *PtrJBList = PointerType::getUnqual(JBLinkTy); + Type *PtrJBList = PointerType::getUnqual(JBLinkTy); // Now that we've done that, insert the jmpbuf list head global, unless it // already exists. @@ -240,14 +240,14 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II); new StoreInst(StackSaveRet, StackPtr, true, II); // volatile - BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI(); + BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt(); // nonvolatile. new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), InvokeNum, false, NI); - Instruction* StackPtrLoad = new LoadInst(StackPtr, "stackptr.restore", true, - II->getUnwindDest()->getFirstNonPHI() - ); + Instruction* StackPtrLoad = + new LoadInst(StackPtr, "stackptr.restore", true, + II->getUnwindDest()->getFirstInsertionPt()); CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad); // Add a switch case to our unwind block. @@ -305,7 +305,7 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - const Type *Ty = AI->getType(); + Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have // to handle them differently. We use an extract/insert pair as a // lightweight method to achieve the same goal. 
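For context on the HandleSwitchExpect/HandleIfExpect hunks above: this pass lowers the llvm.expect intrinsic that front ends emit for programmer-supplied branch hints, turning the expected value into branch-weight metadata and then deleting the intrinsic call. A source-level illustration of what feeds the pass, assuming Clang's __builtin_expect; the function names are hypothetical:

    // Hints that the null case is rare. Clang lowers the builtin to a call
    // to llvm.expect, which LowerExpectIntrinsic converts into
    // "branch_weights" metadata on the resulting conditional branch.
    if (__builtin_expect(ptr == 0, 0))
      handle_rare_null_case();
    else
      fast_path();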
@@ -406,6 +406,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { SmallVector<ReturnInst*,16> Returns; SmallVector<UnwindInst*,16> Unwinds; SmallVector<InvokeInst*,16> Invokes; + UnreachableInst* UnreachablePlaceholder = 0; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { @@ -455,8 +456,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; - OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], - "OldBuf", + OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf", EntryBB->getTerminator()); // Copy the JBListHead to the alloca. @@ -487,9 +487,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Insert a load in the Catch block, and a switch on its value. By default, // we go to a block that just does an unwind (which is the correct action - // for a standard call). + // for a standard call). We insert an unreachable instruction here and + // modify the block to jump to the correct unwinding pad later. BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB)); + UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB); Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); SwitchInst *CatchSwitch = @@ -502,8 +503,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { "setjmp.cont"); Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); - Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], - "TheJmpBuf", + Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf", EntryBB->getTerminator()); JmpBufPtr = new BitCastInst(JmpBufPtr, Type::getInt8PtrTy(F.getContext()), @@ -557,8 +557,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Get a pointer to the jmpbuf and longjmp. Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; - Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf", - UnwindBlock); + Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock); Idx[0] = new BitCastInst(Idx[0], Type::getInt8PtrTy(F.getContext()), "tmp", UnwindBlock); @@ -580,6 +579,12 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { Unwinds[i]->eraseFromParent(); } + // Replace the inserted unreachable with a branch to the unwind handler. + if (UnreachablePlaceholder) { + BranchInst::Create(UnwindHandler, UnreachablePlaceholder); + UnreachablePlaceholder->eraseFromParent(); + } + // Finally, for any returns from this function, if this function contains an // invoke, restore the old jmpbuf pointer to its input value. if (OldJmpBufPtr) { diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index ed733d3..686178c 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -277,11 +277,11 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { BasicBlock *CurBlock = SI->getParent(); BasicBlock *OrigBlock = CurBlock; Function *F = CurBlock->getParent(); - Value *Val = SI->getOperand(0); // The value we are switching on... + Value *Val = SI->getCondition(); // The value we are switching on... 
BasicBlock* Default = SI->getDefaultDest(); // If there is only the default destination, don't bother with the code below. - if (SI->getNumOperands() == 2) { + if (SI->getNumCases() == 1) { BranchInst::Create(SI->getDefaultDest(), CurBlock); CurBlock->getInstList().erase(SI); return; diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index e5a00f4..db3e942 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -86,11 +86,15 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { UI != UE; ++UI) { // Loop over all of the uses of the alloca const User *U = *UI; if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. if (LI->isVolatile()) return false; } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == AI) return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. if (SI->isVolatile()) return false; } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index b47a7cc..fa8061c 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -16,6 +16,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" @@ -43,7 +44,7 @@ SSAUpdater::~SSAUpdater() { /// Initialize - Reset this object to get ready for a new set of SSA /// updates with type 'Ty'. PHI nodes get a name based on 'Name'. -void SSAUpdater::Initialize(const Type *Ty, StringRef Name) { +void SSAUpdater::Initialize(Type *Ty, StringRef Name) { if (AV == 0) AV = new AvailableValsTy(); else @@ -378,8 +379,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. - // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element. - DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; + DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock; for (unsigned i = 0, e = Insts.size(); i != e; ++i) { Instruction *User = Insts[i]; @@ -395,7 +395,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { for (unsigned i = 0, e = Insts.size(); i != e; ++i) { Instruction *User = Insts[i]; BasicBlock *BB = User->getParent(); - std::vector<Instruction*> &BlockUses = UsesByBlock[BB]; + TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; // If this block has already been processed, ignore this repeat use. 
if (BlockUses.empty()) continue; diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 9d9c324..b8c3ab4 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -63,6 +63,7 @@ class SimplifyCFGOpt { bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI, IRBuilder<> &Builder); + bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder); bool SimplifyUnreachable(UnreachableInst *UI); @@ -322,7 +323,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - const IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -2138,6 +2139,52 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, return true; } +bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { + // If this is a trivial landing pad that just continues unwinding the caught + // exception then zap the landing pad, turning its invokes into calls. + BasicBlock *BB = RI->getParent(); + LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI()); + if (RI->getValue() != LPInst) + // Not a landing pad, or the resume is not unwinding the exception that + // caused control to branch here. + return false; + + // Check that there are no other instructions except for debug intrinsics. + BasicBlock::iterator I = LPInst, E = RI; + while (++I != E) + if (!isa<DbgInfoIntrinsic>(I)) + return false; + + // Turn all invokes that unwind here into calls and delete the basic block. + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + // Insert a call instruction before the invoke. + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); + Call->takeName(II); + Call->setCallingConv(II->getCallingConv()); + Call->setAttributes(II->getAttributes()); + Call->setDebugLoc(II->getDebugLoc()); + + // Anything that used the value produced by the invoke instruction now uses + // the value produced by the call instruction. Note that we do this even + // for void functions and calls with no uses so that the callgraph edge is + // updated. + II->replaceAllUsesWith(Call); + BB->removePredecessor(II->getParent()); + + // Insert a branch to the normal destination right before the invoke. + BranchInst::Create(II->getNormalDest(), II); + + // Finally, delete the invoke instruction! + II->eraseFromParent(); + } + + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + return true; +} + bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; @@ -2244,18 +2291,34 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { while (UI != BB->begin()) { BasicBlock::iterator BBI = UI; --BBI; - // Do not delete instructions that can have side effects, like calls - // (which may never return) and volatile loads and stores. 
+ // Do not delete instructions that can have side effects which might cause + // the unreachable to not be reachable; specifically, calls and volatile + // operations may have this effect. if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; - - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) - if (SI->isVolatile()) - break; - - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) - if (LI->isVolatile()) + + if (BBI->mayHaveSideEffects()) { + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (SI->isVolatile()) + break; + } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (LI->isVolatile()) + break; + } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { + if (RMWI->isVolatile()) + break; + } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { + if (CXI->isVolatile()) + break; + } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && + !isa<LandingPadInst>(BBI)) { break; - + } + // Note that deleting LandingPads here is in fact okay, although it + // involves a bit of subtle reasoning. If this inst is a LandingPad, + // all the predecessors of this block will be the unwind edges of Invokes, + // and we can therefore guarantee this block will be erased. + } + // Delete this instruction (any uses are guaranteed to be dead) if (!BBI->use_empty()) BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); @@ -2707,6 +2770,71 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return false; } +/// Check if passing a value to an instruction will cause undefined behavior. +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { + Constant *C = dyn_cast<Constant>(V); + if (!C) + return false; + + if (!I->hasOneUse()) // Only look at single-use instructions, for compile time + return false; + + if (C->isNullValue()) { + Instruction *Use = I->use_back(); + + // Now make sure that there are no instructions in between that can alter + // control flow (e.g. calls) + for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i) + if (i == I->getParent()->end() || i->mayHaveSideEffects()) + return false; + + // Look through GEPs. A load from a GEP derived from NULL is still undefined + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use)) + if (GEP->getPointerOperand() == I) + return passingValueIsAlwaysUndefined(V, GEP); + + // Look through bitcasts. + if (BitCastInst *BC = dyn_cast<BitCastInst>(Use)) + return passingValueIsAlwaysUndefined(V, BC); + + // Load from null is undefined. + if (LoadInst *LI = dyn_cast<LoadInst>(Use)) + return LI->getPointerAddressSpace() == 0; + + // Store to null is undefined. + if (StoreInst *SI = dyn_cast<StoreInst>(Use)) + return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + } + return false; +} + +/// If BB has an incoming value that will always trigger undefined behavior +/// (e.g. null pointer dereference), remove the branch leading here. +static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { + for (BasicBlock::iterator i = BB->begin(); + PHINode *PHI = dyn_cast<PHINode>(i); ++i) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) { + TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator(); + IRBuilder<> Builder(T); + if (BranchInst *BI = dyn_cast<BranchInst>(T)) { + BB->removePredecessor(PHI->getIncomingBlock(i)); + // Turn unconditional branches into unreachables and remove the dead + // destination from conditional branches.
+ if (BI->isUnconditional()) + Builder.CreateUnreachable(); + else + Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : + BI->getSuccessor(0)); + BI->eraseFromParent(); + return true; + } + // TODO: SwitchInst. + } + + return false; +} + bool SimplifyCFGOpt::run(BasicBlock *BB) { bool Changed = false; @@ -2730,6 +2858,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // Check for and eliminate duplicate PHI nodes in this block. Changed |= EliminateDuplicatePHINodes(BB); + // Check for and remove branches that will always cause undefined behavior. + Changed |= removeUndefIntroducingPredecessor(BB); + // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. @@ -2752,6 +2883,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { } else { if (SimplifyCondBranch(BI, Builder)) return true; } + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + if (SimplifyResume(RI, Builder)) return true; } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { if (SimplifyReturn(RI, Builder)) return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp new file mode 100644 index 0000000..76289c0 --- /dev/null +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -0,0 +1,432 @@ +//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements induction variable simplification. It does +// not define any actual pass or policy, but provides a single function to +// simplify a loop's induction variables based on ScalarEvolution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "indvars" + +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); +STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); + +namespace { + /// SimplifyIndvar - This is a utility for simplifying induction variables + /// based on ScalarEvolution. It is the primary instrument of the + /// IndvarSimplify pass, but it may also be directly invoked to clean up after + /// other loop passes that preserve SCEV.
+ class SimplifyIndvar { + Loop *L; + LoopInfo *LI; + DominatorTree *DT; + ScalarEvolution *SE; + IVUsers *IU; // NULL for DisableIVRewrite + const TargetData *TD; // May be NULL + + SmallVectorImpl<WeakVH> &DeadInsts; + + bool Changed; + + public: + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) : + L(Loop), + LI(LPM->getAnalysisIfAvailable<LoopInfo>()), + SE(SE), + IU(IVU), + TD(LPM->getAnalysisIfAvailable<TargetData>()), + DeadInsts(Dead), + Changed(false) { + assert(LI && "IV simplification requires LoopInfo"); + } + + bool hasChanged() const { return Changed; } + + /// Iteratively perform simplification on a worklist of users of the + /// specified induction variable. This is the top-level driver that applies + /// all simplifications to users of an IV. + void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL); + + Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); + void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); + void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned); + }; +} + +/// foldIVUser - Fold an IV operand into its use. This removes increments of an +/// aligned IV when used by an instruction that ignores the low bits. +/// +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// +/// Return the operand of IVOperand for this induction variable if IVOperand can +/// be folded (in case more folding opportunities have been exposed). +/// Otherwise return null. +Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { + Value *IVSrc = 0; + unsigned OperIdx = 0; + const SCEV *FoldedExpr = 0; + switch (UseInst->getOpcode()) { + default: + return 0; + case Instruction::UDiv: + case Instruction::LShr: + // We're only interested in the case where we know something about + // the numerator and have a constant denominator. + if (IVOperand != UseInst->getOperand(OperIdx) || + !isa<ConstantInt>(UseInst->getOperand(1))) + return 0; + + // Attempt to fold a binary operator with constant operand. + // e.g. ((I + 1) >> 2) => I >> 2 + if (IVOperand->getNumOperands() != 2 || + !isa<ConstantInt>(IVOperand->getOperand(1))) + return 0; + + IVSrc = IVOperand->getOperand(0); + // IVSrc must be the (SCEVable) IV, since the other operand is const. + assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand"); + + ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1)); + if (UseInst->getOpcode() == Instruction::LShr) { + // Get a constant for the divisor. See createSCEV. + uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); + if (D->getValue().uge(BitWidth)) + return 0; + + D = ConstantInt::get(UseInst->getContext(), + APInt(BitWidth, 1).shl(D->getZExtValue())); + } + FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D)); + } + // We have something that might fold its operand. Compare SCEVs. + if (!SE->isSCEVable(UseInst->getType())) + return 0; + + // Bypass the operand if SCEV can prove it has no effect.
+ if (SE->getSCEV(UseInst) != FoldedExpr) + return 0; + + DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand + << " -> " << *UseInst << '\n'); + + UseInst->setOperand(OperIdx, IVSrc); + assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); + + ++NumElimOperand; + Changed = true; + if (IVOperand->use_empty()) + DeadInsts.push_back(IVOperand); + return IVSrc; +} + +/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless +/// comparisons against an induction variable. +void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); + const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // If the condition is always true or always false, replace it with + // a constant value. + if (SE->isKnownPredicate(Pred, S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); + else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + else + return; + + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + ++NumElimCmp; + Changed = true; + DeadInsts.push_back(ICmp); +} + +/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless +/// remainder operations operating on an induction variable. +void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, + Value *IVOperand, + bool IsSigned) { + // We're only interested in the case where we know something about + // the numerator. + if (IVOperand != Rem->getOperand(0)) + return; + + // Get the SCEVs for the Rem operands. + const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!IsSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if (IsSigned && !SE->isKnownNonNegative(LessOne)) + return; + + if (!SE->isKnownPredicate(IsSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) + return; + + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1)); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } + + // Inform IVUsers about the new users.
+ if (IU) { + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + } + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.push_back(Rem); +} + +/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has +/// no observable side-effect given the range of IV values. +/// IVOperand is guaranteed SCEVable, but UseInst may not be. +bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + eliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + eliminateIVRemainder(Rem, IVOperand, IsSigned); + return true; + } + } + + // Eliminate any operation that SCEV can prove is an identity function. + if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || + (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + return false; + + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + UseInst->replaceAllUsesWith(IVOperand); + ++NumElimIdentity; + Changed = true; + DeadInsts.push_back(UseInst); + return true; +} + +/// pushIVUsers - Add all uses of Def to the current IV's worklist. +/// +static void pushIVUsers( + Instruction *Def, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { + + for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // Avoid infinite or exponential worklist processing. + // Also ensure unique worklist users. + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. + if (User != Def && Simplified.insert(User)) + SimpleIVUsers.push_back(std::make_pair(User, Def)); + } +} + +/// isSimpleIVUser - Return true if this instruction generates a simple SCEV +/// expression in terms of that IV. +/// +/// This is similar to IVUsers' isInteresting() but processes each instruction +/// non-recursively when the operand is already known to be a simpleIVUser. +/// +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + // Only consider affine recurrences. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); + if (AR && AR->getLoop() == L) + return true; + + return false; +} + +/// simplifyUsers - Iteratively perform simplification on a worklist of users +/// of the specified induction variable. Each successive simplification may push +/// more users which may themselves be candidates for simplification. +/// +/// This algorithm does not require IVUsers analysis. Instead, it simplifies +/// instructions in-place during analysis. Rather than rewriting induction +/// variables bottom-up from their users, it transforms a chain of IVUsers +/// top-down, updating the IR only when it encounters a clear optimization +/// opportunity. +/// +/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. +/// +void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { + if (!SE->isSCEVable(CurrIV->getType())) + return; + + // Instructions processed by SimplifyIndvar for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified; + + // Use-def pairs of IV users waiting to be processed for CurrIV. + SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; + + // Push users of the current LoopPhi. In rare cases, pushIVUsers may be + // called multiple times for the same LoopPhi. This is the proper thing to + // do for loop header phis that use each other. + pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + + while (!SimpleIVUsers.empty()) { + std::pair<Instruction*, Instruction*> UseOper = + SimpleIVUsers.pop_back_val(); + // Bypass back edges to avoid extra work. + if (UseOper.first == CurrIV) continue; + + Instruction *IVOperand = UseOper.second; + for (unsigned N = 0; IVOperand; ++N) { + assert(N <= Simplified.size() && "runaway iteration"); + + Value *NewOper = foldIVUser(UseOper.first, IVOperand); + if (!NewOper) + break; // done folding + IVOperand = dyn_cast<Instruction>(NewOper); + } + if (!IVOperand) + continue; + + if (eliminateIVUser(UseOper.first, IVOperand)) { + pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + continue; + } + CastInst *Cast = dyn_cast<CastInst>(UseOper.first); + if (V && Cast) { + V->visitCast(Cast); + continue; + } + if (isSimpleIVUser(UseOper.first, L, SE)) { + pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + } + } +} + +namespace llvm { + +/// simplifyUsersOfIV - Simplify instructions that use this induction variable +/// by using ScalarEvolution to analyze the IV's recurrence. +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, + SmallVectorImpl<WeakVH> &Dead, IVVisitor *V) +{ + LoopInfo *LI = &LPM->getAnalysis<LoopInfo>(); + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead); + SIV.simplifyUsers(CurrIV, V); + return SIV.hasChanged(); +} + +/// simplifyLoopIVs - Simplify users of induction variables within this +/// loop. This does not actually change or add IVs. +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, +                     SmallVectorImpl<WeakVH> &Dead) { + bool Changed = false; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead); + } + return Changed; +} + +/// simplifyIVUsers - Perform simplification on instructions recorded by the +/// IVUsers pass. +/// +/// This is the old approach to IV simplification to be replaced by +/// SimplifyLoopIVs. +bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM, +                     SmallVectorImpl<WeakVH> &Dead) { + SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead); + + // Each round of simplification involves a round of eliminating operations + // followed by a round of widening IVs. A single IVUsers worklist is used + // across all rounds. The inner loop advances the user. If widening exposes + // more uses, then another pass through the outer loop is triggered.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { + Instruction *UseInst = I->getUser(); + Value *IVOperand = I->getOperandValToReplace(); + + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + SIV.eliminateIVComparison(ICmp, IVOperand); + continue; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned); + continue; + } + } + } + return SIV.hasChanged(); +} + +} // namespace llvm diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 973b105..fc2538d 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -183,10 +183,9 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, } } - // Remap attached metadata. Don't bother remapping DebugLoc, it can never - // have mappings to do. + // Remap attached metadata. SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; - I->getAllMetadataOtherThanDebugLoc(MDs); + I->getAllMetadata(MDs); for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { MDNode *Old = MI->second; |
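The foldIVUser fold in SimplifyIndVar.cpp above is easiest to see with concrete numbers. If SCEV knows the IV steps in multiples of 4, then in a use such as (i + 1) >> 2 the +1 can never reach the bits that survive the shift, so SE->getSCEV(UseInst) equals the folded expression and the operand is rewritten to the bare IV. An illustrative source-level analogue; A and consume are hypothetical:

    // i is always a multiple of 4, so (i + 1) >> 2 == i >> 2 on every
    // iteration; foldIVUser drops the increment from the shift's operand.
    for (unsigned i = 0; i != 64; i += 4)
      consume(A[(i + 1) >> 2]);   // effectively A[i >> 2]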
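eliminateIVRemainder and eliminateIVComparison likewise lean on SCEV range facts rather than syntactic patterns. The canonical remainder case is an index already bounded by the modulus; n and B are hypothetical:

    for (unsigned i = 0; i < n; ++i) {
      B[i % n] = 0;   // SCEV proves 0 <= i < n, so i % n == i (NumElimRem)
      // A comparison such as (i < n) inside the body is known true on every
      // iteration and folds to a constant instead (NumElimCmp).
    }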
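The ValueMapper change at the end means RemapInstruction now rewrites every kind of attached metadata, including debug locations, instead of skipping !dbg. A typical cloning sequence that relies on this, sketched from the CloneBasicBlock/RemapInstruction calls seen in the LoopUnroll hunks; RF_IgnoreMissingEntries is assumed here so operands defined outside the cloned block are left untouched:

    ValueToValueMapTy VMap;
    BasicBlock *New = CloneBasicBlock(BB, VMap, ".clone", BB->getParent());
    for (BasicBlock::iterator I = New->begin(), E = New->end(); I != E; ++I)
      // Remaps operands and, after this patch, all attached metadata.
      RemapInstruction(I, VMap, RF_IgnoreMissingEntries);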